radeonsi: move declaring streamout parameters to its own function
[mesa.git] / src / gallium / drivers / radeonsi / si_shader.c
1 /*
2 * Copyright 2012 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 *
23 * Authors:
24 * Tom Stellard <thomas.stellard@amd.com>
25 * Michel Dänzer <michel.daenzer@amd.com>
26 * Christian König <christian.koenig@amd.com>
27 */
28
29 #include "gallivm/lp_bld_const.h"
30 #include "gallivm/lp_bld_gather.h"
31 #include "gallivm/lp_bld_intr.h"
32 #include "gallivm/lp_bld_logic.h"
33 #include "gallivm/lp_bld_arit.h"
34 #include "gallivm/lp_bld_flow.h"
35 #include "radeon/r600_cs.h"
36 #include "radeon/radeon_llvm.h"
37 #include "radeon/radeon_elf_util.h"
38 #include "radeon/radeon_llvm_emit.h"
39 #include "util/u_memory.h"
40 #include "util/u_pstipple.h"
41 #include "tgsi/tgsi_parse.h"
42 #include "tgsi/tgsi_util.h"
43 #include "tgsi/tgsi_dump.h"
44
45 #include "si_pipe.h"
46 #include "si_shader.h"
47 #include "sid.h"
48
49 #include <errno.h>
50
51 static const char *scratch_rsrc_dword0_symbol =
52 "SCRATCH_RSRC_DWORD0";
53
54 static const char *scratch_rsrc_dword1_symbol =
55 "SCRATCH_RSRC_DWORD1";
56
57 struct si_shader_output_values
58 {
59 LLVMValueRef values[4];
60 unsigned name;
61 unsigned sid;
62 };
63
64 struct si_shader_context
65 {
66 struct radeon_llvm_context radeon_bld;
67 struct si_shader *shader;
68 struct si_screen *screen;
69 unsigned type; /* TGSI_PROCESSOR_* specifies the type of shader. */
70 int param_streamout_config;
71 int param_streamout_write_index;
72 int param_streamout_offset[4];
73 int param_vertex_id;
74 int param_instance_id;
75 int param_es2gs_offset;
76 LLVMTargetMachineRef tm;
77 LLVMValueRef const_md;
78 LLVMValueRef const_resource[SI_NUM_CONST_BUFFERS];
79 LLVMValueRef ddxy_lds;
80 LLVMValueRef *constants[SI_NUM_CONST_BUFFERS];
81 LLVMValueRef resources[SI_NUM_SAMPLER_VIEWS];
82 LLVMValueRef samplers[SI_NUM_SAMPLER_STATES];
83 LLVMValueRef so_buffers[4];
84 LLVMValueRef esgs_ring;
85 LLVMValueRef gsvs_ring;
86 LLVMValueRef gs_next_vertex;
87 };
88
89 static struct si_shader_context * si_shader_context(
90 struct lp_build_tgsi_context * bld_base)
91 {
92 return (struct si_shader_context *)bld_base;
93 }
94
95
96 #define PERSPECTIVE_BASE 0
97 #define LINEAR_BASE 9
98
99 #define SAMPLE_OFFSET 0
100 #define CENTER_OFFSET 2
101 #define CENTROID_OFSET 4
102
103 #define USE_SGPR_MAX_SUFFIX_LEN 5
104 #define CONST_ADDR_SPACE 2
105 #define LOCAL_ADDR_SPACE 3
106 #define USER_SGPR_ADDR_SPACE 8
107
108
109 #define SENDMSG_GS 2
110 #define SENDMSG_GS_DONE 3
111
112 #define SENDMSG_GS_OP_NOP (0 << 4)
113 #define SENDMSG_GS_OP_CUT (1 << 4)
114 #define SENDMSG_GS_OP_EMIT (2 << 4)
115 #define SENDMSG_GS_OP_EMIT_CUT (3 << 4)
116
117 /**
118  * Returns a unique index for a semantic name and index. The returned index
119  * must be less than 64, so that a 64-bit bitmask of used inputs or outputs
120  * can be calculated.
121 */
122 unsigned si_shader_io_get_unique_index(unsigned semantic_name, unsigned index)
123 {
124 switch (semantic_name) {
125 case TGSI_SEMANTIC_POSITION:
126 return 0;
127 case TGSI_SEMANTIC_PSIZE:
128 return 1;
129 case TGSI_SEMANTIC_CLIPDIST:
130 assert(index <= 1);
131 return 2 + index;
132 case TGSI_SEMANTIC_GENERIC:
133 assert(index <= 63-4);
134 return 4 + index;
135
136 default:
137 /* Don't fail here. The result of this function is only used
138 * for LS, TCS, TES, and GS, where legacy GL semantics can't
139 * occur, but this function is called for all vertex shaders
140 * before it's known whether LS will be compiled or not.
141 */
142 return 0;
143 }
144 }
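
/* Illustrative sketch (not part of the original file, kept compiled out):
 * how a caller might build the 64-bit usage mask that get_param_index()
 * below consumes. The tgsi_shader_info fields are real; the helper itself
 * is a hypothetical example.
 */
#if 0
static uint64_t build_usage_mask(const struct tgsi_shader_info *info)
{
	uint64_t mask = 0;
	unsigned i;

	for (i = 0; i < info->num_outputs; i++)
		mask |= 1llu << si_shader_io_get_unique_index(
					info->output_semantic_name[i],
					info->output_semantic_index[i]);
	return mask;
}
#endif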
145
146 /**
147 * Given a semantic name and index of a parameter and a mask of used parameters
148 * (inputs or outputs), return the index of the parameter in the list of all
149 * used parameters.
150 *
151 * For example, assume this list of parameters:
152 * POSITION, PSIZE, GENERIC0, GENERIC2
153 * which has the mask:
154  *   1010011
155 * Then:
156 * querying POSITION returns 0,
157 * querying PSIZE returns 1,
158 * querying GENERIC0 returns 2,
159 * querying GENERIC2 returns 3.
160 *
161  * The returned value can be used as an offset into a parameter buffer
162  * in units of vec4s.
162 */
163 static int get_param_index(unsigned semantic_name, unsigned index,
164 uint64_t mask)
165 {
166 unsigned unique_index = si_shader_io_get_unique_index(semantic_name, index);
167 int i, param_index = 0;
168
169 /* If not present... */
170 if (!((1llu << unique_index) & mask))
171 return -1;
172
173 for (i = 0; mask; i++) {
174 uint64_t bit = 1llu << i;
175
176 if (bit & mask) {
177 if (i == unique_index)
178 return param_index;
179
180 mask &= ~bit;
181 param_index++;
182 }
183 }
184
185 assert(!"unreachable");
186 return -1;
187 }
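
/* Worked example (sketch), using the mapping from si_shader_io_get_unique_index
 * above: POSITION, PSIZE, GENERIC0, GENERIC2 occupy unique indices 0, 1, 4, 6,
 * giving mask 0x53 (binary 1010011); querying GENERIC2 then yields 3.
 */
#if 0
assert(get_param_index(TGSI_SEMANTIC_GENERIC, 2, 0x53) == 3);
assert(get_param_index(TGSI_SEMANTIC_CLIPDIST, 0, 0x53) == -1); /* not present */
#endif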
188
189 /**
190 * Get the value of a shader input parameter and extract a bitfield.
191 */
192 static LLVMValueRef unpack_param(struct si_shader_context *si_shader_ctx,
193 unsigned param, unsigned rshift,
194 unsigned bitwidth)
195 {
196 struct gallivm_state *gallivm = &si_shader_ctx->radeon_bld.gallivm;
197 LLVMValueRef value = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
198 param);
199
200 if (rshift)
201 value = LLVMBuildLShr(gallivm->builder, value,
202 lp_build_const_int32(gallivm, rshift), "");
203
204 if (rshift + bitwidth < 32) {
205 unsigned mask = (1 << bitwidth) - 1;
206 value = LLVMBuildAnd(gallivm->builder, value,
207 lp_build_const_int32(gallivm, mask), "");
208 }
209
210 return value;
211 }
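
/* Example from later in this file (see si_llvm_emit_streamout): reading the
 * streamout vertex count from bits [22:16] of the streamout config SGPR
 * boils down to
 *   unpack_param(ctx, ctx->param_streamout_config, 16, 7);
 * i.e. (value >> 16) & 0x7f.
 */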
212
213 /**
214  * Build an LLVM IR indexed load using LLVMBuildGEP + LLVMBuildLoad.
215 * It's equivalent to doing a load from &base_ptr[index].
216 *
217 * \param base_ptr Where the array starts.
218 * \param index The element index into the array.
219 */
220 static LLVMValueRef build_indexed_load(struct si_shader_context *si_shader_ctx,
221 LLVMValueRef base_ptr, LLVMValueRef index)
222 {
223 struct lp_build_tgsi_context *bld_base = &si_shader_ctx->radeon_bld.soa.bld_base;
224 struct gallivm_state *gallivm = bld_base->base.gallivm;
225 LLVMValueRef indices[2], pointer;
226
227 indices[0] = bld_base->uint_bld.zero;
228 indices[1] = index;
229
230 pointer = LLVMBuildGEP(gallivm->builder, base_ptr, indices, 2, "");
231 return LLVMBuildLoad(gallivm->builder, pointer, "");
232 }
233
234 /**
235 * Do a load from &base_ptr[index], but also add a flag that it's loading
236 * a constant.
237 */
238 static LLVMValueRef build_indexed_load_const(
239 struct si_shader_context * si_shader_ctx,
240 LLVMValueRef base_ptr, LLVMValueRef index)
241 {
242 LLVMValueRef result = build_indexed_load(si_shader_ctx, base_ptr, index);
243 LLVMSetMetadata(result, 1, si_shader_ctx->const_md);
244 return result;
245 }
246
247 static LLVMValueRef get_instance_index_for_fetch(
248 struct radeon_llvm_context * radeon_bld,
249 unsigned divisor)
250 {
251 struct si_shader_context *si_shader_ctx =
252 si_shader_context(&radeon_bld->soa.bld_base);
253 struct gallivm_state * gallivm = radeon_bld->soa.bld_base.base.gallivm;
254
255 LLVMValueRef result = LLVMGetParam(radeon_bld->main_fn,
256 si_shader_ctx->param_instance_id);
257
258 /* The division must be done before START_INSTANCE is added. */
259 if (divisor > 1)
260 result = LLVMBuildUDiv(gallivm->builder, result,
261 lp_build_const_int32(gallivm, divisor), "");
262
263 return LLVMBuildAdd(gallivm->builder, result, LLVMGetParam(
264 radeon_bld->main_fn, SI_PARAM_START_INSTANCE), "");
265 }
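
/* Example: with an instance divisor of 3, instances 0..2 fetch element
 * START_INSTANCE + 0, instances 3..5 fetch START_INSTANCE + 1, and so on,
 * which is why the division must happen before START_INSTANCE is added. */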
266
267 static void declare_input_vs(
268 struct radeon_llvm_context *radeon_bld,
269 unsigned input_index,
270 const struct tgsi_full_declaration *decl)
271 {
272 struct lp_build_context *base = &radeon_bld->soa.bld_base.base;
273 struct gallivm_state *gallivm = base->gallivm;
274 struct si_shader_context *si_shader_ctx =
275 si_shader_context(&radeon_bld->soa.bld_base);
276 unsigned divisor = si_shader_ctx->shader->key.vs.instance_divisors[input_index];
277
278 unsigned chan;
279
280 LLVMValueRef t_list_ptr;
281 LLVMValueRef t_offset;
282 LLVMValueRef t_list;
283 LLVMValueRef attribute_offset;
284 LLVMValueRef buffer_index;
285 LLVMValueRef args[3];
286 LLVMTypeRef vec4_type;
287 LLVMValueRef input;
288
289 /* Load the T list */
290 t_list_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_VERTEX_BUFFER);
291
292 t_offset = lp_build_const_int32(gallivm, input_index);
293
294 t_list = build_indexed_load_const(si_shader_ctx, t_list_ptr, t_offset);
295
296 /* Build the attribute offset */
297 attribute_offset = lp_build_const_int32(gallivm, 0);
298
299 if (divisor) {
300 /* Build index from instance ID, start instance and divisor */
301 si_shader_ctx->shader->uses_instanceid = true;
302 buffer_index = get_instance_index_for_fetch(&si_shader_ctx->radeon_bld, divisor);
303 } else {
304 /* Load the buffer index for vertices. */
305 LLVMValueRef vertex_id = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
306 si_shader_ctx->param_vertex_id);
307 LLVMValueRef base_vertex = LLVMGetParam(radeon_bld->main_fn,
308 SI_PARAM_BASE_VERTEX);
309 buffer_index = LLVMBuildAdd(gallivm->builder, base_vertex, vertex_id, "");
310 }
311
312 vec4_type = LLVMVectorType(base->elem_type, 4);
313 args[0] = t_list;
314 args[1] = attribute_offset;
315 args[2] = buffer_index;
316 input = build_intrinsic(gallivm->builder,
317 "llvm.SI.vs.load.input", vec4_type, args, 3,
318 LLVMReadNoneAttribute | LLVMNoUnwindAttribute);
319
320 /* Break up the vec4 into individual components */
321 for (chan = 0; chan < 4; chan++) {
322 LLVMValueRef llvm_chan = lp_build_const_int32(gallivm, chan);
323 /* XXX: Use a helper function for this. There is one in
324 * tgsi_llvm.c. */
325 si_shader_ctx->radeon_bld.inputs[radeon_llvm_reg_index_soa(input_index, chan)] =
326 LLVMBuildExtractElement(gallivm->builder,
327 input, llvm_chan, "");
328 }
329 }
330
331 static LLVMValueRef fetch_input_gs(
332 struct lp_build_tgsi_context *bld_base,
333 const struct tgsi_full_src_register *reg,
334 enum tgsi_opcode_type type,
335 unsigned swizzle)
336 {
337 struct lp_build_context *base = &bld_base->base;
338 struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
339 struct si_shader *shader = si_shader_ctx->shader;
340 struct lp_build_context *uint = &si_shader_ctx->radeon_bld.soa.bld_base.uint_bld;
341 struct gallivm_state *gallivm = base->gallivm;
342 LLVMTypeRef i32 = LLVMInt32TypeInContext(gallivm->context);
343 LLVMValueRef vtx_offset;
344 LLVMValueRef args[9];
345 unsigned vtx_offset_param;
346 struct tgsi_shader_info *info = &shader->selector->info;
347 unsigned semantic_name = info->input_semantic_name[reg->Register.Index];
348 unsigned semantic_index = info->input_semantic_index[reg->Register.Index];
349
350 if (swizzle != ~0 && semantic_name == TGSI_SEMANTIC_PRIMID) {
351 if (swizzle == 0)
352 return LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
353 SI_PARAM_PRIMITIVE_ID);
354 else
355 return uint->zero;
356 }
357
358 if (!reg->Register.Dimension)
359 return NULL;
360
361 if (swizzle == ~0) {
362 LLVMValueRef values[TGSI_NUM_CHANNELS];
363 unsigned chan;
364 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
365 values[chan] = fetch_input_gs(bld_base, reg, type, chan);
366 }
367 return lp_build_gather_values(bld_base->base.gallivm, values,
368 TGSI_NUM_CHANNELS);
369 }
370
371 /* Get the vertex offset parameter */
372 vtx_offset_param = reg->Dimension.Index;
373 if (vtx_offset_param < 2) {
374 vtx_offset_param += SI_PARAM_VTX0_OFFSET;
375 } else {
376 assert(vtx_offset_param < 6);
377 vtx_offset_param += SI_PARAM_VTX2_OFFSET - 2;
378 }
379 vtx_offset = lp_build_mul_imm(uint,
380 LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
381 vtx_offset_param),
382 4);
383
384 args[0] = si_shader_ctx->esgs_ring;
385 args[1] = vtx_offset;
386 args[2] = lp_build_const_int32(gallivm,
387 (get_param_index(semantic_name, semantic_index,
388 shader->selector->gs_used_inputs) * 4 +
389 swizzle) * 256);
390 args[3] = uint->zero;
391 args[4] = uint->one; /* OFFEN */
392 args[5] = uint->zero; /* IDXEN */
393 args[6] = uint->one; /* GLC */
394 args[7] = uint->zero; /* SLC */
395 args[8] = uint->zero; /* TFE */
396
397 return LLVMBuildBitCast(gallivm->builder,
398 build_intrinsic(gallivm->builder,
399 "llvm.SI.buffer.load.dword.i32.i32",
400 i32, args, 9,
401 LLVMReadOnlyAttribute | LLVMNoUnwindAttribute),
402 tgsi2llvmtype(bld_base, type), "");
403 }
404
405 static void declare_input_fs(
406 struct radeon_llvm_context *radeon_bld,
407 unsigned input_index,
408 const struct tgsi_full_declaration *decl)
409 {
410 struct lp_build_context *base = &radeon_bld->soa.bld_base.base;
411 struct si_shader_context *si_shader_ctx =
412 si_shader_context(&radeon_bld->soa.bld_base);
413 struct si_shader *shader = si_shader_ctx->shader;
414 struct lp_build_context *uint = &radeon_bld->soa.bld_base.uint_bld;
415 struct gallivm_state *gallivm = base->gallivm;
416 LLVMTypeRef input_type = LLVMFloatTypeInContext(gallivm->context);
417 LLVMValueRef main_fn = radeon_bld->main_fn;
418
419 LLVMValueRef interp_param;
420 const char * intr_name;
421
422 /* This value is:
423 	 * [15:0] NewPrimMask (Bit mask for each quad. It is set if the
424 	 *        quad begins a new primitive. Bit 0 always needs
425 	 *        to be unset)
426 	 * [31:16] ParamOffset
427 *
428 */
429 LLVMValueRef params = LLVMGetParam(main_fn, SI_PARAM_PRIM_MASK);
430 LLVMValueRef attr_number;
431
432 unsigned chan;
433
434 if (decl->Semantic.Name == TGSI_SEMANTIC_POSITION) {
435 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
436 unsigned soa_index =
437 radeon_llvm_reg_index_soa(input_index, chan);
438 radeon_bld->inputs[soa_index] =
439 LLVMGetParam(main_fn, SI_PARAM_POS_X_FLOAT + chan);
440
441 if (chan == 3)
442 /* RCP for fragcoord.w */
443 radeon_bld->inputs[soa_index] =
444 LLVMBuildFDiv(gallivm->builder,
445 lp_build_const_float(gallivm, 1.0f),
446 radeon_bld->inputs[soa_index],
447 "");
448 }
449 return;
450 }
451
452 if (decl->Semantic.Name == TGSI_SEMANTIC_FACE) {
453 radeon_bld->inputs[radeon_llvm_reg_index_soa(input_index, 0)] =
454 LLVMGetParam(main_fn, SI_PARAM_FRONT_FACE);
455 radeon_bld->inputs[radeon_llvm_reg_index_soa(input_index, 1)] =
456 radeon_bld->inputs[radeon_llvm_reg_index_soa(input_index, 2)] =
457 lp_build_const_float(gallivm, 0.0f);
458 radeon_bld->inputs[radeon_llvm_reg_index_soa(input_index, 3)] =
459 lp_build_const_float(gallivm, 1.0f);
460
461 return;
462 }
463
464 shader->ps_input_param_offset[input_index] = shader->nparam++;
465 attr_number = lp_build_const_int32(gallivm,
466 shader->ps_input_param_offset[input_index]);
467
468 switch (decl->Interp.Interpolate) {
469 case TGSI_INTERPOLATE_CONSTANT:
470 interp_param = 0;
471 break;
472 case TGSI_INTERPOLATE_LINEAR:
473 if (decl->Interp.Location == TGSI_INTERPOLATE_LOC_SAMPLE)
474 interp_param = LLVMGetParam(main_fn, SI_PARAM_LINEAR_SAMPLE);
475 else if (decl->Interp.Location == TGSI_INTERPOLATE_LOC_CENTROID)
476 interp_param = LLVMGetParam(main_fn, SI_PARAM_LINEAR_CENTROID);
477 else
478 interp_param = LLVMGetParam(main_fn, SI_PARAM_LINEAR_CENTER);
479 break;
480 case TGSI_INTERPOLATE_COLOR:
481 case TGSI_INTERPOLATE_PERSPECTIVE:
482 if (decl->Interp.Location == TGSI_INTERPOLATE_LOC_SAMPLE)
483 interp_param = LLVMGetParam(main_fn, SI_PARAM_PERSP_SAMPLE);
484 else if (decl->Interp.Location == TGSI_INTERPOLATE_LOC_CENTROID)
485 interp_param = LLVMGetParam(main_fn, SI_PARAM_PERSP_CENTROID);
486 else
487 interp_param = LLVMGetParam(main_fn, SI_PARAM_PERSP_CENTER);
488 break;
489 default:
490 fprintf(stderr, "Warning: Unhandled interpolation mode.\n");
491 return;
492 }
493
494 /* fs.constant returns the param from the middle vertex, so it's not
495 * really useful for flat shading. It's meant to be used for custom
496 * interpolation (but the intrinsic can't fetch from the other two
497 * vertices).
498 *
499 * Luckily, it doesn't matter, because we rely on the FLAT_SHADE state
500 * to do the right thing. The only reason we use fs.constant is that
501 	 * fs.interp cannot be used on integers, because their bit patterns
502 	 * can alias NaN when interpreted as floats.
503 */
504 intr_name = interp_param ? "llvm.SI.fs.interp" : "llvm.SI.fs.constant";
505
506 if (decl->Semantic.Name == TGSI_SEMANTIC_COLOR &&
507 si_shader_ctx->shader->key.ps.color_two_side) {
508 LLVMValueRef args[4];
509 LLVMValueRef face, is_face_positive;
510 LLVMValueRef back_attr_number =
511 lp_build_const_int32(gallivm,
512 shader->ps_input_param_offset[input_index] + 1);
513
514 face = LLVMGetParam(main_fn, SI_PARAM_FRONT_FACE);
515
516 is_face_positive = LLVMBuildFCmp(gallivm->builder,
517 LLVMRealOGT, face,
518 lp_build_const_float(gallivm, 0.0f),
519 "");
520
521 args[2] = params;
522 args[3] = interp_param;
523 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
524 LLVMValueRef llvm_chan = lp_build_const_int32(gallivm, chan);
525 unsigned soa_index = radeon_llvm_reg_index_soa(input_index, chan);
526 LLVMValueRef front, back;
527
528 args[0] = llvm_chan;
529 args[1] = attr_number;
530 front = build_intrinsic(gallivm->builder, intr_name,
531 input_type, args, args[3] ? 4 : 3,
532 LLVMReadNoneAttribute | LLVMNoUnwindAttribute);
533
534 args[1] = back_attr_number;
535 back = build_intrinsic(gallivm->builder, intr_name,
536 input_type, args, args[3] ? 4 : 3,
537 LLVMReadNoneAttribute | LLVMNoUnwindAttribute);
538
539 radeon_bld->inputs[soa_index] =
540 LLVMBuildSelect(gallivm->builder,
541 is_face_positive,
542 front,
543 back,
544 "");
545 }
546
547 shader->nparam++;
548 } else if (decl->Semantic.Name == TGSI_SEMANTIC_FOG) {
549 LLVMValueRef args[4];
550
551 args[0] = uint->zero;
552 args[1] = attr_number;
553 args[2] = params;
554 args[3] = interp_param;
555 radeon_bld->inputs[radeon_llvm_reg_index_soa(input_index, 0)] =
556 build_intrinsic(gallivm->builder, intr_name,
557 input_type, args, args[3] ? 4 : 3,
558 LLVMReadNoneAttribute | LLVMNoUnwindAttribute);
559 radeon_bld->inputs[radeon_llvm_reg_index_soa(input_index, 1)] =
560 radeon_bld->inputs[radeon_llvm_reg_index_soa(input_index, 2)] =
561 lp_build_const_float(gallivm, 0.0f);
562 radeon_bld->inputs[radeon_llvm_reg_index_soa(input_index, 3)] =
563 lp_build_const_float(gallivm, 1.0f);
564 } else {
565 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
566 LLVMValueRef args[4];
567 LLVMValueRef llvm_chan = lp_build_const_int32(gallivm, chan);
568 unsigned soa_index = radeon_llvm_reg_index_soa(input_index, chan);
569 args[0] = llvm_chan;
570 args[1] = attr_number;
571 args[2] = params;
572 args[3] = interp_param;
573 radeon_bld->inputs[soa_index] =
574 build_intrinsic(gallivm->builder, intr_name,
575 input_type, args, args[3] ? 4 : 3,
576 LLVMReadNoneAttribute | LLVMNoUnwindAttribute);
577 }
578 }
579 }
580
581 static LLVMValueRef get_sample_id(struct radeon_llvm_context *radeon_bld)
582 {
583 return unpack_param(si_shader_context(&radeon_bld->soa.bld_base),
584 SI_PARAM_ANCILLARY, 8, 4);
585 }
586
587 /**
588 * Load a dword from a constant buffer.
589 */
590 static LLVMValueRef buffer_load_const(LLVMBuilderRef builder, LLVMValueRef resource,
591 LLVMValueRef offset, LLVMTypeRef return_type)
592 {
593 LLVMValueRef args[2] = {resource, offset};
594
595 return build_intrinsic(builder, "llvm.SI.load.const", return_type, args, 2,
596 LLVMReadNoneAttribute | LLVMNoUnwindAttribute);
597 }
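
/* Usage sketch: fetch two consecutive dwords, as declare_system_value()
 * below does for TGSI_SEMANTIC_SAMPLEPOS. "resource" is a loaded buffer
 * descriptor, the offsets are byte offsets, and "f32" stands in for the
 * float element type; all three names are placeholders.
 */
#if 0
LLVMValueRef x = buffer_load_const(builder, resource, offset0, f32);
LLVMValueRef y = buffer_load_const(builder, resource, offset1, f32);
#endif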
598
599 static void declare_system_value(
600 struct radeon_llvm_context * radeon_bld,
601 unsigned index,
602 const struct tgsi_full_declaration *decl)
603 {
604 struct si_shader_context *si_shader_ctx =
605 si_shader_context(&radeon_bld->soa.bld_base);
606 struct lp_build_context *uint_bld = &radeon_bld->soa.bld_base.uint_bld;
607 struct gallivm_state *gallivm = &radeon_bld->gallivm;
608 LLVMValueRef value = 0;
609
610 switch (decl->Semantic.Name) {
611 case TGSI_SEMANTIC_INSTANCEID:
612 value = LLVMGetParam(radeon_bld->main_fn,
613 si_shader_ctx->param_instance_id);
614 break;
615
616 case TGSI_SEMANTIC_VERTEXID:
617 value = LLVMBuildAdd(gallivm->builder,
618 LLVMGetParam(radeon_bld->main_fn,
619 si_shader_ctx->param_vertex_id),
620 LLVMGetParam(radeon_bld->main_fn,
621 SI_PARAM_BASE_VERTEX), "");
622 break;
623
624 case TGSI_SEMANTIC_VERTEXID_NOBASE:
625 value = LLVMGetParam(radeon_bld->main_fn,
626 si_shader_ctx->param_vertex_id);
627 break;
628
629 case TGSI_SEMANTIC_BASEVERTEX:
630 value = LLVMGetParam(radeon_bld->main_fn,
631 SI_PARAM_BASE_VERTEX);
632 break;
633
634 case TGSI_SEMANTIC_INVOCATIONID:
635 value = LLVMGetParam(radeon_bld->main_fn,
636 SI_PARAM_GS_INSTANCE_ID);
637 break;
638
639 case TGSI_SEMANTIC_SAMPLEID:
640 value = get_sample_id(radeon_bld);
641 break;
642
643 case TGSI_SEMANTIC_SAMPLEPOS:
644 {
645 LLVMBuilderRef builder = gallivm->builder;
646 LLVMValueRef desc = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_CONST);
647 LLVMValueRef buf_index = lp_build_const_int32(gallivm, SI_DRIVER_STATE_CONST_BUF);
648 LLVMValueRef resource = build_indexed_load_const(si_shader_ctx, desc, buf_index);
649
650 		/* offset = sample_id * 8  (8 bytes = 2 floats containing samplepos.xy) */
651 LLVMValueRef offset0 = lp_build_mul_imm(uint_bld, get_sample_id(radeon_bld), 8);
652 LLVMValueRef offset1 = LLVMBuildAdd(builder, offset0, lp_build_const_int32(gallivm, 4), "");
653
654 LLVMValueRef pos[4] = {
655 buffer_load_const(builder, resource, offset0, radeon_bld->soa.bld_base.base.elem_type),
656 buffer_load_const(builder, resource, offset1, radeon_bld->soa.bld_base.base.elem_type),
657 lp_build_const_float(gallivm, 0),
658 lp_build_const_float(gallivm, 0)
659 };
660 value = lp_build_gather_values(gallivm, pos, 4);
661 break;
662 }
663
664 case TGSI_SEMANTIC_SAMPLEMASK:
665 /* Smoothing isn't MSAA in GL, but it's MSAA in hardware.
666 * Therefore, force gl_SampleMaskIn to 1 for GL. */
667 if (si_shader_ctx->shader->key.ps.poly_line_smoothing)
668 value = uint_bld->one;
669 else
670 value = LLVMGetParam(radeon_bld->main_fn, SI_PARAM_SAMPLE_COVERAGE);
671 break;
672
673 default:
674 assert(!"unknown system value");
675 return;
676 }
677
678 radeon_bld->system_values[index] = value;
679 }
680
681 static LLVMValueRef fetch_constant(
682 struct lp_build_tgsi_context * bld_base,
683 const struct tgsi_full_src_register *reg,
684 enum tgsi_opcode_type type,
685 unsigned swizzle)
686 {
687 struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
688 struct lp_build_context * base = &bld_base->base;
689 const struct tgsi_ind_register *ireg = &reg->Indirect;
690 unsigned buf, idx;
691
692 LLVMValueRef addr;
693 LLVMValueRef result;
694
695 if (swizzle == LP_CHAN_ALL) {
696 unsigned chan;
697 LLVMValueRef values[4];
698 for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan)
699 values[chan] = fetch_constant(bld_base, reg, type, chan);
700
701 return lp_build_gather_values(bld_base->base.gallivm, values, 4);
702 }
703
704 buf = reg->Register.Dimension ? reg->Dimension.Index : 0;
705 idx = reg->Register.Index * 4 + swizzle;
706
707 if (!reg->Register.Indirect) {
708 if (type != TGSI_TYPE_DOUBLE)
709 return bitcast(bld_base, type, si_shader_ctx->constants[buf][idx]);
710 else {
711 return radeon_llvm_emit_fetch_double(bld_base,
712 si_shader_ctx->constants[buf][idx],
713 si_shader_ctx->constants[buf][idx + 1]);
714 }
715 }
716
717 addr = si_shader_ctx->radeon_bld.soa.addr[ireg->Index][ireg->Swizzle];
718 addr = LLVMBuildLoad(base->gallivm->builder, addr, "load addr reg");
719 addr = lp_build_mul_imm(&bld_base->uint_bld, addr, 16);
720 addr = lp_build_add(&bld_base->uint_bld, addr,
721 lp_build_const_int32(base->gallivm, idx * 4));
722
723 result = buffer_load_const(base->gallivm->builder, si_shader_ctx->const_resource[buf],
724 addr, bld_base->base.elem_type);
725
726 if (type != TGSI_TYPE_DOUBLE)
727 result = bitcast(bld_base, type, result);
728 else {
729 LLVMValueRef addr2, result2;
730 addr2 = si_shader_ctx->radeon_bld.soa.addr[ireg->Index][ireg->Swizzle + 1];
731 addr2 = LLVMBuildLoad(base->gallivm->builder, addr2, "load addr reg2");
732 addr2 = lp_build_mul_imm(&bld_base->uint_bld, addr2, 16);
733 addr2 = lp_build_add(&bld_base->uint_bld, addr2,
734 lp_build_const_int32(base->gallivm, idx * 4));
735
736 result2 = buffer_load_const(base->gallivm->builder, si_shader_ctx->const_resource[buf],
737 addr2, bld_base->base.elem_type);
738
739 result = radeon_llvm_emit_fetch_double(bld_base,
740 result, result2);
741 }
742 return result;
743 }
744
745 /* Initialize arguments for the shader export intrinsic */
746 static void si_llvm_init_export_args(struct lp_build_tgsi_context *bld_base,
747 LLVMValueRef *values,
748 unsigned target,
749 LLVMValueRef *args)
750 {
751 struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
752 struct lp_build_context *uint =
753 &si_shader_ctx->radeon_bld.soa.bld_base.uint_bld;
754 struct lp_build_context *base = &bld_base->base;
755 unsigned compressed = 0;
756 unsigned chan;
757
758 if (si_shader_ctx->type == TGSI_PROCESSOR_FRAGMENT) {
759 int cbuf = target - V_008DFC_SQ_EXP_MRT;
760
761 if (cbuf >= 0 && cbuf < 8) {
762 compressed = (si_shader_ctx->shader->key.ps.export_16bpc >> cbuf) & 0x1;
763
764 if (compressed)
765 si_shader_ctx->shader->spi_shader_col_format |=
766 V_028714_SPI_SHADER_FP16_ABGR << (4 * cbuf);
767 else
768 si_shader_ctx->shader->spi_shader_col_format |=
769 V_028714_SPI_SHADER_32_ABGR << (4 * cbuf);
770
771 si_shader_ctx->shader->cb_shader_mask |= 0xf << (4 * cbuf);
772 }
773 }
774
775 if (compressed) {
776 /* Pixel shader needs to pack output values before export */
777 for (chan = 0; chan < 2; chan++ ) {
778 args[0] = values[2 * chan];
779 args[1] = values[2 * chan + 1];
780 args[chan + 5] =
781 build_intrinsic(base->gallivm->builder,
782 "llvm.SI.packf16",
783 LLVMInt32TypeInContext(base->gallivm->context),
784 args, 2,
785 LLVMReadNoneAttribute | LLVMNoUnwindAttribute);
786 args[chan + 7] = args[chan + 5] =
787 LLVMBuildBitCast(base->gallivm->builder,
788 args[chan + 5],
789 LLVMFloatTypeInContext(base->gallivm->context),
790 "");
791 }
792
793 /* Set COMPR flag */
794 args[4] = uint->one;
795 } else {
796 for (chan = 0; chan < 4; chan++ )
797 /* +5 because the first output value will be
798 * the 6th argument to the intrinsic. */
799 args[chan + 5] = values[chan];
800
801 /* Clear COMPR flag */
802 args[4] = uint->zero;
803 }
804
805 /* XXX: This controls which components of the output
806 	 * registers actually get exported. (e.g. bit 0 means export
807 	 * X component, bit 1 means export Y component, etc.) I'm
808 	 * hardcoding this to 0xf for now. In the future, we might
809 * want to do something else. */
810 args[0] = lp_build_const_int32(base->gallivm, 0xf);
811
812 /* Specify whether the EXEC mask represents the valid mask */
813 args[1] = uint->zero;
814
815 /* Specify whether this is the last export */
816 args[2] = uint->zero;
817
818 /* Specify the target we are exporting */
819 args[3] = lp_build_const_int32(base->gallivm, target);
820
821 /* XXX: We probably need to keep track of the output
822 * values, so we know what we are passing to the next
823 * stage. */
824 }
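
/* For reference, the llvm.SI.export argument layout that the code above
 * fills in (derived from the assignments in si_llvm_init_export_args):
 *   args[0]    = component writemask (0xf = xyzw)
 *   args[1]    = whether the EXEC mask represents the valid mask
 *   args[2]    = "last export" flag
 *   args[3]    = export target (MRT/MRTZ/POS/PARAM)
 *   args[4]    = COMPR flag (packed fp16 data)
 *   args[5..8] = the four output values (two packed values if COMPR)
 */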
825
826 /* Load from output pointers and initialize arguments for the shader export intrinsic */
827 static void si_llvm_init_export_args_load(struct lp_build_tgsi_context *bld_base,
828 LLVMValueRef *out_ptr,
829 unsigned target,
830 LLVMValueRef *args)
831 {
832 struct gallivm_state *gallivm = bld_base->base.gallivm;
833 LLVMValueRef values[4];
834 int i;
835
836 for (i = 0; i < 4; i++)
837 values[i] = LLVMBuildLoad(gallivm->builder, out_ptr[i], "");
838
839 si_llvm_init_export_args(bld_base, values, target, args);
840 }
841
842 static void si_alpha_test(struct lp_build_tgsi_context *bld_base,
843 LLVMValueRef alpha_ptr)
844 {
845 struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
846 struct gallivm_state *gallivm = bld_base->base.gallivm;
847
848 if (si_shader_ctx->shader->key.ps.alpha_func != PIPE_FUNC_NEVER) {
849 LLVMValueRef alpha_ref = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
850 SI_PARAM_ALPHA_REF);
851
852 LLVMValueRef alpha_pass =
853 lp_build_cmp(&bld_base->base,
854 si_shader_ctx->shader->key.ps.alpha_func,
855 LLVMBuildLoad(gallivm->builder, alpha_ptr, ""),
856 alpha_ref);
857 LLVMValueRef arg =
858 lp_build_select(&bld_base->base,
859 alpha_pass,
860 lp_build_const_float(gallivm, 1.0f),
861 lp_build_const_float(gallivm, -1.0f));
862
863 build_intrinsic(gallivm->builder,
864 "llvm.AMDGPU.kill",
865 LLVMVoidTypeInContext(gallivm->context),
866 &arg, 1, 0);
867 } else {
868 build_intrinsic(gallivm->builder,
869 "llvm.AMDGPU.kilp",
870 LLVMVoidTypeInContext(gallivm->context),
871 NULL, 0, 0);
872 }
873
874 si_shader_ctx->shader->db_shader_control |= S_02880C_KILL_ENABLE(1);
875 }
876
877 static void si_scale_alpha_by_sample_mask(struct lp_build_tgsi_context *bld_base,
878 LLVMValueRef alpha_ptr)
879 {
880 struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
881 struct gallivm_state *gallivm = bld_base->base.gallivm;
882 LLVMValueRef coverage, alpha;
883
884 /* alpha = alpha * popcount(coverage) / SI_NUM_SMOOTH_AA_SAMPLES */
885 coverage = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
886 SI_PARAM_SAMPLE_COVERAGE);
887 coverage = bitcast(bld_base, TGSI_TYPE_SIGNED, coverage);
888
889 coverage = build_intrinsic(gallivm->builder, "llvm.ctpop.i32",
890 bld_base->int_bld.elem_type,
891 &coverage, 1, LLVMReadNoneAttribute);
892
893 coverage = LLVMBuildUIToFP(gallivm->builder, coverage,
894 bld_base->base.elem_type, "");
895
896 coverage = LLVMBuildFMul(gallivm->builder, coverage,
897 lp_build_const_float(gallivm,
898 1.0 / SI_NUM_SMOOTH_AA_SAMPLES), "");
899
900 alpha = LLVMBuildLoad(gallivm->builder, alpha_ptr, "");
901 alpha = LLVMBuildFMul(gallivm->builder, alpha, coverage, "");
902 LLVMBuildStore(gallivm->builder, alpha, alpha_ptr);
903 }
904
905 static void si_llvm_emit_clipvertex(struct lp_build_tgsi_context * bld_base,
906 LLVMValueRef (*pos)[9], LLVMValueRef *out_elts)
907 {
908 struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
909 struct lp_build_context *base = &bld_base->base;
910 struct lp_build_context *uint = &si_shader_ctx->radeon_bld.soa.bld_base.uint_bld;
911 unsigned reg_index;
912 unsigned chan;
913 unsigned const_chan;
914 LLVMValueRef base_elt;
915 LLVMValueRef ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_CONST);
916 LLVMValueRef constbuf_index = lp_build_const_int32(base->gallivm, SI_DRIVER_STATE_CONST_BUF);
917 LLVMValueRef const_resource = build_indexed_load_const(si_shader_ctx, ptr, constbuf_index);
918
919 for (reg_index = 0; reg_index < 2; reg_index ++) {
920 LLVMValueRef *args = pos[2 + reg_index];
921
922 args[5] =
923 args[6] =
924 args[7] =
925 args[8] = lp_build_const_float(base->gallivm, 0.0f);
926
927 /* Compute dot products of position and user clip plane vectors */
928 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
929 for (const_chan = 0; const_chan < TGSI_NUM_CHANNELS; const_chan++) {
930 args[1] = lp_build_const_int32(base->gallivm,
931 ((reg_index * 4 + chan) * 4 +
932 const_chan) * 4);
933 base_elt = buffer_load_const(base->gallivm->builder, const_resource,
934 args[1], base->elem_type);
935 args[5 + chan] =
936 lp_build_add(base, args[5 + chan],
937 lp_build_mul(base, base_elt,
938 out_elts[const_chan]));
939 }
940 }
941
942 args[0] = lp_build_const_int32(base->gallivm, 0xf);
943 args[1] = uint->zero;
944 args[2] = uint->zero;
945 args[3] = lp_build_const_int32(base->gallivm,
946 V_008DFC_SQ_EXP_POS + 2 + reg_index);
947 args[4] = uint->zero;
948 }
949 }
950
951 static void si_dump_streamout(struct pipe_stream_output_info *so)
952 {
953 unsigned i;
954
955 if (so->num_outputs)
956 fprintf(stderr, "STREAMOUT\n");
957
958 for (i = 0; i < so->num_outputs; i++) {
959 unsigned mask = ((1 << so->output[i].num_components) - 1) <<
960 so->output[i].start_component;
961 fprintf(stderr, " %i: BUF%i[%i..%i] <- OUT[%i].%s%s%s%s\n",
962 i, so->output[i].output_buffer,
963 so->output[i].dst_offset, so->output[i].dst_offset + so->output[i].num_components - 1,
964 so->output[i].register_index,
965 mask & 1 ? "x" : "",
966 mask & 2 ? "y" : "",
967 mask & 4 ? "z" : "",
968 mask & 8 ? "w" : "");
969 }
970 }
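
/* Example of the dump format produced above (illustrative values):
 *   STREAMOUT
 *    0: BUF0[0..3] <- OUT[2].xyzw
 *    1: BUF1[4..5] <- OUT[3].xy
 */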
971
972 /* TBUFFER_STORE_FORMAT_{X,XY,XYZ,XYZW} <- the suffix is selected by num_channels=1..4.
973 * The type of vdata must be one of i32 (num_channels=1), v2i32 (num_channels=2),
974 * or v4i32 (num_channels=3,4). */
975 static void build_tbuffer_store(struct si_shader_context *shader,
976 LLVMValueRef rsrc,
977 LLVMValueRef vdata,
978 unsigned num_channels,
979 LLVMValueRef vaddr,
980 LLVMValueRef soffset,
981 unsigned inst_offset,
982 unsigned dfmt,
983 unsigned nfmt,
984 unsigned offen,
985 unsigned idxen,
986 unsigned glc,
987 unsigned slc,
988 unsigned tfe)
989 {
990 struct gallivm_state *gallivm = &shader->radeon_bld.gallivm;
991 LLVMTypeRef i32 = LLVMInt32TypeInContext(gallivm->context);
992 LLVMValueRef args[] = {
993 rsrc,
994 vdata,
995 LLVMConstInt(i32, num_channels, 0),
996 vaddr,
997 soffset,
998 LLVMConstInt(i32, inst_offset, 0),
999 LLVMConstInt(i32, dfmt, 0),
1000 LLVMConstInt(i32, nfmt, 0),
1001 LLVMConstInt(i32, offen, 0),
1002 LLVMConstInt(i32, idxen, 0),
1003 LLVMConstInt(i32, glc, 0),
1004 LLVMConstInt(i32, slc, 0),
1005 LLVMConstInt(i32, tfe, 0)
1006 };
1007
1008 /* The instruction offset field has 12 bits */
1009 assert(offen || inst_offset < (1 << 12));
1010
1011 /* The intrinsic is overloaded, we need to add a type suffix for overloading to work. */
1012 unsigned func = CLAMP(num_channels, 1, 3) - 1;
1013 const char *types[] = {"i32", "v2i32", "v4i32"};
1014 char name[256];
1015 snprintf(name, sizeof(name), "llvm.SI.tbuffer.store.%s", types[func]);
1016
1017 lp_build_intrinsic(gallivm->builder, name,
1018 LLVMVoidTypeInContext(gallivm->context),
1019 args, Elements(args));
1020 }
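
/* Call sketch (mirrors the ESGS-ring store in si_llvm_emit_es_epilogue
 * below): write one dword at a constant instruction offset with OFFEN=0,
 * IDXEN=0, GLC=1, SLC=1, TFE=0. "ctx", "rsrc", "value", "i32" and
 * "soffset" are placeholders.
 */
#if 0
build_tbuffer_store(ctx, rsrc, value, 1,
		    LLVMGetUndef(i32), soffset,
		    /* inst_offset */ 16,
		    V_008F0C_BUF_DATA_FORMAT_32,
		    V_008F0C_BUF_NUM_FORMAT_UINT,
		    0, 0, 1, 1, 0);
#endif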
1021
1022 static void build_streamout_store(struct si_shader_context *shader,
1023 LLVMValueRef rsrc,
1024 LLVMValueRef vdata,
1025 unsigned num_channels,
1026 LLVMValueRef vaddr,
1027 LLVMValueRef soffset,
1028 unsigned inst_offset)
1029 {
1030 static unsigned dfmt[] = {
1031 V_008F0C_BUF_DATA_FORMAT_32,
1032 V_008F0C_BUF_DATA_FORMAT_32_32,
1033 V_008F0C_BUF_DATA_FORMAT_32_32_32,
1034 V_008F0C_BUF_DATA_FORMAT_32_32_32_32
1035 };
1036 assert(num_channels >= 1 && num_channels <= 4);
1037
1038 build_tbuffer_store(shader, rsrc, vdata, num_channels, vaddr, soffset,
1039 inst_offset, dfmt[num_channels-1],
1040 V_008F0C_BUF_NUM_FORMAT_UINT, 1, 0, 1, 1, 0);
1041 }
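
/* Note on the dfmt table above: num_channels=3 selects DATA_FORMAT_32_32_32,
 * so only three dwords reach memory even though the vdata operand must be a
 * v4i32 (see the "aligned to 4" packing in si_llvm_emit_streamout below). */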
1042
1043 /* On SI, the vertex shader is responsible for writing streamout data
1044 * to buffers. */
1045 static void si_llvm_emit_streamout(struct si_shader_context *shader,
1046 struct si_shader_output_values *outputs,
1047 unsigned noutput)
1048 {
1049 struct pipe_stream_output_info *so = &shader->shader->selector->so;
1050 struct gallivm_state *gallivm = &shader->radeon_bld.gallivm;
1051 LLVMBuilderRef builder = gallivm->builder;
1052 int i, j;
1053 struct lp_build_if_state if_ctx;
1054
1055 LLVMTypeRef i32 = LLVMInt32TypeInContext(gallivm->context);
1056
1057 /* Get bits [22:16], i.e. (so_param >> 16) & 127; */
1058 LLVMValueRef so_vtx_count =
1059 unpack_param(shader, shader->param_streamout_config, 16, 7);
1060
1061 LLVMValueRef tid = build_intrinsic(builder, "llvm.SI.tid", i32,
1062 NULL, 0, LLVMReadNoneAttribute);
1063
1064 /* can_emit = tid < so_vtx_count; */
1065 LLVMValueRef can_emit =
1066 LLVMBuildICmp(builder, LLVMIntULT, tid, so_vtx_count, "");
1067
1068 /* Emit the streamout code conditionally. This actually avoids
1069 * out-of-bounds buffer access. The hw tells us via the SGPR
1070 * (so_vtx_count) which threads are allowed to emit streamout data. */
1071 lp_build_if(&if_ctx, gallivm, can_emit);
1072 {
1073 /* The buffer offset is computed as follows:
1074 * ByteOffset = streamout_offset[buffer_id]*4 +
1075 * (streamout_write_index + thread_id)*stride[buffer_id] +
1076 * attrib_offset
1077 */
1078
1079 LLVMValueRef so_write_index =
1080 LLVMGetParam(shader->radeon_bld.main_fn,
1081 shader->param_streamout_write_index);
1082
1083 /* Compute (streamout_write_index + thread_id). */
1084 so_write_index = LLVMBuildAdd(builder, so_write_index, tid, "");
1085
1086 /* Compute the write offset for each enabled buffer. */
1087 LLVMValueRef so_write_offset[4] = {};
1088 for (i = 0; i < 4; i++) {
1089 if (!so->stride[i])
1090 continue;
1091
1092 LLVMValueRef so_offset = LLVMGetParam(shader->radeon_bld.main_fn,
1093 shader->param_streamout_offset[i]);
1094 so_offset = LLVMBuildMul(builder, so_offset, LLVMConstInt(i32, 4, 0), "");
1095
1096 so_write_offset[i] = LLVMBuildMul(builder, so_write_index,
1097 LLVMConstInt(i32, so->stride[i]*4, 0), "");
1098 so_write_offset[i] = LLVMBuildAdd(builder, so_write_offset[i], so_offset, "");
1099 }
1100
1101 /* Write streamout data. */
1102 for (i = 0; i < so->num_outputs; i++) {
1103 unsigned buf_idx = so->output[i].output_buffer;
1104 unsigned reg = so->output[i].register_index;
1105 unsigned start = so->output[i].start_component;
1106 unsigned num_comps = so->output[i].num_components;
1107 LLVMValueRef out[4];
1108
1109 assert(num_comps && num_comps <= 4);
1110 if (!num_comps || num_comps > 4)
1111 continue;
1112
1113 if (reg >= noutput)
1114 continue;
1115
1116 /* Load the output as int. */
1117 for (j = 0; j < num_comps; j++) {
1118 out[j] = LLVMBuildBitCast(builder,
1119 outputs[reg].values[start+j],
1120 i32, "");
1121 }
1122
1123 /* Pack the output. */
1124 LLVMValueRef vdata = NULL;
1125
1126 switch (num_comps) {
1127 case 1: /* as i32 */
1128 vdata = out[0];
1129 break;
1130 case 2: /* as v2i32 */
1131 case 3: /* as v4i32 (aligned to 4) */
1132 case 4: /* as v4i32 */
1133 vdata = LLVMGetUndef(LLVMVectorType(i32, util_next_power_of_two(num_comps)));
1134 for (j = 0; j < num_comps; j++) {
1135 vdata = LLVMBuildInsertElement(builder, vdata, out[j],
1136 LLVMConstInt(i32, j, 0), "");
1137 }
1138 break;
1139 }
1140
1141 build_streamout_store(shader, shader->so_buffers[buf_idx],
1142 vdata, num_comps,
1143 so_write_offset[buf_idx],
1144 LLVMConstInt(i32, 0, 0),
1145 so->output[i].dst_offset*4);
1146 }
1147 }
1148 lp_build_endif(&if_ctx);
1149 }
1150
1151
1152 /* Generate export instructions for hardware VS shader stage */
1153 static void si_llvm_export_vs(struct lp_build_tgsi_context *bld_base,
1154 struct si_shader_output_values *outputs,
1155 unsigned noutput)
1156 {
1157 struct si_shader_context * si_shader_ctx = si_shader_context(bld_base);
1158 struct si_shader * shader = si_shader_ctx->shader;
1159 struct lp_build_context * base = &bld_base->base;
1160 struct lp_build_context * uint =
1161 &si_shader_ctx->radeon_bld.soa.bld_base.uint_bld;
1162 LLVMValueRef args[9];
1163 LLVMValueRef pos_args[4][9] = { { 0 } };
1164 LLVMValueRef psize_value = NULL, edgeflag_value = NULL, layer_value = NULL, viewport_index_value = NULL;
1165 unsigned semantic_name, semantic_index;
1166 unsigned target;
1167 unsigned param_count = 0;
1168 unsigned pos_idx;
1169 int i;
1170
1171 if (outputs && si_shader_ctx->shader->selector->so.num_outputs) {
1172 si_llvm_emit_streamout(si_shader_ctx, outputs, noutput);
1173 }
1174
1175 for (i = 0; i < noutput; i++) {
1176 semantic_name = outputs[i].name;
1177 semantic_index = outputs[i].sid;
1178
1179 handle_semantic:
1180 /* Select the correct target */
1181 switch(semantic_name) {
1182 case TGSI_SEMANTIC_PSIZE:
1183 psize_value = outputs[i].values[0];
1184 continue;
1185 case TGSI_SEMANTIC_EDGEFLAG:
1186 edgeflag_value = outputs[i].values[0];
1187 continue;
1188 case TGSI_SEMANTIC_LAYER:
1189 layer_value = outputs[i].values[0];
1190 semantic_name = TGSI_SEMANTIC_GENERIC;
1191 goto handle_semantic;
1192 case TGSI_SEMANTIC_VIEWPORT_INDEX:
1193 viewport_index_value = outputs[i].values[0];
1194 semantic_name = TGSI_SEMANTIC_GENERIC;
1195 goto handle_semantic;
1196 case TGSI_SEMANTIC_POSITION:
1197 target = V_008DFC_SQ_EXP_POS;
1198 break;
1199 case TGSI_SEMANTIC_COLOR:
1200 case TGSI_SEMANTIC_BCOLOR:
1201 target = V_008DFC_SQ_EXP_PARAM + param_count;
1202 shader->vs_output_param_offset[i] = param_count;
1203 param_count++;
1204 break;
1205 case TGSI_SEMANTIC_CLIPDIST:
1206 target = V_008DFC_SQ_EXP_POS + 2 + semantic_index;
1207 break;
1208 case TGSI_SEMANTIC_CLIPVERTEX:
1209 si_llvm_emit_clipvertex(bld_base, pos_args, outputs[i].values);
1210 continue;
1211 case TGSI_SEMANTIC_PRIMID:
1212 case TGSI_SEMANTIC_FOG:
1213 case TGSI_SEMANTIC_TEXCOORD:
1214 case TGSI_SEMANTIC_GENERIC:
1215 target = V_008DFC_SQ_EXP_PARAM + param_count;
1216 shader->vs_output_param_offset[i] = param_count;
1217 param_count++;
1218 break;
1219 default:
1220 target = 0;
1221 fprintf(stderr,
1222 "Warning: SI unhandled vs output type:%d\n",
1223 semantic_name);
1224 }
1225
1226 si_llvm_init_export_args(bld_base, outputs[i].values, target, args);
1227
1228 if (target >= V_008DFC_SQ_EXP_POS &&
1229 target <= (V_008DFC_SQ_EXP_POS + 3)) {
1230 memcpy(pos_args[target - V_008DFC_SQ_EXP_POS],
1231 args, sizeof(args));
1232 } else {
1233 lp_build_intrinsic(base->gallivm->builder,
1234 "llvm.SI.export",
1235 LLVMVoidTypeInContext(base->gallivm->context),
1236 args, 9);
1237 }
1238
1239 if (semantic_name == TGSI_SEMANTIC_CLIPDIST) {
1240 semantic_name = TGSI_SEMANTIC_GENERIC;
1241 goto handle_semantic;
1242 }
1243 }
1244
1245 shader->nr_param_exports = param_count;
1246
1247 /* We need to add the position output manually if it's missing. */
1248 if (!pos_args[0][0]) {
1249 pos_args[0][0] = lp_build_const_int32(base->gallivm, 0xf); /* writemask */
1250 pos_args[0][1] = uint->zero; /* EXEC mask */
1251 pos_args[0][2] = uint->zero; /* last export? */
1252 pos_args[0][3] = lp_build_const_int32(base->gallivm, V_008DFC_SQ_EXP_POS);
1253 pos_args[0][4] = uint->zero; /* COMPR flag */
1254 pos_args[0][5] = base->zero; /* X */
1255 pos_args[0][6] = base->zero; /* Y */
1256 pos_args[0][7] = base->zero; /* Z */
1257 pos_args[0][8] = base->one; /* W */
1258 }
1259
1260 /* Write the misc vector (point size, edgeflag, layer, viewport). */
1261 if (shader->selector->info.writes_psize ||
1262 shader->selector->info.writes_edgeflag ||
1263 shader->selector->info.writes_viewport_index ||
1264 shader->selector->info.writes_layer) {
1265 pos_args[1][0] = lp_build_const_int32(base->gallivm, /* writemask */
1266 shader->selector->info.writes_psize |
1267 (shader->selector->info.writes_edgeflag << 1) |
1268 (shader->selector->info.writes_layer << 2) |
1269 (shader->selector->info.writes_viewport_index << 3));
1270 pos_args[1][1] = uint->zero; /* EXEC mask */
1271 pos_args[1][2] = uint->zero; /* last export? */
1272 pos_args[1][3] = lp_build_const_int32(base->gallivm, V_008DFC_SQ_EXP_POS + 1);
1273 pos_args[1][4] = uint->zero; /* COMPR flag */
1274 pos_args[1][5] = base->zero; /* X */
1275 pos_args[1][6] = base->zero; /* Y */
1276 pos_args[1][7] = base->zero; /* Z */
1277 pos_args[1][8] = base->zero; /* W */
1278
1279 if (shader->selector->info.writes_psize)
1280 pos_args[1][5] = psize_value;
1281
1282 if (shader->selector->info.writes_edgeflag) {
1283 /* The output is a float, but the hw expects an integer
1284 * with the first bit containing the edge flag. */
1285 edgeflag_value = LLVMBuildFPToUI(base->gallivm->builder,
1286 edgeflag_value,
1287 bld_base->uint_bld.elem_type, "");
1288 edgeflag_value = lp_build_min(&bld_base->int_bld,
1289 edgeflag_value,
1290 bld_base->int_bld.one);
1291
1292 /* The LLVM intrinsic expects a float. */
1293 pos_args[1][6] = LLVMBuildBitCast(base->gallivm->builder,
1294 edgeflag_value,
1295 base->elem_type, "");
1296 }
1297
1298 if (shader->selector->info.writes_layer)
1299 pos_args[1][7] = layer_value;
1300
1301 if (shader->selector->info.writes_viewport_index)
1302 pos_args[1][8] = viewport_index_value;
1303 }
1304
1305 for (i = 0; i < 4; i++)
1306 if (pos_args[i][0])
1307 shader->nr_pos_exports++;
1308
1309 pos_idx = 0;
1310 for (i = 0; i < 4; i++) {
1311 if (!pos_args[i][0])
1312 continue;
1313
1314 /* Specify the target we are exporting */
1315 pos_args[i][3] = lp_build_const_int32(base->gallivm, V_008DFC_SQ_EXP_POS + pos_idx++);
1316
1317 if (pos_idx == shader->nr_pos_exports)
1318 /* Specify that this is the last export */
1319 pos_args[i][2] = uint->one;
1320
1321 lp_build_intrinsic(base->gallivm->builder,
1322 "llvm.SI.export",
1323 LLVMVoidTypeInContext(base->gallivm->context),
1324 pos_args[i], 9);
1325 }
1326 }
1327
1328 static void si_llvm_emit_es_epilogue(struct lp_build_tgsi_context * bld_base)
1329 {
1330 struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
1331 struct gallivm_state *gallivm = bld_base->base.gallivm;
1332 struct si_shader *es = si_shader_ctx->shader;
1333 struct tgsi_shader_info *info = &es->selector->info;
1334 LLVMTypeRef i32 = LLVMInt32TypeInContext(gallivm->context);
1335 LLVMValueRef soffset = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
1336 si_shader_ctx->param_es2gs_offset);
1337 unsigned chan;
1338 int i;
1339
1340 for (i = 0; i < info->num_outputs; i++) {
1341 LLVMValueRef *out_ptr =
1342 si_shader_ctx->radeon_bld.soa.outputs[i];
1343 int param_index;
1344
1345 if (info->output_semantic_name[i] == TGSI_SEMANTIC_VIEWPORT_INDEX ||
1346 info->output_semantic_name[i] == TGSI_SEMANTIC_LAYER)
1347 continue;
1348
1349 param_index = get_param_index(info->output_semantic_name[i],
1350 info->output_semantic_index[i],
1351 es->key.vs.gs_used_inputs);
1352 if (param_index < 0)
1353 continue;
1354
1355 for (chan = 0; chan < 4; chan++) {
1356 LLVMValueRef out_val = LLVMBuildLoad(gallivm->builder, out_ptr[chan], "");
1357 out_val = LLVMBuildBitCast(gallivm->builder, out_val, i32, "");
1358
1359 build_tbuffer_store(si_shader_ctx,
1360 si_shader_ctx->esgs_ring,
1361 out_val, 1,
1362 LLVMGetUndef(i32), soffset,
1363 (4 * param_index + chan) * 4,
1364 V_008F0C_BUF_DATA_FORMAT_32,
1365 V_008F0C_BUF_NUM_FORMAT_UINT,
1366 0, 0, 1, 1, 0);
1367 }
1368 }
1369 }
1370
1371 static void si_llvm_emit_gs_epilogue(struct lp_build_tgsi_context *bld_base)
1372 {
1373 struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
1374 struct gallivm_state *gallivm = bld_base->base.gallivm;
1375 LLVMValueRef args[2];
1376
1377 args[0] = lp_build_const_int32(gallivm, SENDMSG_GS_OP_NOP | SENDMSG_GS_DONE);
1378 args[1] = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_GS_WAVE_ID);
1379 build_intrinsic(gallivm->builder, "llvm.SI.sendmsg",
1380 LLVMVoidTypeInContext(gallivm->context), args, 2,
1381 LLVMNoUnwindAttribute);
1382 }
1383
1384 static void si_llvm_emit_vs_epilogue(struct lp_build_tgsi_context * bld_base)
1385 {
1386 struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
1387 struct gallivm_state *gallivm = bld_base->base.gallivm;
1388 struct tgsi_shader_info *info = &si_shader_ctx->shader->selector->info;
1389 struct si_shader_output_values *outputs = NULL;
1390 int i,j;
1391
1392 outputs = MALLOC(info->num_outputs * sizeof(outputs[0]));
1393
1394 for (i = 0; i < info->num_outputs; i++) {
1395 outputs[i].name = info->output_semantic_name[i];
1396 outputs[i].sid = info->output_semantic_index[i];
1397
1398 for (j = 0; j < 4; j++)
1399 outputs[i].values[j] =
1400 LLVMBuildLoad(gallivm->builder,
1401 si_shader_ctx->radeon_bld.soa.outputs[i][j],
1402 "");
1403 }
1404
1405 si_llvm_export_vs(bld_base, outputs, info->num_outputs);
1406 FREE(outputs);
1407 }
1408
1409 static void si_llvm_emit_fs_epilogue(struct lp_build_tgsi_context * bld_base)
1410 {
1411 struct si_shader_context * si_shader_ctx = si_shader_context(bld_base);
1412 struct si_shader * shader = si_shader_ctx->shader;
1413 struct lp_build_context * base = &bld_base->base;
1414 struct lp_build_context * uint = &bld_base->uint_bld;
1415 struct tgsi_shader_info *info = &shader->selector->info;
1416 LLVMValueRef args[9];
1417 LLVMValueRef last_args[9] = { 0 };
1418 int depth_index = -1, stencil_index = -1, samplemask_index = -1;
1419 int i;
1420
1421 for (i = 0; i < info->num_outputs; i++) {
1422 unsigned semantic_name = info->output_semantic_name[i];
1423 unsigned semantic_index = info->output_semantic_index[i];
1424 unsigned target;
1425 LLVMValueRef alpha_ptr;
1426
1427 /* Select the correct target */
1428 switch (semantic_name) {
1429 case TGSI_SEMANTIC_POSITION:
1430 depth_index = i;
1431 continue;
1432 case TGSI_SEMANTIC_STENCIL:
1433 stencil_index = i;
1434 continue;
1435 case TGSI_SEMANTIC_SAMPLEMASK:
1436 samplemask_index = i;
1437 continue;
1438 case TGSI_SEMANTIC_COLOR:
1439 target = V_008DFC_SQ_EXP_MRT + semantic_index;
1440 alpha_ptr = si_shader_ctx->radeon_bld.soa.outputs[i][3];
1441
1442 if (si_shader_ctx->shader->key.ps.alpha_to_one)
1443 LLVMBuildStore(base->gallivm->builder,
1444 base->one, alpha_ptr);
1445
1446 if (semantic_index == 0 &&
1447 si_shader_ctx->shader->key.ps.alpha_func != PIPE_FUNC_ALWAYS)
1448 si_alpha_test(bld_base, alpha_ptr);
1449
1450 if (si_shader_ctx->shader->key.ps.poly_line_smoothing)
1451 si_scale_alpha_by_sample_mask(bld_base, alpha_ptr);
1452 break;
1453 default:
1454 target = 0;
1455 fprintf(stderr,
1456 "Warning: SI unhandled fs output type:%d\n",
1457 semantic_name);
1458 }
1459
1460 si_llvm_init_export_args_load(bld_base,
1461 si_shader_ctx->radeon_bld.soa.outputs[i],
1462 target, args);
1463
1464 if (semantic_name == TGSI_SEMANTIC_COLOR) {
1465 /* If there is an export instruction waiting to be emitted, do so now. */
1466 if (last_args[0]) {
1467 lp_build_intrinsic(base->gallivm->builder,
1468 "llvm.SI.export",
1469 LLVMVoidTypeInContext(base->gallivm->context),
1470 last_args, 9);
1471 }
1472
1473 /* This instruction will be emitted at the end of the shader. */
1474 memcpy(last_args, args, sizeof(args));
1475
1476 /* Handle FS_COLOR0_WRITES_ALL_CBUFS. */
1477 if (shader->selector->info.properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS] &&
1478 semantic_index == 0 &&
1479 si_shader_ctx->shader->key.ps.last_cbuf > 0) {
1480 for (int c = 1; c <= si_shader_ctx->shader->key.ps.last_cbuf; c++) {
1481 si_llvm_init_export_args_load(bld_base,
1482 si_shader_ctx->radeon_bld.soa.outputs[i],
1483 V_008DFC_SQ_EXP_MRT + c, args);
1484 lp_build_intrinsic(base->gallivm->builder,
1485 "llvm.SI.export",
1486 LLVMVoidTypeInContext(base->gallivm->context),
1487 args, 9);
1488 }
1489 }
1490 } else {
1491 lp_build_intrinsic(base->gallivm->builder,
1492 "llvm.SI.export",
1493 LLVMVoidTypeInContext(base->gallivm->context),
1494 args, 9);
1495 }
1496 }
1497
1498 if (depth_index >= 0 || stencil_index >= 0 || samplemask_index >= 0) {
1499 LLVMValueRef out_ptr;
1500 unsigned mask = 0;
1501
1502 /* Specify the target we are exporting */
1503 args[3] = lp_build_const_int32(base->gallivm, V_008DFC_SQ_EXP_MRTZ);
1504
1505 args[5] = base->zero; /* R, depth */
1506 args[6] = base->zero; /* G, stencil test value[0:7], stencil op value[8:15] */
1507 args[7] = base->zero; /* B, sample mask */
1508 args[8] = base->zero; /* A, alpha to mask */
1509
1510 if (depth_index >= 0) {
1511 out_ptr = si_shader_ctx->radeon_bld.soa.outputs[depth_index][2];
1512 args[5] = LLVMBuildLoad(base->gallivm->builder, out_ptr, "");
1513 mask |= 0x1;
1514 si_shader_ctx->shader->db_shader_control |= S_02880C_Z_EXPORT_ENABLE(1);
1515 }
1516
1517 if (stencil_index >= 0) {
1518 out_ptr = si_shader_ctx->radeon_bld.soa.outputs[stencil_index][1];
1519 args[6] = LLVMBuildLoad(base->gallivm->builder, out_ptr, "");
1520 mask |= 0x2;
1521 si_shader_ctx->shader->db_shader_control |=
1522 S_02880C_STENCIL_TEST_VAL_EXPORT_ENABLE(1);
1523 }
1524
1525 if (samplemask_index >= 0) {
1526 out_ptr = si_shader_ctx->radeon_bld.soa.outputs[samplemask_index][0];
1527 args[7] = LLVMBuildLoad(base->gallivm->builder, out_ptr, "");
1528 mask |= 0x4;
1529 si_shader_ctx->shader->db_shader_control |= S_02880C_MASK_EXPORT_ENABLE(1);
1530 }
1531
1532 		/* SI (except OLAND) has a bug where it only looks
1533 		 * at the X writemask component. */
1534 if (si_shader_ctx->screen->b.chip_class == SI &&
1535 si_shader_ctx->screen->b.family != CHIP_OLAND)
1536 mask |= 0x1;
1537
1538 if (samplemask_index >= 0)
1539 si_shader_ctx->shader->spi_shader_z_format = V_028710_SPI_SHADER_32_ABGR;
1540 else if (stencil_index >= 0)
1541 si_shader_ctx->shader->spi_shader_z_format = V_028710_SPI_SHADER_32_GR;
1542 else
1543 si_shader_ctx->shader->spi_shader_z_format = V_028710_SPI_SHADER_32_R;
1544
1545 /* Specify which components to enable */
1546 args[0] = lp_build_const_int32(base->gallivm, mask);
1547
1548 args[1] =
1549 args[2] =
1550 args[4] = uint->zero;
1551
1552 if (last_args[0])
1553 lp_build_intrinsic(base->gallivm->builder,
1554 "llvm.SI.export",
1555 LLVMVoidTypeInContext(base->gallivm->context),
1556 args, 9);
1557 else
1558 memcpy(last_args, args, sizeof(args));
1559 }
1560
1561 if (!last_args[0]) {
1562 /* Specify which components to enable */
1563 last_args[0] = lp_build_const_int32(base->gallivm, 0x0);
1564
1565 /* Specify the target we are exporting */
1566 last_args[3] = lp_build_const_int32(base->gallivm, V_008DFC_SQ_EXP_MRT);
1567
1568 /* Set COMPR flag to zero to export data as 32-bit */
1569 last_args[4] = uint->zero;
1570
1571 /* dummy bits */
1572 last_args[5]= uint->zero;
1573 last_args[6]= uint->zero;
1574 last_args[7]= uint->zero;
1575 last_args[8]= uint->zero;
1576 }
1577
1578 /* Specify whether the EXEC mask represents the valid mask */
1579 last_args[1] = uint->one;
1580
1581 /* Specify that this is the last export */
1582 last_args[2] = lp_build_const_int32(base->gallivm, 1);
1583
1584 lp_build_intrinsic(base->gallivm->builder,
1585 "llvm.SI.export",
1586 LLVMVoidTypeInContext(base->gallivm->context),
1587 last_args, 9);
1588 }
1589
1590 static void build_tex_intrinsic(const struct lp_build_tgsi_action * action,
1591 struct lp_build_tgsi_context * bld_base,
1592 struct lp_build_emit_data * emit_data);
1593
1594 static bool tgsi_is_shadow_sampler(unsigned target)
1595 {
1596 return target == TGSI_TEXTURE_SHADOW1D ||
1597 target == TGSI_TEXTURE_SHADOW1D_ARRAY ||
1598 target == TGSI_TEXTURE_SHADOW2D ||
1599 target == TGSI_TEXTURE_SHADOW2D_ARRAY ||
1600 target == TGSI_TEXTURE_SHADOWCUBE ||
1601 target == TGSI_TEXTURE_SHADOWCUBE_ARRAY ||
1602 target == TGSI_TEXTURE_SHADOWRECT;
1603 }
1604
1605 static const struct lp_build_tgsi_action tex_action;
1606
1607 static void tex_fetch_args(
1608 struct lp_build_tgsi_context * bld_base,
1609 struct lp_build_emit_data * emit_data)
1610 {
1611 struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
1612 struct gallivm_state *gallivm = bld_base->base.gallivm;
1613 const struct tgsi_full_instruction * inst = emit_data->inst;
1614 unsigned opcode = inst->Instruction.Opcode;
1615 unsigned target = inst->Texture.Texture;
1616 LLVMValueRef coords[5];
1617 LLVMValueRef address[16];
1618 int ref_pos;
1619 unsigned num_coords = tgsi_util_get_texture_coord_dim(target, &ref_pos);
1620 unsigned count = 0;
1621 unsigned chan;
1622 unsigned sampler_src = emit_data->inst->Instruction.NumSrcRegs - 1;
1623 unsigned sampler_index = emit_data->inst->Src[sampler_src].Register.Index;
1624 bool has_offset = HAVE_LLVM >= 0x0305 ? inst->Texture.NumOffsets > 0 : false;
1625
1626 if (target == TGSI_TEXTURE_BUFFER) {
1627 LLVMTypeRef i128 = LLVMIntTypeInContext(gallivm->context, 128);
1628 LLVMTypeRef v2i128 = LLVMVectorType(i128, 2);
1629 LLVMTypeRef i8 = LLVMInt8TypeInContext(gallivm->context);
1630 LLVMTypeRef v16i8 = LLVMVectorType(i8, 16);
1631
1632 /* Bitcast and truncate v8i32 to v16i8. */
1633 LLVMValueRef res = si_shader_ctx->resources[sampler_index];
1634 res = LLVMBuildBitCast(gallivm->builder, res, v2i128, "");
1635 res = LLVMBuildExtractElement(gallivm->builder, res, bld_base->uint_bld.one, "");
1636 res = LLVMBuildBitCast(gallivm->builder, res, v16i8, "");
1637
1638 emit_data->dst_type = LLVMVectorType(bld_base->base.elem_type, 4);
1639 emit_data->args[0] = res;
1640 emit_data->args[1] = bld_base->uint_bld.zero;
1641 emit_data->args[2] = lp_build_emit_fetch(bld_base, emit_data->inst, 0, 0);
1642 emit_data->arg_count = 3;
1643 return;
1644 }
1645
1646 /* Fetch and project texture coordinates */
1647 coords[3] = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_W);
1648 for (chan = 0; chan < 3; chan++) {
1649 coords[chan] = lp_build_emit_fetch(bld_base,
1650 emit_data->inst, 0,
1651 chan);
1652 if (opcode == TGSI_OPCODE_TXP)
1653 coords[chan] = lp_build_emit_llvm_binary(bld_base,
1654 TGSI_OPCODE_DIV,
1655 coords[chan],
1656 coords[3]);
1657 }
1658
1659 if (opcode == TGSI_OPCODE_TXP)
1660 coords[3] = bld_base->base.one;
1661
1662 /* Pack offsets. */
1663 if (has_offset && opcode != TGSI_OPCODE_TXF) {
1664 /* The offsets are six-bit signed integers packed like this:
1665 * X=[5:0], Y=[13:8], and Z=[21:16].
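 * For example, offsets (1, -2, 3) pack as
 * 0x01 | (0x3e << 8) | (0x03 << 16) = 0x00033e01, since -2 & 0x3f = 0x3e.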
1666 */
1667 LLVMValueRef offset[3], pack;
1668
1669 assert(inst->Texture.NumOffsets == 1);
1670
1671 for (chan = 0; chan < 3; chan++) {
1672 offset[chan] = lp_build_emit_fetch_texoffset(bld_base,
1673 emit_data->inst, 0, chan);
1674 offset[chan] = LLVMBuildAnd(gallivm->builder, offset[chan],
1675 lp_build_const_int32(gallivm, 0x3f), "");
1676 if (chan)
1677 offset[chan] = LLVMBuildShl(gallivm->builder, offset[chan],
1678 lp_build_const_int32(gallivm, chan*8), "");
1679 }
1680
1681 pack = LLVMBuildOr(gallivm->builder, offset[0], offset[1], "");
1682 pack = LLVMBuildOr(gallivm->builder, pack, offset[2], "");
1683 address[count++] = pack;
1684 }
1685
1686 /* Pack LOD bias value */
1687 if (opcode == TGSI_OPCODE_TXB)
1688 address[count++] = coords[3];
1689 if (opcode == TGSI_OPCODE_TXB2)
1690 address[count++] = lp_build_emit_fetch(bld_base, inst, 1, 0);
1691
1692 /* Pack depth comparison value */
1693 if (tgsi_is_shadow_sampler(target) && opcode != TGSI_OPCODE_LODQ) {
1694 if (target == TGSI_TEXTURE_SHADOWCUBE_ARRAY) {
1695 address[count++] = lp_build_emit_fetch(bld_base, inst, 1, 0);
1696 } else {
1697 assert(ref_pos >= 0);
1698 address[count++] = coords[ref_pos];
1699 }
1700 }
1701
1702 if (target == TGSI_TEXTURE_CUBE ||
1703 target == TGSI_TEXTURE_CUBE_ARRAY ||
1704 target == TGSI_TEXTURE_SHADOWCUBE ||
1705 target == TGSI_TEXTURE_SHADOWCUBE_ARRAY)
1706 radeon_llvm_emit_prepare_cube_coords(bld_base, emit_data, coords);
1707
1708 /* Pack user derivatives */
1709 if (opcode == TGSI_OPCODE_TXD) {
1710 int num_deriv_channels, param;
1711
1712 switch (target) {
1713 case TGSI_TEXTURE_3D:
1714 num_deriv_channels = 3;
1715 break;
1716 case TGSI_TEXTURE_2D:
1717 case TGSI_TEXTURE_SHADOW2D:
1718 case TGSI_TEXTURE_RECT:
1719 case TGSI_TEXTURE_SHADOWRECT:
1720 case TGSI_TEXTURE_2D_ARRAY:
1721 case TGSI_TEXTURE_SHADOW2D_ARRAY:
1722 case TGSI_TEXTURE_CUBE:
1723 case TGSI_TEXTURE_SHADOWCUBE:
1724 case TGSI_TEXTURE_CUBE_ARRAY:
1725 case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
1726 num_deriv_channels = 2;
1727 break;
1728 case TGSI_TEXTURE_1D:
1729 case TGSI_TEXTURE_SHADOW1D:
1730 case TGSI_TEXTURE_1D_ARRAY:
1731 case TGSI_TEXTURE_SHADOW1D_ARRAY:
1732 num_deriv_channels = 1;
1733 break;
1734 default:
1735 assert(0); num_deriv_channels = 0; /* no other targets are valid here; avoid uninitialized use in release builds */
1736 }
1737
1738 for (param = 1; param <= 2; param++)
1739 for (chan = 0; chan < num_deriv_channels; chan++)
1740 address[count++] = lp_build_emit_fetch(bld_base, inst, param, chan);
1741 }
1742
1743 /* Pack texture coordinates */
1744 address[count++] = coords[0];
1745 if (num_coords > 1)
1746 address[count++] = coords[1];
1747 if (num_coords > 2)
1748 address[count++] = coords[2];
1749
1750 /* Pack LOD or sample index */
1751 if (opcode == TGSI_OPCODE_TXL || opcode == TGSI_OPCODE_TXF)
1752 address[count++] = coords[3];
1753 else if (opcode == TGSI_OPCODE_TXL2)
1754 address[count++] = lp_build_emit_fetch(bld_base, inst, 1, 0);
1755
1756 if (count > 16) {
1757 assert(!"Cannot handle more than 16 texture address parameters");
1758 count = 16;
1759 }
1760
1761 for (chan = 0; chan < count; chan++) {
1762 address[chan] = LLVMBuildBitCast(gallivm->builder,
1763 address[chan],
1764 LLVMInt32TypeInContext(gallivm->context),
1765 "");
1766 }
1767
1768 /* Adjust the sample index according to FMASK.
1769 *
1770 * For uncompressed MSAA surfaces, FMASK should return 0x76543210,
1771 * which is the identity mapping. Each nibble says which physical sample
1772 * should be fetched to get that sample.
1773 *
1774 * For example, 0x11111100 means there are only 2 samples stored and
1775 * the second sample covers 3/4 of the pixel. When reading samples 0
1776 * and 1, return physical sample 0 (determined by the first two 0s
1777 * in FMASK), otherwise return physical sample 1.
1778 *
1779 * The sample index should be adjusted as follows:
1780 * sample_index = (fmask >> (sample_index * 4)) & 0xF;
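 *
 * With the example above, FMASK = 0x11111100 gives:
 * sample 0 -> (fmask >> 0) & 0xF = 0, sample 1 -> (fmask >> 4) & 0xF = 0,
 * sample 2 -> (fmask >> 8) & 0xF = 1.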
1781 */
1782 if (target == TGSI_TEXTURE_2D_MSAA ||
1783 target == TGSI_TEXTURE_2D_ARRAY_MSAA) {
1784 struct lp_build_context *uint_bld = &bld_base->uint_bld;
1785 struct lp_build_emit_data txf_emit_data = *emit_data;
1786 LLVMValueRef txf_address[4];
1787 unsigned txf_count = count;
1788 struct tgsi_full_instruction txf_inst = {};
1789
1790 memcpy(txf_address, address, sizeof(txf_address));
1791
1792 if (target == TGSI_TEXTURE_2D_MSAA) {
1793 txf_address[2] = bld_base->uint_bld.zero;
1794 }
1795 txf_address[3] = bld_base->uint_bld.zero;
1796
1797 /* Pad to a power-of-two size. */
1798 while (txf_count < util_next_power_of_two(txf_count))
1799 txf_address[txf_count++] = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context));
1800
1801 /* Read FMASK using TXF. */
1802 txf_inst.Instruction.Opcode = TGSI_OPCODE_TXF;
1803 txf_inst.Texture.Texture = target == TGSI_TEXTURE_2D_MSAA ? TGSI_TEXTURE_2D : TGSI_TEXTURE_2D_ARRAY;
1804 txf_emit_data.inst = &txf_inst;
1805 txf_emit_data.chan = 0;
1806 txf_emit_data.dst_type = LLVMVectorType(
1807 LLVMInt32TypeInContext(gallivm->context), 4);
1808 txf_emit_data.args[0] = lp_build_gather_values(gallivm, txf_address, txf_count);
1809 txf_emit_data.args[1] = si_shader_ctx->resources[SI_FMASK_TEX_OFFSET + sampler_index];
1810 txf_emit_data.args[2] = lp_build_const_int32(gallivm, txf_inst.Texture.Texture);
1811 txf_emit_data.arg_count = 3;
1812
1813 build_tex_intrinsic(&tex_action, bld_base, &txf_emit_data);
1814
1815 /* Initialize some constants. */
1816 LLVMValueRef four = LLVMConstInt(uint_bld->elem_type, 4, 0);
1817 LLVMValueRef F = LLVMConstInt(uint_bld->elem_type, 0xF, 0);
1818
1819 /* Apply the formula. */
1820 LLVMValueRef fmask =
1821 LLVMBuildExtractElement(gallivm->builder,
1822 txf_emit_data.output[0],
1823 uint_bld->zero, "");
1824
1825 unsigned sample_chan = target == TGSI_TEXTURE_2D_MSAA ? 2 : 3;
1826
1827 LLVMValueRef sample_index4 =
1828 LLVMBuildMul(gallivm->builder, address[sample_chan], four, "");
1829
1830 LLVMValueRef shifted_fmask =
1831 LLVMBuildLShr(gallivm->builder, fmask, sample_index4, "");
1832
1833 LLVMValueRef final_sample =
1834 LLVMBuildAnd(gallivm->builder, shifted_fmask, F, "");
1835
1836 /* Don't rewrite the sample index if WORD1.DATA_FORMAT of the FMASK
1837 * resource descriptor is 0 (invalid).
1838 */
1839 LLVMValueRef fmask_desc =
1840 LLVMBuildBitCast(gallivm->builder,
1841 si_shader_ctx->resources[SI_FMASK_TEX_OFFSET + sampler_index],
1842 LLVMVectorType(uint_bld->elem_type, 8), "");
1843
1844 LLVMValueRef fmask_word1 =
1845 LLVMBuildExtractElement(gallivm->builder, fmask_desc,
1846 uint_bld->one, "");
1847
1848 LLVMValueRef word1_is_nonzero =
1849 LLVMBuildICmp(gallivm->builder, LLVMIntNE,
1850 fmask_word1, uint_bld->zero, "");
1851
1852 /* Replace the MSAA sample index. */
1853 address[sample_chan] =
1854 LLVMBuildSelect(gallivm->builder, word1_is_nonzero,
1855 final_sample, address[sample_chan], "");
1856 }
1857
1858 /* Resource */
1859 emit_data->args[1] = si_shader_ctx->resources[sampler_index];
1860
1861 if (opcode == TGSI_OPCODE_TXF) {
1862 /* add tex offsets */
1863 if (inst->Texture.NumOffsets) {
1864 struct lp_build_context *uint_bld = &bld_base->uint_bld;
1865 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
1866 const struct tgsi_texture_offset * off = inst->TexOffsets;
1867
1868 assert(inst->Texture.NumOffsets == 1);
1869
1870 switch (target) {
1871 case TGSI_TEXTURE_3D:
1872 address[2] = lp_build_add(uint_bld, address[2],
1873 bld->immediates[off->Index][off->SwizzleZ]);
1874 /* fall through */
1875 case TGSI_TEXTURE_2D:
1876 case TGSI_TEXTURE_SHADOW2D:
1877 case TGSI_TEXTURE_RECT:
1878 case TGSI_TEXTURE_SHADOWRECT:
1879 case TGSI_TEXTURE_2D_ARRAY:
1880 case TGSI_TEXTURE_SHADOW2D_ARRAY:
1881 address[1] =
1882 lp_build_add(uint_bld, address[1],
1883 bld->immediates[off->Index][off->SwizzleY]);
1884 /* fall through */
1885 case TGSI_TEXTURE_1D:
1886 case TGSI_TEXTURE_SHADOW1D:
1887 case TGSI_TEXTURE_1D_ARRAY:
1888 case TGSI_TEXTURE_SHADOW1D_ARRAY:
1889 address[0] =
1890 lp_build_add(uint_bld, address[0],
1891 bld->immediates[off->Index][off->SwizzleX]);
1892 break;
1893 /* texture offsets do not apply to other texture targets */
1894 }
1895 }
1896
1897 emit_data->args[2] = lp_build_const_int32(gallivm, target);
1898 emit_data->arg_count = 3;
1899
1900 emit_data->dst_type = LLVMVectorType(
1901 LLVMInt32TypeInContext(gallivm->context),
1902 4);
1903 } else if (opcode == TGSI_OPCODE_TG4 ||
1904 opcode == TGSI_OPCODE_LODQ ||
1905 has_offset) {
1906 unsigned is_array = target == TGSI_TEXTURE_1D_ARRAY ||
1907 target == TGSI_TEXTURE_SHADOW1D_ARRAY ||
1908 target == TGSI_TEXTURE_2D_ARRAY ||
1909 target == TGSI_TEXTURE_SHADOW2D_ARRAY ||
1910 target == TGSI_TEXTURE_CUBE_ARRAY ||
1911 target == TGSI_TEXTURE_SHADOWCUBE_ARRAY;
1912 unsigned is_rect = target == TGSI_TEXTURE_RECT;
1913 unsigned dmask = 0xf;
1914
1915 if (opcode == TGSI_OPCODE_TG4) {
1916 unsigned gather_comp = 0;
1917
1918 /* DMASK was repurposed for GATHER4. 4 components are always
1919 * returned and DMASK works like a swizzle - it selects
1920 * the component to fetch. The only valid DMASK values are
1921 * 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
1922 * (red,red,red,red) etc.) The ISA document doesn't mention
1923 * this.
1924 */
1925
1926 /* Get the component index from src1.x for Gather4. */
1927 if (!tgsi_is_shadow_sampler(target)) {
1928 LLVMValueRef (*imms)[4] = lp_soa_context(bld_base)->immediates;
1929 LLVMValueRef comp_imm;
1930 struct tgsi_src_register src1 = inst->Src[1].Register;
1931
1932 assert(src1.File == TGSI_FILE_IMMEDIATE);
1933
1934 comp_imm = imms[src1.Index][src1.SwizzleX];
1935 gather_comp = LLVMConstIntGetZExtValue(comp_imm);
1936 gather_comp = CLAMP(gather_comp, 0, 3);
1937 }
1938
1939 dmask = 1 << gather_comp;
1940 }
1941
1942 emit_data->args[2] = si_shader_ctx->samplers[sampler_index];
1943 emit_data->args[3] = lp_build_const_int32(gallivm, dmask);
1944 emit_data->args[4] = lp_build_const_int32(gallivm, is_rect); /* unorm */
1945 emit_data->args[5] = lp_build_const_int32(gallivm, 0); /* r128 */
1946 emit_data->args[6] = lp_build_const_int32(gallivm, is_array); /* da */
1947 emit_data->args[7] = lp_build_const_int32(gallivm, 0); /* glc */
1948 emit_data->args[8] = lp_build_const_int32(gallivm, 0); /* slc */
1949 emit_data->args[9] = lp_build_const_int32(gallivm, 0); /* tfe */
1950 emit_data->args[10] = lp_build_const_int32(gallivm, 0); /* lwe */
1951
1952 emit_data->arg_count = 11;
1953
1954 emit_data->dst_type = LLVMVectorType(
1955 LLVMFloatTypeInContext(gallivm->context),
1956 4);
1957 } else {
1958 emit_data->args[2] = si_shader_ctx->samplers[sampler_index];
1959 emit_data->args[3] = lp_build_const_int32(gallivm, target);
1960 emit_data->arg_count = 4;
1961
1962 emit_data->dst_type = LLVMVectorType(
1963 LLVMFloatTypeInContext(gallivm->context),
1964 4);
1965 }
1966
1967 /* Convert cube array fetches to 2D array fetches.
1968 * This simplifies the LLVM backend. */
1969 if (target == TGSI_TEXTURE_CUBE_ARRAY)
1970 target = TGSI_TEXTURE_2D_ARRAY;
1971 else if (target == TGSI_TEXTURE_SHADOWCUBE_ARRAY)
1972 target = TGSI_TEXTURE_SHADOW2D_ARRAY;
1973
1974 /* Pad to power of two vector */
1975 while (count < util_next_power_of_two(count))
1976 address[count++] = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context));
1977
1978 emit_data->args[0] = lp_build_gather_values(gallivm, address, count);
1979 }
1980
1981 static void build_tex_intrinsic(const struct lp_build_tgsi_action * action,
1982 struct lp_build_tgsi_context * bld_base,
1983 struct lp_build_emit_data * emit_data)
1984 {
1985 struct lp_build_context * base = &bld_base->base;
1986 unsigned opcode = emit_data->inst->Instruction.Opcode;
1987 unsigned target = emit_data->inst->Texture.Texture;
1988 char intr_name[127];
1989 bool has_offset = HAVE_LLVM >= 0x0305 ?
1990 emit_data->inst->Texture.NumOffsets > 0 : false;
1991
1992 if (target == TGSI_TEXTURE_BUFFER) {
1993 emit_data->output[emit_data->chan] = build_intrinsic(
1994 base->gallivm->builder,
1995 "llvm.SI.vs.load.input", emit_data->dst_type,
1996 emit_data->args, emit_data->arg_count,
1997 LLVMReadNoneAttribute | LLVMNoUnwindAttribute);
1998 return;
1999 }
2000
2001 if (opcode == TGSI_OPCODE_TG4 ||
2002 opcode == TGSI_OPCODE_LODQ ||
2003 (opcode != TGSI_OPCODE_TXF && has_offset)) {
2004 bool is_shadow = tgsi_is_shadow_sampler(target);
2005 const char *name = "llvm.SI.image.sample";
2006 const char *infix = "";
2007
2008 switch (opcode) {
2009 case TGSI_OPCODE_TEX:
2010 case TGSI_OPCODE_TEX2:
2011 case TGSI_OPCODE_TXP:
2012 break;
2013 case TGSI_OPCODE_TXB:
2014 case TGSI_OPCODE_TXB2:
2015 infix = ".b";
2016 break;
2017 case TGSI_OPCODE_TXL:
2018 case TGSI_OPCODE_TXL2:
2019 infix = ".l";
2020 break;
2021 case TGSI_OPCODE_TXD:
2022 infix = ".d";
2023 break;
2024 case TGSI_OPCODE_TG4:
2025 name = "llvm.SI.gather4";
2026 break;
2027 case TGSI_OPCODE_LODQ:
2028 name = "llvm.SI.getlod";
2029 is_shadow = false;
2030 has_offset = false;
2031 break;
2032 default:
2033 assert(0);
2034 return;
2035 }
2036
2037 /* Add the type and suffixes .c, .o if needed. */
2038 sprintf(intr_name, "%s%s%s%s.v%ui32", name,
2039 is_shadow ? ".c" : "", infix, has_offset ? ".o" : "",
2040 LLVMGetVectorSize(LLVMTypeOf(emit_data->args[0])));
2041
2042 emit_data->output[emit_data->chan] = build_intrinsic(
2043 base->gallivm->builder, intr_name, emit_data->dst_type,
2044 emit_data->args, emit_data->arg_count,
2045 LLVMReadNoneAttribute | LLVMNoUnwindAttribute);
2046 } else {
2047 LLVMTypeRef i8, v16i8, v32i8;
2048 const char *name;
2049
2050 switch (opcode) {
2051 case TGSI_OPCODE_TEX:
2052 case TGSI_OPCODE_TEX2:
2053 case TGSI_OPCODE_TXP:
2054 name = "llvm.SI.sample";
2055 break;
2056 case TGSI_OPCODE_TXB:
2057 case TGSI_OPCODE_TXB2:
2058 name = "llvm.SI.sampleb";
2059 break;
2060 case TGSI_OPCODE_TXD:
2061 name = "llvm.SI.sampled";
2062 break;
2063 case TGSI_OPCODE_TXF:
2064 name = "llvm.SI.imageload";
2065 break;
2066 case TGSI_OPCODE_TXL:
2067 case TGSI_OPCODE_TXL2:
2068 name = "llvm.SI.samplel";
2069 break;
2070 default:
2071 assert(0);
2072 return;
2073 }
2074
2075 i8 = LLVMInt8TypeInContext(base->gallivm->context);
2076 v16i8 = LLVMVectorType(i8, 16);
2077 v32i8 = LLVMVectorType(i8, 32);
2078
2079 emit_data->args[1] = LLVMBuildBitCast(base->gallivm->builder,
2080 emit_data->args[1], v32i8, "");
2081 if (opcode != TGSI_OPCODE_TXF) {
2082 emit_data->args[2] = LLVMBuildBitCast(base->gallivm->builder,
2083 emit_data->args[2], v16i8, "");
2084 }
2085
2086 sprintf(intr_name, "%s.v%ui32", name,
2087 LLVMGetVectorSize(LLVMTypeOf(emit_data->args[0])));
2088
2089 emit_data->output[emit_data->chan] = build_intrinsic(
2090 base->gallivm->builder, intr_name, emit_data->dst_type,
2091 emit_data->args, emit_data->arg_count,
2092 LLVMReadNoneAttribute | LLVMNoUnwindAttribute);
2093 }
2094 }
2095
2096 static void txq_fetch_args(
2097 struct lp_build_tgsi_context * bld_base,
2098 struct lp_build_emit_data * emit_data)
2099 {
2100 struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
2101 const struct tgsi_full_instruction *inst = emit_data->inst;
2102 struct gallivm_state *gallivm = bld_base->base.gallivm;
2103 unsigned target = inst->Texture.Texture;
2104
2105 if (target == TGSI_TEXTURE_BUFFER) {
2106 LLVMTypeRef i32 = LLVMInt32TypeInContext(gallivm->context);
2107 LLVMTypeRef v8i32 = LLVMVectorType(i32, 8);
2108
2109 /* Read the size from the buffer descriptor directly. */
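/* Dword 6 of the v8i32 view appears to be the third dword of the
 * 128-bit buffer descriptor kept in the upper half of the slot (see
 * the v2i128 extract in tex_fetch_args), i.e. the NUM_RECORDS field.
 */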
2110 LLVMValueRef size = si_shader_ctx->resources[inst->Src[1].Register.Index];
2111 size = LLVMBuildBitCast(gallivm->builder, size, v8i32, "");
2112 size = LLVMBuildExtractElement(gallivm->builder, size,
2113 lp_build_const_int32(gallivm, 6), "");
2114 emit_data->args[0] = size;
2115 return;
2116 }
2117
2118 /* Mip level */
2119 emit_data->args[0] = lp_build_emit_fetch(bld_base, inst, 0, TGSI_CHAN_X);
2120
2121 /* Resource */
2122 emit_data->args[1] = si_shader_ctx->resources[inst->Src[1].Register.Index];
2123
2124 /* Texture target */
2125 if (target == TGSI_TEXTURE_CUBE_ARRAY ||
2126 target == TGSI_TEXTURE_SHADOWCUBE_ARRAY)
2127 target = TGSI_TEXTURE_2D_ARRAY;
2128
2129 emit_data->args[2] = lp_build_const_int32(bld_base->base.gallivm,
2130 target);
2131
2132 emit_data->arg_count = 3;
2133
2134 emit_data->dst_type = LLVMVectorType(
2135 LLVMInt32TypeInContext(bld_base->base.gallivm->context),
2136 4);
2137 }
2138
2139 static void build_txq_intrinsic(const struct lp_build_tgsi_action * action,
2140 struct lp_build_tgsi_context * bld_base,
2141 struct lp_build_emit_data * emit_data)
2142 {
2143 unsigned target = emit_data->inst->Texture.Texture;
2144
2145 if (target == TGSI_TEXTURE_BUFFER) {
2146 /* Just return the buffer size. */
2147 emit_data->output[emit_data->chan] = emit_data->args[0];
2148 return;
2149 }
2150
2151 build_tgsi_intrinsic_nomem(action, bld_base, emit_data);
2152
2153 /* Divide the number of layers by 6 to get the number of cubes. */
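/* For example, a resinfo result of 12 layers describes 2 cubes. */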
2154 if (target == TGSI_TEXTURE_CUBE_ARRAY ||
2155 target == TGSI_TEXTURE_SHADOWCUBE_ARRAY) {
2156 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
2157 LLVMValueRef two = lp_build_const_int32(bld_base->base.gallivm, 2);
2158 LLVMValueRef six = lp_build_const_int32(bld_base->base.gallivm, 6);
2159
2160 LLVMValueRef v4 = emit_data->output[emit_data->chan];
2161 LLVMValueRef z = LLVMBuildExtractElement(builder, v4, two, "");
2162 z = LLVMBuildSDiv(builder, z, six, "");
2163
2164 emit_data->output[emit_data->chan] =
2165 LLVMBuildInsertElement(builder, v4, z, two, "");
2166 }
2167 }
2168
2169 static void si_llvm_emit_ddxy(
2170 const struct lp_build_tgsi_action * action,
2171 struct lp_build_tgsi_context * bld_base,
2172 struct lp_build_emit_data * emit_data)
2173 {
2174 struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
2175 struct gallivm_state *gallivm = bld_base->base.gallivm;
2176 struct lp_build_context * base = &bld_base->base;
2177 const struct tgsi_full_instruction *inst = emit_data->inst;
2178 unsigned opcode = inst->Instruction.Opcode;
2179 LLVMValueRef indices[2];
2180 LLVMValueRef store_ptr, load_ptr0, load_ptr1;
2181 LLVMValueRef tl, trbl, result[4];
2182 LLVMTypeRef i32;
2183 unsigned swizzle[4];
2184 unsigned c;
2185
2186 i32 = LLVMInt32TypeInContext(gallivm->context);
2187
2188 indices[0] = bld_base->uint_bld.zero;
2189 indices[1] = build_intrinsic(gallivm->builder, "llvm.SI.tid", i32,
2190 NULL, 0, LLVMReadNoneAttribute);
2191 store_ptr = LLVMBuildGEP(gallivm->builder, si_shader_ctx->ddxy_lds,
2192 indices, 2, "");
2193
2194 indices[1] = LLVMBuildAnd(gallivm->builder, indices[1],
2195 lp_build_const_int32(gallivm, 0xfffffffc), "");
2196 load_ptr0 = LLVMBuildGEP(gallivm->builder, si_shader_ctx->ddxy_lds,
2197 indices, 2, "");
2198
2199 indices[1] = LLVMBuildAdd(gallivm->builder, indices[1],
2200 lp_build_const_int32(gallivm,
2201 opcode == TGSI_OPCODE_DDX ? 1 : 2),
2202 "");
2203 load_ptr1 = LLVMBuildGEP(gallivm->builder, si_shader_ctx->ddxy_lds,
2204 indices, 2, "");
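/* Sketch of the scheme above: each lane stores its value at LDS slot
 * "tid", and the four lanes of a 2x2 quad occupy four consecutive
 * slots. Masking with ~3 addresses the quad's top-left lane, and the
 * +1 / +2 offset selects its right / bottom neighbor, so trbl - tl
 * below yields DDX or DDY respectively.
 */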
2205
2206 for (c = 0; c < 4; ++c) {
2207 unsigned i;
2208
2209 swizzle[c] = tgsi_util_get_full_src_register_swizzle(&inst->Src[0], c);
2210 for (i = 0; i < c; ++i) {
2211 if (swizzle[i] == swizzle[c]) {
2212 result[c] = result[i];
2213 break;
2214 }
2215 }
2216 if (i != c)
2217 continue;
2218
2219 LLVMBuildStore(gallivm->builder,
2220 LLVMBuildBitCast(gallivm->builder,
2221 lp_build_emit_fetch(bld_base, inst, 0, c),
2222 i32, ""),
2223 store_ptr);
2224
2225 tl = LLVMBuildLoad(gallivm->builder, load_ptr0, "");
2226 tl = LLVMBuildBitCast(gallivm->builder, tl, base->elem_type, "");
2227
2228 trbl = LLVMBuildLoad(gallivm->builder, load_ptr1, "");
2229 trbl = LLVMBuildBitCast(gallivm->builder, trbl, base->elem_type, "");
2230
2231 result[c] = LLVMBuildFSub(gallivm->builder, trbl, tl, "");
2232 }
2233
2234 emit_data->output[0] = lp_build_gather_values(gallivm, result, 4);
2235 }
2236
2237 /* Emit one vertex from the geometry shader */
2238 static void si_llvm_emit_vertex(
2239 const struct lp_build_tgsi_action *action,
2240 struct lp_build_tgsi_context *bld_base,
2241 struct lp_build_emit_data *emit_data)
2242 {
2243 struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
2244 struct lp_build_context *uint = &bld_base->uint_bld;
2245 struct si_shader *shader = si_shader_ctx->shader;
2246 struct tgsi_shader_info *info = &shader->selector->info;
2247 struct gallivm_state *gallivm = bld_base->base.gallivm;
2248 LLVMTypeRef i32 = LLVMInt32TypeInContext(gallivm->context);
2249 LLVMValueRef soffset = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
2250 SI_PARAM_GS2VS_OFFSET);
2251 LLVMValueRef gs_next_vertex;
2252 LLVMValueRef can_emit, kill;
2253 LLVMValueRef args[2];
2254 unsigned chan;
2255 int i;
2256
2257 /* Write vertex attribute values to GSVS ring */
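/* Ring layout, as implied by the voffset computation below: component
 * "chan" of output "i" for vertex "v" lives at dword offset
 * (i * 4 + chan) * gs_max_out_vertices + v, relative to the soffset
 * base passed in SI_PARAM_GS2VS_OFFSET.
 */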
2258 gs_next_vertex = LLVMBuildLoad(gallivm->builder, si_shader_ctx->gs_next_vertex, "");
2259
2260 /* If this thread has already emitted the declared maximum number of
2261 * vertices, kill it: excessive vertex emissions are not supposed to
2262 * have any effect, and GS threads have no externally observable
2263 * effects other than emitting vertices.
2264 */
2265 can_emit = LLVMBuildICmp(gallivm->builder, LLVMIntULE, gs_next_vertex,
2266 lp_build_const_int32(gallivm,
2267 shader->selector->gs_max_out_vertices), "");
2268 kill = lp_build_select(&bld_base->base, can_emit,
2269 lp_build_const_float(gallivm, 1.0f),
2270 lp_build_const_float(gallivm, -1.0f));
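/* llvm.AMDGPU.kill discards the invocation when its argument is
 * negative, so -1.0f kills the thread and 1.0f is a no-op.
 */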
2271 build_intrinsic(gallivm->builder, "llvm.AMDGPU.kill",
2272 LLVMVoidTypeInContext(gallivm->context), &kill, 1, 0);
2273
2274 for (i = 0; i < info->num_outputs; i++) {
2275 LLVMValueRef *out_ptr =
2276 si_shader_ctx->radeon_bld.soa.outputs[i];
2277
2278 for (chan = 0; chan < 4; chan++) {
2279 LLVMValueRef out_val = LLVMBuildLoad(gallivm->builder, out_ptr[chan], "");
2280 LLVMValueRef voffset =
2281 lp_build_const_int32(gallivm, (i * 4 + chan) *
2282 shader->selector->gs_max_out_vertices);
2283
2284 voffset = lp_build_add(uint, voffset, gs_next_vertex);
2285 voffset = lp_build_mul_imm(uint, voffset, 4);
2286
2287 out_val = LLVMBuildBitCast(gallivm->builder, out_val, i32, "");
2288
2289 build_tbuffer_store(si_shader_ctx,
2290 si_shader_ctx->gsvs_ring,
2291 out_val, 1,
2292 voffset, soffset, 0,
2293 V_008F0C_BUF_DATA_FORMAT_32,
2294 V_008F0C_BUF_NUM_FORMAT_UINT,
2295 1, 0, 1, 1, 0);
2296 }
2297 }
2298 gs_next_vertex = lp_build_add(uint, gs_next_vertex,
2299 lp_build_const_int32(gallivm, 1));
2300 LLVMBuildStore(gallivm->builder, gs_next_vertex, si_shader_ctx->gs_next_vertex);
2301
2302 /* Signal vertex emission */
2303 args[0] = lp_build_const_int32(gallivm, SENDMSG_GS_OP_EMIT | SENDMSG_GS);
2304 args[1] = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_GS_WAVE_ID);
2305 build_intrinsic(gallivm->builder, "llvm.SI.sendmsg",
2306 LLVMVoidTypeInContext(gallivm->context), args, 2,
2307 LLVMNoUnwindAttribute);
2308 }
2309
2310 /* Cut one primitive from the geometry shader */
2311 static void si_llvm_emit_primitive(
2312 const struct lp_build_tgsi_action *action,
2313 struct lp_build_tgsi_context *bld_base,
2314 struct lp_build_emit_data *emit_data)
2315 {
2316 struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
2317 struct gallivm_state *gallivm = bld_base->base.gallivm;
2318 LLVMValueRef args[2];
2319
2320 /* Signal primitive cut */
2321 args[0] = lp_build_const_int32(gallivm, SENDMSG_GS_OP_CUT | SENDMSG_GS);
2322 args[1] = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_GS_WAVE_ID);
2323 build_intrinsic(gallivm->builder, "llvm.SI.sendmsg",
2324 LLVMVoidTypeInContext(gallivm->context), args, 2,
2325 LLVMNoUnwindAttribute);
2326 }
2327
2328 static const struct lp_build_tgsi_action tex_action = {
2329 .fetch_args = tex_fetch_args,
2330 .emit = build_tex_intrinsic,
2331 };
2332
2333 static const struct lp_build_tgsi_action txq_action = {
2334 .fetch_args = txq_fetch_args,
2335 .emit = build_txq_intrinsic,
2336 .intr_name = "llvm.SI.resinfo"
2337 };
2338
2339 static void create_meta_data(struct si_shader_context *si_shader_ctx)
2340 {
2341 struct gallivm_state *gallivm = si_shader_ctx->radeon_bld.soa.bld_base.base.gallivm;
2342 LLVMValueRef args[3];
2343
2344 args[0] = LLVMMDStringInContext(gallivm->context, "const", 5);
2345 args[1] = NULL;
2346 args[2] = lp_build_const_int32(gallivm, 1);
2347
2348 si_shader_ctx->const_md = LLVMMDNodeInContext(gallivm->context, args, 3);
2349 }
2350
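/* Build the type "elem_type[num_elements] *" in the constant address
 * space; used below for the descriptor arrays passed to the shader.
 */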
2351 static LLVMTypeRef const_array(LLVMTypeRef elem_type, int num_elements)
2352 {
2353 return LLVMPointerType(LLVMArrayType(elem_type, num_elements),
2354 CONST_ADDR_SPACE);
2355 }
2356
2357 static void declare_streamout_params(struct si_shader_context *si_shader_ctx,
2358 struct pipe_stream_output_info *so,
2359 LLVMTypeRef *params, LLVMTypeRef i32,
2360 unsigned *num_params)
2361 {
2362 int i;
2363
2364 /* Streamout SGPRs. */
2365 if (so->num_outputs) {
2366 params[si_shader_ctx->param_streamout_config = (*num_params)++] = i32;
2367 params[si_shader_ctx->param_streamout_write_index = (*num_params)++] = i32;
2368 }
2369 /* A streamout buffer offset is loaded if the stride is non-zero. */
2370 for (i = 0; i < 4; i++) {
2371 if (!so->stride[i])
2372 continue;
2373
2374 params[si_shader_ctx->param_streamout_offset[i] = (*num_params)++] = i32;
2375 }
2376 }
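/* Worked example: assuming so->num_outputs != 0 and only stride[0] is
 * non-zero, with *num_params == N on entry, this declares:
 *   params[N]   = i32;   param_streamout_config
 *   params[N+1] = i32;   param_streamout_write_index
 *   params[N+2] = i32;   param_streamout_offset[0]
 * and leaves *num_params == N + 3. Zero-stride buffers get no offset
 * parameter.
 */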
2377
2378 static void create_function(struct si_shader_context *si_shader_ctx)
2379 {
2380 struct lp_build_tgsi_context *bld_base = &si_shader_ctx->radeon_bld.soa.bld_base;
2381 struct gallivm_state *gallivm = bld_base->base.gallivm;
2382 struct si_shader *shader = si_shader_ctx->shader;
2383 LLVMTypeRef params[SI_NUM_PARAMS], f32, i8, i32, v2i32, v3i32, v16i8, v4i32, v8i32;
2384 unsigned i, last_array_pointer, last_sgpr, num_params;
2385
2386 i8 = LLVMInt8TypeInContext(gallivm->context);
2387 i32 = LLVMInt32TypeInContext(gallivm->context);
2388 f32 = LLVMFloatTypeInContext(gallivm->context);
2389 v2i32 = LLVMVectorType(i32, 2);
2390 v3i32 = LLVMVectorType(i32, 3);
2391 v4i32 = LLVMVectorType(i32, 4);
2392 v8i32 = LLVMVectorType(i32, 8);
2393 v16i8 = LLVMVectorType(i8, 16);
2394
2395 params[SI_PARAM_RW_BUFFERS] = const_array(v16i8, SI_NUM_RW_BUFFERS);
2396 params[SI_PARAM_CONST] = const_array(v16i8, SI_NUM_CONST_BUFFERS);
2397 params[SI_PARAM_SAMPLER] = const_array(v4i32, SI_NUM_SAMPLER_STATES);
2398 params[SI_PARAM_RESOURCE] = const_array(v8i32, SI_NUM_SAMPLER_VIEWS);
2399 last_array_pointer = SI_PARAM_RESOURCE;
2400
2401 switch (si_shader_ctx->type) {
2402 case TGSI_PROCESSOR_VERTEX:
2403 params[SI_PARAM_VERTEX_BUFFER] = const_array(v16i8, SI_NUM_VERTEX_BUFFERS);
2404 last_array_pointer = SI_PARAM_VERTEX_BUFFER;
2405 params[SI_PARAM_BASE_VERTEX] = i32;
2406 params[SI_PARAM_START_INSTANCE] = i32;
2407 num_params = SI_PARAM_START_INSTANCE+1;
2408
2409 if (shader->key.vs.as_es) {
2410 params[si_shader_ctx->param_es2gs_offset = num_params++] = i32;
2411 } else {
2412 if (shader->is_gs_copy_shader) {
2413 last_array_pointer = SI_PARAM_CONST;
2414 num_params = SI_PARAM_CONST+1;
2415 }
2416
2417 /* The locations of the other parameters are assigned dynamically. */
2418 declare_streamout_params(si_shader_ctx, &shader->selector->so,
2419 params, i32, &num_params);
2420 }
2421
2422 last_sgpr = num_params-1;
2423
2424 /* VGPRs */
2425 params[si_shader_ctx->param_vertex_id = num_params++] = i32;
2426 params[num_params++] = i32; /* unused */
2427 params[num_params++] = i32; /* unused */
2428 params[si_shader_ctx->param_instance_id = num_params++] = i32;
2429 break;
2430
2431 case TGSI_PROCESSOR_GEOMETRY:
2432 params[SI_PARAM_GS2VS_OFFSET] = i32;
2433 params[SI_PARAM_GS_WAVE_ID] = i32;
2434 last_sgpr = SI_PARAM_GS_WAVE_ID;
2435
2436 /* VGPRs */
2437 params[SI_PARAM_VTX0_OFFSET] = i32;
2438 params[SI_PARAM_VTX1_OFFSET] = i32;
2439 params[SI_PARAM_PRIMITIVE_ID] = i32;
2440 params[SI_PARAM_VTX2_OFFSET] = i32;
2441 params[SI_PARAM_VTX3_OFFSET] = i32;
2442 params[SI_PARAM_VTX4_OFFSET] = i32;
2443 params[SI_PARAM_VTX5_OFFSET] = i32;
2444 params[SI_PARAM_GS_INSTANCE_ID] = i32;
2445 num_params = SI_PARAM_GS_INSTANCE_ID+1;
2446 break;
2447
2448 case TGSI_PROCESSOR_FRAGMENT:
2449 params[SI_PARAM_ALPHA_REF] = f32;
2450 params[SI_PARAM_PRIM_MASK] = i32;
2451 last_sgpr = SI_PARAM_PRIM_MASK;
2452 params[SI_PARAM_PERSP_SAMPLE] = v2i32;
2453 params[SI_PARAM_PERSP_CENTER] = v2i32;
2454 params[SI_PARAM_PERSP_CENTROID] = v2i32;
2455 params[SI_PARAM_PERSP_PULL_MODEL] = v3i32;
2456 params[SI_PARAM_LINEAR_SAMPLE] = v2i32;
2457 params[SI_PARAM_LINEAR_CENTER] = v2i32;
2458 params[SI_PARAM_LINEAR_CENTROID] = v2i32;
2459 params[SI_PARAM_LINE_STIPPLE_TEX] = f32;
2460 params[SI_PARAM_POS_X_FLOAT] = f32;
2461 params[SI_PARAM_POS_Y_FLOAT] = f32;
2462 params[SI_PARAM_POS_Z_FLOAT] = f32;
2463 params[SI_PARAM_POS_W_FLOAT] = f32;
2464 params[SI_PARAM_FRONT_FACE] = f32;
2465 params[SI_PARAM_ANCILLARY] = i32;
2466 params[SI_PARAM_SAMPLE_COVERAGE] = f32;
2467 params[SI_PARAM_POS_FIXED_PT] = f32;
2468 num_params = SI_PARAM_POS_FIXED_PT+1;
2469 break;
2470
2471 default:
2472 assert(0 && "unimplemented shader");
2473 return;
2474 }
2475
2476 assert(num_params <= Elements(params));
2477 radeon_llvm_create_func(&si_shader_ctx->radeon_bld, params, num_params);
2478 radeon_llvm_shader_type(si_shader_ctx->radeon_bld.main_fn, si_shader_ctx->type);
2479
2480 if (shader->dx10_clamp_mode)
2481 LLVMAddTargetDependentFunctionAttr(si_shader_ctx->radeon_bld.main_fn,
2482 "enable-no-nans-fp-math", "true");
2483
2484 for (i = 0; i <= last_sgpr; ++i) {
2485 LLVMValueRef P = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, i);
2486
2487 /* We tell LLVM that array inputs are passed by value, which allows the
2488 * Sinking pass to move the loads. The inputs are constant, so this is fine. */
2489 if (i <= last_array_pointer)
2490 LLVMAddAttribute(P, LLVMByValAttribute);
2491 else
2492 LLVMAddAttribute(P, LLVMInRegAttribute);
2493 }
2494
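/* LDS scratch used by si_llvm_emit_ddxy: one i32 slot per lane of a
 * 64-thread wavefront, allocated only when DDX/DDY actually appear in
 * the shader.
 */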
2495 if (bld_base->info &&
2496 (bld_base->info->opcode_count[TGSI_OPCODE_DDX] > 0 ||
2497 bld_base->info->opcode_count[TGSI_OPCODE_DDY] > 0))
2498 si_shader_ctx->ddxy_lds =
2499 LLVMAddGlobalInAddressSpace(gallivm->module,
2500 LLVMArrayType(i32, 64),
2501 "ddxy_lds",
2502 LOCAL_ADDR_SPACE);
2503 }
2504
2505 static void preload_constants(struct si_shader_context *si_shader_ctx)
2506 {
2507 struct lp_build_tgsi_context * bld_base = &si_shader_ctx->radeon_bld.soa.bld_base;
2508 struct gallivm_state * gallivm = bld_base->base.gallivm;
2509 const struct tgsi_shader_info * info = bld_base->info;
2510 unsigned buf;
2511 LLVMValueRef ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_CONST);
2512
2513 for (buf = 0; buf < SI_NUM_CONST_BUFFERS; buf++) {
2514 unsigned i, num_const = info->const_file_max[buf] + 1;
2515
2516 if (num_const == 0)
2517 continue;
2518
2519 /* Allocate space for the constant values */
2520 si_shader_ctx->constants[buf] = CALLOC(num_const * 4, sizeof(LLVMValueRef));
2521
2522 /* Load the resource descriptor */
2523 si_shader_ctx->const_resource[buf] =
2524 build_indexed_load_const(si_shader_ctx, ptr, lp_build_const_int32(gallivm, buf));
2525
2526 /* Load the constants, we rely on the code sinking to do the rest */
2527 for (i = 0; i < num_const * 4; ++i) {
2528 si_shader_ctx->constants[buf][i] =
2529 buffer_load_const(gallivm->builder,
2530 si_shader_ctx->const_resource[buf],
2531 lp_build_const_int32(gallivm, i * 4),
2532 bld_base->base.elem_type);
2533 }
2534 }
2535 }
2536
2537 static void preload_samplers(struct si_shader_context *si_shader_ctx)
2538 {
2539 struct lp_build_tgsi_context * bld_base = &si_shader_ctx->radeon_bld.soa.bld_base;
2540 struct gallivm_state * gallivm = bld_base->base.gallivm;
2541 const struct tgsi_shader_info * info = bld_base->info;
2542
2543 unsigned i, num_samplers = info->file_max[TGSI_FILE_SAMPLER] + 1;
2544
2545 LLVMValueRef res_ptr, samp_ptr;
2546 LLVMValueRef offset;
2547
2548 if (num_samplers == 0)
2549 return;
2550
2551 res_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_RESOURCE);
2552 samp_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_SAMPLER);
2553
2554 /* Load the resources and samplers, we rely on the code sinking to do the rest */
2555 for (i = 0; i < num_samplers; ++i) {
2556 /* Resource */
2557 offset = lp_build_const_int32(gallivm, i);
2558 si_shader_ctx->resources[i] = build_indexed_load_const(si_shader_ctx, res_ptr, offset);
2559
2560 /* Sampler */
2561 offset = lp_build_const_int32(gallivm, i);
2562 si_shader_ctx->samplers[i] = build_indexed_load_const(si_shader_ctx, samp_ptr, offset);
2563
2564 /* FMASK resource */
2565 if (info->is_msaa_sampler[i]) {
2566 offset = lp_build_const_int32(gallivm, SI_FMASK_TEX_OFFSET + i);
2567 si_shader_ctx->resources[SI_FMASK_TEX_OFFSET + i] =
2568 build_indexed_load_const(si_shader_ctx, res_ptr, offset);
2569 }
2570 }
2571 }
2572
2573 static void preload_streamout_buffers(struct si_shader_context *si_shader_ctx)
2574 {
2575 struct lp_build_tgsi_context * bld_base = &si_shader_ctx->radeon_bld.soa.bld_base;
2576 struct gallivm_state * gallivm = bld_base->base.gallivm;
2577 unsigned i;
2578
2579 if (si_shader_ctx->type != TGSI_PROCESSOR_VERTEX ||
2580 si_shader_ctx->shader->key.vs.as_es ||
2581 !si_shader_ctx->shader->selector->so.num_outputs)
2582 return;
2583
2584 LLVMValueRef buf_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
2585 SI_PARAM_RW_BUFFERS);
2586
2587 /* Load the resources, we rely on the code sinking to do the rest */
2588 for (i = 0; i < 4; ++i) {
2589 if (si_shader_ctx->shader->selector->so.stride[i]) {
2590 LLVMValueRef offset = lp_build_const_int32(gallivm,
2591 SI_SO_BUF_OFFSET + i);
2592
2593 si_shader_ctx->so_buffers[i] = build_indexed_load_const(si_shader_ctx, buf_ptr, offset);
2594 }
2595 }
2596 }
2597
2598 /**
2599 * Load ESGS and GSVS ring buffer resource descriptors and save the variables
2600 * for later use.
2601 */
2602 static void preload_ring_buffers(struct si_shader_context *si_shader_ctx)
2603 {
2604 struct gallivm_state *gallivm =
2605 si_shader_ctx->radeon_bld.soa.bld_base.base.gallivm;
2606
2607 LLVMValueRef buf_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
2608 SI_PARAM_RW_BUFFERS);
2609
2610 if ((si_shader_ctx->type == TGSI_PROCESSOR_VERTEX &&
2611 si_shader_ctx->shader->key.vs.as_es) ||
2612 si_shader_ctx->type == TGSI_PROCESSOR_GEOMETRY) {
2613 LLVMValueRef offset = lp_build_const_int32(gallivm, SI_RING_ESGS);
2614
2615 si_shader_ctx->esgs_ring =
2616 build_indexed_load_const(si_shader_ctx, buf_ptr, offset);
2617 }
2618
2619 if (si_shader_ctx->type == TGSI_PROCESSOR_GEOMETRY ||
2620 si_shader_ctx->shader->is_gs_copy_shader) {
2621 LLVMValueRef offset = lp_build_const_int32(gallivm, SI_RING_GSVS);
2622
2623 si_shader_ctx->gsvs_ring =
2624 build_indexed_load_const(si_shader_ctx, buf_ptr, offset);
2625 }
2626 }
2627
2628 void si_shader_binary_read_config(const struct si_screen *sscreen,
2629 struct si_shader *shader,
2630 unsigned symbol_offset)
2631 {
2632 unsigned i;
2633 const unsigned char *config =
2634 radeon_shader_binary_config_start(&shader->binary,
2635 symbol_offset);
2636
2637 /* XXX: We may be able to emit some of these values directly rather than
2638 * extracting fields to be emitted later.
2639 */
2640
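/* The config section is a sequence of (register, value) pairs of
 * little-endian dwords, hence the stride of 8 bytes per entry.
 */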
2641 for (i = 0; i < shader->binary.config_size_per_symbol; i += 8) {
2642 unsigned reg = util_le32_to_cpu(*(uint32_t*)(config + i));
2643 unsigned value = util_le32_to_cpu(*(uint32_t*)(config + i + 4));
2644 switch (reg) {
2645 case R_00B028_SPI_SHADER_PGM_RSRC1_PS:
2646 case R_00B128_SPI_SHADER_PGM_RSRC1_VS:
2647 case R_00B228_SPI_SHADER_PGM_RSRC1_GS:
2648 case R_00B848_COMPUTE_PGM_RSRC1:
2649 shader->num_sgprs = MAX2(shader->num_sgprs, (G_00B028_SGPRS(value) + 1) * 8);
2650 shader->num_vgprs = MAX2(shader->num_vgprs, (G_00B028_VGPRS(value) + 1) * 4);
2651 shader->float_mode = G_00B028_FLOAT_MODE(value);
2652 break;
2653 case R_00B02C_SPI_SHADER_PGM_RSRC2_PS:
2654 shader->lds_size = MAX2(shader->lds_size, G_00B02C_EXTRA_LDS_SIZE(value));
2655 break;
2656 case R_00B84C_COMPUTE_PGM_RSRC2:
2657 shader->lds_size = MAX2(shader->lds_size, G_00B84C_LDS_SIZE(value));
2658 break;
2659 case R_0286CC_SPI_PS_INPUT_ENA:
2660 shader->spi_ps_input_ena = value;
2661 break;
2662 case R_0286E8_SPI_TMPRING_SIZE:
2663 case R_00B860_COMPUTE_TMPRING_SIZE:
2664 /* WAVESIZE is in units of 256 dwords; multiply by 4 to convert to bytes. */
2665 shader->scratch_bytes_per_wave =
2666 G_00B860_WAVESIZE(value) * 256 * 4;
2667 break;
2668 default:
2669 fprintf(stderr, "Warning: Compiler emitted unknown "
2670 "config register: 0x%x\n", reg);
2671 break;
2672 }
2673 }
2674 }
2675
2676 void si_shader_apply_scratch_relocs(struct si_context *sctx,
2677 struct si_shader *shader,
2678 uint64_t scratch_va)
2679 {
2680 unsigned i;
2681 uint32_t scratch_rsrc_dword0 = scratch_va & 0xffffffff;
2682 uint32_t scratch_rsrc_dword1 =
2683 S_008F04_BASE_ADDRESS_HI(scratch_va >> 32)
2684 | S_008F04_STRIDE(shader->scratch_bytes_per_wave / 64);
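/* dword0 holds the low 32 bits of the scratch virtual address; dword1
 * combines the high address bits with the per-lane stride, i.e. the
 * per-wave scratch size divided across the 64 lanes of a wave.
 */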
2685
2686 for (i = 0; i < shader->binary.reloc_count; i++) {
2687 const struct radeon_shader_reloc *reloc =
2688 &shader->binary.relocs[i];
2689 if (!strcmp(scratch_rsrc_dword0_symbol, reloc->name)) {
2690 util_memcpy_cpu_to_le32(shader->binary.code + reloc->offset,
2691 &scratch_rsrc_dword0, 4);
2692 } else if (!strcmp(scratch_rsrc_dword1_symbol, reloc->name)) {
2693 util_memcpy_cpu_to_le32(shader->binary.code + reloc->offset,
2694 &scratch_rsrc_dword1, 4);
2695 }
2696 }
2697 }
2698
2699 int si_shader_binary_upload(struct si_screen *sscreen, struct si_shader *shader)
2700 {
2701 const struct radeon_shader_binary *binary = &shader->binary;
2702 unsigned code_size = binary->code_size + binary->rodata_size;
2703 unsigned char *ptr;
2704
2705 r600_resource_reference(&shader->bo, NULL);
2706 shader->bo = si_resource_create_custom(&sscreen->b.b,
2707 PIPE_USAGE_IMMUTABLE,
2708 code_size);
2709 if (!shader->bo)
2710 return -ENOMEM;
2711
2712 ptr = sscreen->b.ws->buffer_map(shader->bo->cs_buf, NULL,
2713 PIPE_TRANSFER_READ_WRITE);
2714 util_memcpy_cpu_to_le32(ptr, binary->code, binary->code_size);
2715 if (binary->rodata_size > 0) {
2716 ptr += binary->code_size;
2717 util_memcpy_cpu_to_le32(ptr, binary->rodata,
2718 binary->rodata_size);
2719 }
2720
2721 sscreen->b.ws->buffer_unmap(shader->bo->cs_buf);
2722 return 0;
2723 }
2724
2725 int si_shader_binary_read(struct si_screen *sscreen, struct si_shader *shader)
2726 {
2727 const struct radeon_shader_binary *binary = &shader->binary;
2728 unsigned i;
2729 bool dump = r600_can_dump_shader(&sscreen->b,
2730 shader->selector ? shader->selector->tokens : NULL);
2731
2732 si_shader_binary_read_config(sscreen, shader, 0);
2733 si_shader_binary_upload(sscreen, shader);
2734
2735 if (dump) {
2736 if (!binary->disassembled) {
2737 fprintf(stderr, "SI CODE:\n");
2738 for (i = 0; i < binary->code_size; i += 4) {
2739 fprintf(stderr, "@0x%x: %02x%02x%02x%02x\n", i, binary->code[i + 3],
2740 binary->code[i + 2], binary->code[i + 1],
2741 binary->code[i]);
2742 }
2743 }
2744
2745 fprintf(stderr, "*** SHADER STATS ***\n"
2746 "SGPRS: %d\nVGPRS: %d\nCode Size: %d bytes\nLDS: %d blocks\n"
2747 "Scratch: %d bytes per wave\n********************\n",
2748 shader->num_sgprs, shader->num_vgprs, binary->code_size,
2749 shader->lds_size, shader->scratch_bytes_per_wave);
2750 }
2751 return 0;
2752 }
2753
2754 int si_compile_llvm(struct si_screen *sscreen, struct si_shader *shader,
2755 LLVMTargetMachineRef tm, LLVMModuleRef mod)
2756 {
2757 int r = 0;
2758 bool dump = r600_can_dump_shader(&sscreen->b,
2759 shader->selector ? shader->selector->tokens : NULL);
2760 r = radeon_llvm_compile(mod, &shader->binary,
2761 r600_get_llvm_processor_name(sscreen->b.family), dump, tm);
2762
2763 if (r) {
2764 return r;
2765 }
2766 r = si_shader_binary_read(sscreen, shader);
2767
2768 FREE(shader->binary.config);
2769 FREE(shader->binary.rodata);
2770 FREE(shader->binary.global_symbol_offsets);
2771 if (shader->scratch_bytes_per_wave == 0) {
2772 FREE(shader->binary.code);
2773 FREE(shader->binary.relocs);
2774 memset(&shader->binary, 0, sizeof(shader->binary));
2775 }
2776 return r;
2777 }
2778
2779 /* Generate code for the hardware VS shader stage to go with a geometry shader */
2780 static int si_generate_gs_copy_shader(struct si_screen *sscreen,
2781 struct si_shader_context *si_shader_ctx,
2782 struct si_shader *gs, bool dump)
2783 {
2784 struct gallivm_state *gallivm = &si_shader_ctx->radeon_bld.gallivm;
2785 struct lp_build_tgsi_context *bld_base = &si_shader_ctx->radeon_bld.soa.bld_base;
2786 struct lp_build_context *base = &bld_base->base;
2787 struct lp_build_context *uint = &bld_base->uint_bld;
2788 struct si_shader *shader = si_shader_ctx->shader;
2789 struct si_shader_output_values *outputs;
2790 struct tgsi_shader_info *gsinfo = &gs->selector->info;
2791 LLVMValueRef args[9];
2792 int i, r;
2793
2794 outputs = MALLOC(gsinfo->num_outputs * sizeof(outputs[0]));
2795
2796 si_shader_ctx->type = TGSI_PROCESSOR_VERTEX;
2797 shader->is_gs_copy_shader = true;
2798
2799 radeon_llvm_context_init(&si_shader_ctx->radeon_bld);
2800
2801 create_meta_data(si_shader_ctx);
2802 create_function(si_shader_ctx);
2803 preload_streamout_buffers(si_shader_ctx);
2804 preload_ring_buffers(si_shader_ctx);
2805
2806 args[0] = si_shader_ctx->gsvs_ring;
2807 args[1] = lp_build_mul_imm(uint,
2808 LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
2809 si_shader_ctx->param_vertex_id),
2810 4);
2811 args[3] = uint->zero;
2812 args[4] = uint->one; /* OFFEN */
2813 args[5] = uint->zero; /* IDXEN */
2814 args[6] = uint->one; /* GLC */
2815 args[7] = uint->one; /* SLC */
2816 args[8] = uint->zero; /* TFE */
2817
2818 /* Fetch vertex data from GSVS ring */
2819 for (i = 0; i < gsinfo->num_outputs; ++i) {
2820 unsigned chan;
2821
2822 outputs[i].name = gsinfo->output_semantic_name[i];
2823 outputs[i].sid = gsinfo->output_semantic_index[i];
2824
2825 for (chan = 0; chan < 4; chan++) {
2826 args[2] = lp_build_const_int32(gallivm,
2827 (i * 4 + chan) *
2828 gs->selector->gs_max_out_vertices * 16 * 4);
2829
2830 outputs[i].values[chan] =
2831 LLVMBuildBitCast(gallivm->builder,
2832 build_intrinsic(gallivm->builder,
2833 "llvm.SI.buffer.load.dword.i32.i32",
2834 LLVMInt32TypeInContext(gallivm->context),
2835 args, 9,
2836 LLVMReadOnlyAttribute | LLVMNoUnwindAttribute),
2837 base->elem_type, "");
2838 }
2839 }
2840
2841 si_llvm_export_vs(bld_base, outputs, gsinfo->num_outputs);
2842
2843 radeon_llvm_finalize_module(&si_shader_ctx->radeon_bld);
2844
2845 if (dump)
2846 fprintf(stderr, "Copy Vertex Shader for Geometry Shader:\n\n");
2847
2848 r = si_compile_llvm(sscreen, si_shader_ctx->shader,
2849 si_shader_ctx->tm, bld_base->base.gallivm->module);
2850
2851 radeon_llvm_dispose(&si_shader_ctx->radeon_bld);
2852
2853 FREE(outputs);
2854 return r;
2855 }
2856
2857 static void si_dump_key(unsigned shader, union si_shader_key *key)
2858 {
2859 int i;
2860
2861 fprintf(stderr, "SHADER KEY\n");
2862
2863 switch (shader) {
2864 case PIPE_SHADER_VERTEX:
2865 fprintf(stderr, " instance_divisors = {");
2866 for (i = 0; i < Elements(key->vs.instance_divisors); i++)
2867 fprintf(stderr, !i ? "%u" : ", %u",
2868 key->vs.instance_divisors[i]);
2869 fprintf(stderr, "}\n");
2870
2871 if (key->vs.as_es)
2872 fprintf(stderr, " gs_used_inputs = 0x%"PRIx64"\n",
2873 key->vs.gs_used_inputs);
2874 fprintf(stderr, " as_es = %u\n", key->vs.as_es);
2875 break;
2876
2877 case PIPE_SHADER_GEOMETRY:
2878 break;
2879
2880 case PIPE_SHADER_FRAGMENT:
2881 fprintf(stderr, " export_16bpc = 0x%X\n", key->ps.export_16bpc);
2882 fprintf(stderr, " last_cbuf = %u\n", key->ps.last_cbuf);
2883 fprintf(stderr, " color_two_side = %u\n", key->ps.color_two_side);
2884 fprintf(stderr, " alpha_func = %u\n", key->ps.alpha_func);
2885 fprintf(stderr, " alpha_to_one = %u\n", key->ps.alpha_to_one);
2886 fprintf(stderr, " poly_stipple = %u\n", key->ps.poly_stipple);
2887 break;
2888
2889 default:
2890 assert(0);
2891 }
2892 }
2893
2894 int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
2895 struct si_shader *shader)
2896 {
2897 struct si_shader_selector *sel = shader->selector;
2898 struct tgsi_token *tokens = sel->tokens;
2899 struct si_shader_context si_shader_ctx;
2900 struct lp_build_tgsi_context * bld_base;
2901 struct tgsi_shader_info stipple_shader_info;
2902 LLVMModuleRef mod;
2903 int r = 0;
2904 bool poly_stipple = sel->type == PIPE_SHADER_FRAGMENT &&
2905 shader->key.ps.poly_stipple;
2906 bool dump = r600_can_dump_shader(&sscreen->b, sel->tokens);
2907
2908 if (poly_stipple) {
2909 tokens = util_pstipple_create_fragment_shader(tokens, NULL,
2910 SI_POLY_STIPPLE_SAMPLER);
2911 tgsi_scan_shader(tokens, &stipple_shader_info);
2912 }
2913
2914 /* Dump TGSI code before doing TGSI->LLVM conversion in case the
2915 * conversion fails. */
2916 if (dump) {
2917 si_dump_key(sel->type, &shader->key);
2918 tgsi_dump(tokens, 0);
2919 si_dump_streamout(&sel->so);
2920 }
2921
2922 assert(shader->nparam == 0);
2923
2924 memset(&si_shader_ctx, 0, sizeof(si_shader_ctx));
2925 radeon_llvm_context_init(&si_shader_ctx.radeon_bld);
2926 bld_base = &si_shader_ctx.radeon_bld.soa.bld_base;
2927
2928 if (sel->type != PIPE_SHADER_COMPUTE)
2929 shader->dx10_clamp_mode = true;
2930
2931 if (sel->info.uses_kill)
2932 shader->db_shader_control |= S_02880C_KILL_ENABLE(1);
2933
2934 shader->uses_instanceid = sel->info.uses_instanceid;
2935 bld_base->info = poly_stipple ? &stipple_shader_info : &sel->info;
2936 bld_base->emit_fetch_funcs[TGSI_FILE_CONSTANT] = fetch_constant;
2937
2938 bld_base->op_actions[TGSI_OPCODE_TEX] = tex_action;
2939 bld_base->op_actions[TGSI_OPCODE_TEX2] = tex_action;
2940 bld_base->op_actions[TGSI_OPCODE_TXB] = tex_action;
2941 bld_base->op_actions[TGSI_OPCODE_TXB2] = tex_action;
2942 bld_base->op_actions[TGSI_OPCODE_TXD] = tex_action;
2943 bld_base->op_actions[TGSI_OPCODE_TXF] = tex_action;
2944 bld_base->op_actions[TGSI_OPCODE_TXL] = tex_action;
2945 bld_base->op_actions[TGSI_OPCODE_TXL2] = tex_action;
2946 bld_base->op_actions[TGSI_OPCODE_TXP] = tex_action;
2947 bld_base->op_actions[TGSI_OPCODE_TXQ] = txq_action;
2948 bld_base->op_actions[TGSI_OPCODE_TG4] = tex_action;
2949 bld_base->op_actions[TGSI_OPCODE_LODQ] = tex_action;
2950
2951 bld_base->op_actions[TGSI_OPCODE_DDX].emit = si_llvm_emit_ddxy;
2952 bld_base->op_actions[TGSI_OPCODE_DDY].emit = si_llvm_emit_ddxy;
2953
2954 bld_base->op_actions[TGSI_OPCODE_EMIT].emit = si_llvm_emit_vertex;
2955 bld_base->op_actions[TGSI_OPCODE_ENDPRIM].emit = si_llvm_emit_primitive;
2956
2957 if (HAVE_LLVM >= 0x0306) {
2958 bld_base->op_actions[TGSI_OPCODE_MAX].emit = build_tgsi_intrinsic_nomem;
2959 bld_base->op_actions[TGSI_OPCODE_MAX].intr_name = "llvm.maxnum.f32";
2960 bld_base->op_actions[TGSI_OPCODE_MIN].emit = build_tgsi_intrinsic_nomem;
2961 bld_base->op_actions[TGSI_OPCODE_MIN].intr_name = "llvm.minnum.f32";
2962 }
2963
2964 si_shader_ctx.radeon_bld.load_system_value = declare_system_value;
2965 si_shader_ctx.shader = shader;
2966 si_shader_ctx.type = tgsi_get_processor_type(tokens);
2967 si_shader_ctx.screen = sscreen;
2968 si_shader_ctx.tm = tm;
2969
2970 switch (si_shader_ctx.type) {
2971 case TGSI_PROCESSOR_VERTEX:
2972 si_shader_ctx.radeon_bld.load_input = declare_input_vs;
2973 if (shader->key.vs.as_es) {
2974 bld_base->emit_epilogue = si_llvm_emit_es_epilogue;
2975 } else {
2976 bld_base->emit_epilogue = si_llvm_emit_vs_epilogue;
2977 }
2978 break;
2979 case TGSI_PROCESSOR_GEOMETRY:
2980 bld_base->emit_fetch_funcs[TGSI_FILE_INPUT] = fetch_input_gs;
2981 bld_base->emit_epilogue = si_llvm_emit_gs_epilogue;
2982 break;
2983 case TGSI_PROCESSOR_FRAGMENT:
2984 si_shader_ctx.radeon_bld.load_input = declare_input_fs;
2985 bld_base->emit_epilogue = si_llvm_emit_fs_epilogue;
2986
2987 switch (sel->info.properties[TGSI_PROPERTY_FS_DEPTH_LAYOUT]) {
2988 case TGSI_FS_DEPTH_LAYOUT_GREATER:
2989 shader->db_shader_control |=
2990 S_02880C_CONSERVATIVE_Z_EXPORT(V_02880C_EXPORT_GREATER_THAN_Z);
2991 break;
2992 case TGSI_FS_DEPTH_LAYOUT_LESS:
2993 shader->db_shader_control |=
2994 S_02880C_CONSERVATIVE_Z_EXPORT(V_02880C_EXPORT_LESS_THAN_Z);
2995 break;
2996 }
2997 break;
2998 default:
2999 assert(!"Unsupported shader type");
3000 return -1;
3001 }
3002
3003 create_meta_data(&si_shader_ctx);
3004 create_function(&si_shader_ctx);
3005 preload_constants(&si_shader_ctx);
3006 preload_samplers(&si_shader_ctx);
3007 preload_streamout_buffers(&si_shader_ctx);
3008 preload_ring_buffers(&si_shader_ctx);
3009
3010 if (si_shader_ctx.type == TGSI_PROCESSOR_GEOMETRY) {
3011 si_shader_ctx.gs_next_vertex =
3012 lp_build_alloca(bld_base->base.gallivm,
3013 bld_base->uint_bld.elem_type, "");
3014 }
3015
3016 if (!lp_build_tgsi_llvm(bld_base, tokens)) {
3017 fprintf(stderr, "Failed to translate shader from TGSI to LLVM\n");
3018 goto out;
3019 }
3020
3021 radeon_llvm_finalize_module(&si_shader_ctx.radeon_bld);
3022
3023 mod = bld_base->base.gallivm->module;
3024 r = si_compile_llvm(sscreen, shader, tm, mod);
3025 if (r) {
3026 fprintf(stderr, "LLVM failed to compile shader\n");
3027 goto out;
3028 }
3029
3030 radeon_llvm_dispose(&si_shader_ctx.radeon_bld);
3031
3032 if (si_shader_ctx.type == TGSI_PROCESSOR_GEOMETRY) {
3033 shader->gs_copy_shader = CALLOC_STRUCT(si_shader);
3034 shader->gs_copy_shader->selector = shader->selector;
3035 shader->gs_copy_shader->key = shader->key;
3036 si_shader_ctx.shader = shader->gs_copy_shader;
3037 if ((r = si_generate_gs_copy_shader(sscreen, &si_shader_ctx,
3038 shader, dump))) {
3039 FREE(shader->gs_copy_shader);
3040 shader->gs_copy_shader = NULL;
3041 goto out;
3042 }
3043 }
3044
3045 out:
3046 for (int i = 0; i < SI_NUM_CONST_BUFFERS; i++)
3047 FREE(si_shader_ctx.constants[i]);
3048 if (poly_stipple)
3049 tgsi_free_tokens(tokens);
3050 return r;
3051 }
3052
3053 void si_shader_destroy(struct pipe_context *ctx, struct si_shader *shader)
3054 {
3055 if (shader->gs_copy_shader)
3056 si_shader_destroy(ctx, shader->gs_copy_shader);
3057
3058 if (shader->scratch_bo)
3059 r600_resource_reference(&shader->scratch_bo, NULL);
3060
3061 r600_resource_reference(&shader->bo, NULL);
3062
3063 FREE(shader->binary.code);
3064 FREE(shader->binary.relocs);
3065 }