radeonsi: move MRTZ export into a separate function
[mesa.git] / src / gallium / drivers / radeonsi / si_shader.c
1 /*
2 * Copyright 2012 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 *
23 * Authors:
24 * Tom Stellard <thomas.stellard@amd.com>
25 * Michel Dänzer <michel.daenzer@amd.com>
26 * Christian König <christian.koenig@amd.com>
27 */
28
29 #include "gallivm/lp_bld_const.h"
30 #include "gallivm/lp_bld_gather.h"
31 #include "gallivm/lp_bld_intr.h"
32 #include "gallivm/lp_bld_logic.h"
33 #include "gallivm/lp_bld_arit.h"
34 #include "gallivm/lp_bld_bitarit.h"
35 #include "gallivm/lp_bld_flow.h"
36 #include "radeon/r600_cs.h"
37 #include "radeon/radeon_llvm.h"
38 #include "radeon/radeon_elf_util.h"
39 #include "radeon/radeon_llvm_emit.h"
40 #include "util/u_memory.h"
41 #include "util/u_pstipple.h"
42 #include "tgsi/tgsi_parse.h"
43 #include "tgsi/tgsi_util.h"
44 #include "tgsi/tgsi_dump.h"
45
46 #include "si_pipe.h"
47 #include "si_shader.h"
48 #include "sid.h"
49
50 #include <errno.h>
51
/* Symbol names for the two dwords of the scratch buffer resource
 * descriptor.  NOTE(review): the descriptor is patched in at these symbols
 * after compilation — the patching code is outside this chunk; confirm. */
static const char *scratch_rsrc_dword0_symbol =
	"SCRATCH_RSRC_DWORD0";

static const char *scratch_rsrc_dword1_symbol =
	"SCRATCH_RSRC_DWORD1";
57
/* One shader output: the four channel values plus its TGSI semantic.
 * Presumably `name` is a TGSI_SEMANTIC_* and `sid` the semantic index,
 * matching the naming used elsewhere in this file — confirm at the uses. */
struct si_shader_output_values
{
	LLVMValueRef values[4];	/* one LLVM value per channel (x,y,z,w) */
	unsigned name;		/* semantic name */
	unsigned sid;		/* semantic index */
};
64
/* Per-compilation state for translating one TGSI shader to LLVM IR.
 * Wraps radeon_llvm_context, which must remain the first member: the
 * si_shader_context() helper casts an lp_build_tgsi_context pointer
 * directly to this type. */
struct si_shader_context
{
	struct radeon_llvm_context radeon_bld;	/* must be first (see cast above) */
	struct si_shader *shader;
	struct si_screen *screen;
	unsigned type; /* TGSI_PROCESSOR_* specifies the type of shader. */
	/* Indices of LLVM function parameters, used with LLVMGetParam().
	 * Which ones are valid depends on the shader type. */
	int param_streamout_config;
	int param_streamout_write_index;
	int param_streamout_offset[4];
	int param_vertex_id;
	int param_rel_auto_id;
	int param_vs_prim_id;
	int param_instance_id;
	int param_tes_u;
	int param_tes_v;
	int param_tes_rel_patch_id;
	int param_tes_patch_id;
	int param_es2gs_offset;
	LLVMTargetMachineRef tm;
	LLVMValueRef const_md;		/* metadata attached to constant loads */
	LLVMValueRef const_buffers[SI_NUM_CONST_BUFFERS];
	LLVMValueRef lds;		/* LDS array used for tessellation I/O */
	LLVMValueRef *constants[SI_NUM_CONST_BUFFERS];
	LLVMValueRef sampler_views[SI_NUM_SAMPLER_VIEWS];
	LLVMValueRef sampler_states[SI_NUM_SAMPLER_STATES];
	LLVMValueRef so_buffers[4];	/* streamout buffer descriptors */
	LLVMValueRef esgs_ring;
	LLVMValueRef gsvs_ring[4];
	LLVMValueRef gs_next_vertex[4];
};
95
/* Downcast the generic TGSI build context to the SI-specific context.
 * Relies on radeon_bld being the first member of si_shader_context. */
static struct si_shader_context *si_shader_context(
	struct lp_build_tgsi_context *bld_base)
{
	return (struct si_shader_context *)bld_base;
}
101
102
/* Bases/offsets for selecting an interpolation (i,j) register pair:
 * base picks perspective vs. linear, the offset picks the location.
 * (CENTROID_OFSET spelling is historical — sic.) */
#define PERSPECTIVE_BASE 0
#define LINEAR_BASE 9

#define SAMPLE_OFFSET 0
#define CENTER_OFFSET 2
#define CENTROID_OFSET 4

#define USE_SGPR_MAX_SUFFIX_LEN 5
/* LLVM address spaces — presumably matching the AMDGPU backend's
 * numbering (2 = constant, 3 = LDS); confirm against the backend. */
#define CONST_ADDR_SPACE 2
#define LOCAL_ADDR_SPACE 3
#define USER_SGPR_ADDR_SPACE 8

/* Message encodings for the s_sendmsg instruction used by GS. */
#define SENDMSG_GS 2
#define SENDMSG_GS_DONE 3

/* GS operation, placed in bits [5:4] of the message. */
#define SENDMSG_GS_OP_NOP (0 << 4)
#define SENDMSG_GS_OP_CUT (1 << 4)
#define SENDMSG_GS_OP_EMIT (2 << 4)
#define SENDMSG_GS_OP_EMIT_CUT (3 << 4)
123
124 /**
125 * Returns a unique index for a semantic name and index. The index must be
126 * less than 64, so that a 64-bit bitmask of used inputs or outputs can be
127 * calculated.
128 */
129 unsigned si_shader_io_get_unique_index(unsigned semantic_name, unsigned index)
130 {
131 switch (semantic_name) {
132 case TGSI_SEMANTIC_POSITION:
133 return 0;
134 case TGSI_SEMANTIC_PSIZE:
135 return 1;
136 case TGSI_SEMANTIC_CLIPDIST:
137 assert(index <= 1);
138 return 2 + index;
139 case TGSI_SEMANTIC_GENERIC:
140 if (index <= 63-4)
141 return 4 + index;
142 else
143 /* same explanation as in the default statement,
144 * the only user hitting this is st/nine.
145 */
146 return 0;
147
148 /* patch indices are completely separate and thus start from 0 */
149 case TGSI_SEMANTIC_TESSOUTER:
150 return 0;
151 case TGSI_SEMANTIC_TESSINNER:
152 return 1;
153 case TGSI_SEMANTIC_PATCH:
154 return 2 + index;
155
156 default:
157 /* Don't fail here. The result of this function is only used
158 * for LS, TCS, TES, and GS, where legacy GL semantics can't
159 * occur, but this function is called for all vertex shaders
160 * before it's known whether LS will be compiled or not.
161 */
162 return 0;
163 }
164 }
165
166 /**
167 * Get the value of a shader input parameter and extract a bitfield.
168 */
169 static LLVMValueRef unpack_param(struct si_shader_context *si_shader_ctx,
170 unsigned param, unsigned rshift,
171 unsigned bitwidth)
172 {
173 struct gallivm_state *gallivm = &si_shader_ctx->radeon_bld.gallivm;
174 LLVMValueRef value = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
175 param);
176
177 if (rshift)
178 value = LLVMBuildLShr(gallivm->builder, value,
179 lp_build_const_int32(gallivm, rshift), "");
180
181 if (rshift + bitwidth < 32) {
182 unsigned mask = (1 << bitwidth) - 1;
183 value = LLVMBuildAnd(gallivm->builder, value,
184 lp_build_const_int32(gallivm, mask), "");
185 }
186
187 return value;
188 }
189
190 static LLVMValueRef get_rel_patch_id(struct si_shader_context *si_shader_ctx)
191 {
192 switch (si_shader_ctx->type) {
193 case TGSI_PROCESSOR_TESS_CTRL:
194 return unpack_param(si_shader_ctx, SI_PARAM_REL_IDS, 0, 8);
195
196 case TGSI_PROCESSOR_TESS_EVAL:
197 return LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
198 si_shader_ctx->param_tes_rel_patch_id);
199
200 default:
201 assert(0);
202 return NULL;
203 }
204 }
205
206 /* Tessellation shaders pass outputs to the next shader using LDS.
207 *
208 * LS outputs = TCS inputs
209 * TCS outputs = TES inputs
210 *
211 * The LDS layout is:
212 * - TCS inputs for patch 0
213 * - TCS inputs for patch 1
214 * - TCS inputs for patch 2 = get_tcs_in_current_patch_offset (if RelPatchID==2)
215 * - ...
216 * - TCS outputs for patch 0 = get_tcs_out_patch0_offset
217 * - Per-patch TCS outputs for patch 0 = get_tcs_out_patch0_patch_data_offset
218 * - TCS outputs for patch 1
219 * - Per-patch TCS outputs for patch 1
220 * - TCS outputs for patch 2 = get_tcs_out_current_patch_offset (if RelPatchID==2)
221 * - Per-patch TCS outputs for patch 2 = get_tcs_out_current_patch_data_offset (if RelPatchID==2)
222 * - ...
223 *
224 * All three shaders VS(LS), TCS, TES share the same LDS space.
225 */
226
227 static LLVMValueRef
228 get_tcs_in_patch_stride(struct si_shader_context *si_shader_ctx)
229 {
230 if (si_shader_ctx->type == TGSI_PROCESSOR_VERTEX)
231 return unpack_param(si_shader_ctx, SI_PARAM_LS_OUT_LAYOUT, 0, 13);
232 else if (si_shader_ctx->type == TGSI_PROCESSOR_TESS_CTRL)
233 return unpack_param(si_shader_ctx, SI_PARAM_TCS_IN_LAYOUT, 0, 13);
234 else {
235 assert(0);
236 return NULL;
237 }
238 }
239
240 static LLVMValueRef
241 get_tcs_out_patch_stride(struct si_shader_context *si_shader_ctx)
242 {
243 return unpack_param(si_shader_ctx, SI_PARAM_TCS_OUT_LAYOUT, 0, 13);
244 }
245
246 static LLVMValueRef
247 get_tcs_out_patch0_offset(struct si_shader_context *si_shader_ctx)
248 {
249 return lp_build_mul_imm(&si_shader_ctx->radeon_bld.soa.bld_base.uint_bld,
250 unpack_param(si_shader_ctx,
251 SI_PARAM_TCS_OUT_OFFSETS,
252 0, 16),
253 4);
254 }
255
256 static LLVMValueRef
257 get_tcs_out_patch0_patch_data_offset(struct si_shader_context *si_shader_ctx)
258 {
259 return lp_build_mul_imm(&si_shader_ctx->radeon_bld.soa.bld_base.uint_bld,
260 unpack_param(si_shader_ctx,
261 SI_PARAM_TCS_OUT_OFFSETS,
262 16, 16),
263 4);
264 }
265
266 static LLVMValueRef
267 get_tcs_in_current_patch_offset(struct si_shader_context *si_shader_ctx)
268 {
269 struct gallivm_state *gallivm = &si_shader_ctx->radeon_bld.gallivm;
270 LLVMValueRef patch_stride = get_tcs_in_patch_stride(si_shader_ctx);
271 LLVMValueRef rel_patch_id = get_rel_patch_id(si_shader_ctx);
272
273 return LLVMBuildMul(gallivm->builder, patch_stride, rel_patch_id, "");
274 }
275
276 static LLVMValueRef
277 get_tcs_out_current_patch_offset(struct si_shader_context *si_shader_ctx)
278 {
279 struct gallivm_state *gallivm = &si_shader_ctx->radeon_bld.gallivm;
280 LLVMValueRef patch0_offset = get_tcs_out_patch0_offset(si_shader_ctx);
281 LLVMValueRef patch_stride = get_tcs_out_patch_stride(si_shader_ctx);
282 LLVMValueRef rel_patch_id = get_rel_patch_id(si_shader_ctx);
283
284 return LLVMBuildAdd(gallivm->builder, patch0_offset,
285 LLVMBuildMul(gallivm->builder, patch_stride,
286 rel_patch_id, ""),
287 "");
288 }
289
290 static LLVMValueRef
291 get_tcs_out_current_patch_data_offset(struct si_shader_context *si_shader_ctx)
292 {
293 struct gallivm_state *gallivm = &si_shader_ctx->radeon_bld.gallivm;
294 LLVMValueRef patch0_patch_data_offset =
295 get_tcs_out_patch0_patch_data_offset(si_shader_ctx);
296 LLVMValueRef patch_stride = get_tcs_out_patch_stride(si_shader_ctx);
297 LLVMValueRef rel_patch_id = get_rel_patch_id(si_shader_ctx);
298
299 return LLVMBuildAdd(gallivm->builder, patch0_patch_data_offset,
300 LLVMBuildMul(gallivm->builder, patch_stride,
301 rel_patch_id, ""),
302 "");
303 }
304
305 static void build_indexed_store(struct si_shader_context *si_shader_ctx,
306 LLVMValueRef base_ptr, LLVMValueRef index,
307 LLVMValueRef value)
308 {
309 struct lp_build_tgsi_context *bld_base = &si_shader_ctx->radeon_bld.soa.bld_base;
310 struct gallivm_state *gallivm = bld_base->base.gallivm;
311 LLVMValueRef indices[2], pointer;
312
313 indices[0] = bld_base->uint_bld.zero;
314 indices[1] = index;
315
316 pointer = LLVMBuildGEP(gallivm->builder, base_ptr, indices, 2, "");
317 LLVMBuildStore(gallivm->builder, value, pointer);
318 }
319
320 /**
321 * Build an LLVM bytecode indexed load using LLVMBuildGEP + LLVMBuildLoad.
322 * It's equivalent to doing a load from &base_ptr[index].
323 *
324 * \param base_ptr Where the array starts.
325 * \param index The element index into the array.
326 */
327 static LLVMValueRef build_indexed_load(struct si_shader_context *si_shader_ctx,
328 LLVMValueRef base_ptr, LLVMValueRef index)
329 {
330 struct lp_build_tgsi_context *bld_base = &si_shader_ctx->radeon_bld.soa.bld_base;
331 struct gallivm_state *gallivm = bld_base->base.gallivm;
332 LLVMValueRef indices[2], pointer;
333
334 indices[0] = bld_base->uint_bld.zero;
335 indices[1] = index;
336
337 pointer = LLVMBuildGEP(gallivm->builder, base_ptr, indices, 2, "");
338 return LLVMBuildLoad(gallivm->builder, pointer, "");
339 }
340
341 /**
342 * Do a load from &base_ptr[index], but also add a flag that it's loading
343 * a constant.
344 */
345 static LLVMValueRef build_indexed_load_const(
346 struct si_shader_context * si_shader_ctx,
347 LLVMValueRef base_ptr, LLVMValueRef index)
348 {
349 LLVMValueRef result = build_indexed_load(si_shader_ctx, base_ptr, index);
350 LLVMSetMetadata(result, 1, si_shader_ctx->const_md);
351 return result;
352 }
353
354 static LLVMValueRef get_instance_index_for_fetch(
355 struct radeon_llvm_context * radeon_bld,
356 unsigned divisor)
357 {
358 struct si_shader_context *si_shader_ctx =
359 si_shader_context(&radeon_bld->soa.bld_base);
360 struct gallivm_state * gallivm = radeon_bld->soa.bld_base.base.gallivm;
361
362 LLVMValueRef result = LLVMGetParam(radeon_bld->main_fn,
363 si_shader_ctx->param_instance_id);
364
365 /* The division must be done before START_INSTANCE is added. */
366 if (divisor > 1)
367 result = LLVMBuildUDiv(gallivm->builder, result,
368 lp_build_const_int32(gallivm, divisor), "");
369
370 return LLVMBuildAdd(gallivm->builder, result, LLVMGetParam(
371 radeon_bld->main_fn, SI_PARAM_START_INSTANCE), "");
372 }
373
/**
 * Declare one vertex shader input: fetch the attribute with the
 * llvm.SI.vs.load.input intrinsic and store its four channels into the
 * SoA input slots of the radeon_llvm_context.
 */
static void declare_input_vs(
	struct radeon_llvm_context *radeon_bld,
	unsigned input_index,
	const struct tgsi_full_declaration *decl)
{
	struct lp_build_context *base = &radeon_bld->soa.bld_base.base;
	struct gallivm_state *gallivm = base->gallivm;
	struct si_shader_context *si_shader_ctx =
		si_shader_context(&radeon_bld->soa.bld_base);
	/* A non-zero divisor marks this attribute as per-instance. */
	unsigned divisor = si_shader_ctx->shader->key.vs.instance_divisors[input_index];

	unsigned chan;

	LLVMValueRef t_list_ptr;
	LLVMValueRef t_offset;
	LLVMValueRef t_list;
	LLVMValueRef attribute_offset;
	LLVMValueRef buffer_index;
	LLVMValueRef args[3];
	LLVMTypeRef vec4_type;
	LLVMValueRef input;

	/* Load the T list (the vertex buffer resource descriptor). */
	t_list_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_VERTEX_BUFFERS);

	t_offset = lp_build_const_int32(gallivm, input_index);

	t_list = build_indexed_load_const(si_shader_ctx, t_list_ptr, t_offset);

	/* Build the attribute offset */
	attribute_offset = lp_build_const_int32(gallivm, 0);

	if (divisor) {
		/* Build index from instance ID, start instance and divisor */
		si_shader_ctx->shader->uses_instanceid = true;
		buffer_index = get_instance_index_for_fetch(&si_shader_ctx->radeon_bld, divisor);
	} else {
		/* Load the buffer index for vertices: BaseVertex + VertexID. */
		LLVMValueRef vertex_id = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
						      si_shader_ctx->param_vertex_id);
		LLVMValueRef base_vertex = LLVMGetParam(radeon_bld->main_fn,
							SI_PARAM_BASE_VERTEX);
		buffer_index = LLVMBuildAdd(gallivm->builder, base_vertex, vertex_id, "");
	}

	vec4_type = LLVMVectorType(base->elem_type, 4);
	args[0] = t_list;
	args[1] = attribute_offset;
	args[2] = buffer_index;
	/* ReadNone: the intrinsic has no memory effects visible to LLVM. */
	input = lp_build_intrinsic(gallivm->builder,
				   "llvm.SI.vs.load.input", vec4_type, args, 3,
				   LLVMReadNoneAttribute | LLVMNoUnwindAttribute);

	/* Break up the vec4 into individual components */
	for (chan = 0; chan < 4; chan++) {
		LLVMValueRef llvm_chan = lp_build_const_int32(gallivm, chan);
		/* XXX: Use a helper function for this. There is one in
		 * tgsi_llvm.c. */
		si_shader_ctx->radeon_bld.inputs[radeon_llvm_reg_index_soa(input_index, chan)] =
			LLVMBuildExtractElement(gallivm->builder,
						input, llvm_chan, "");
	}
}
437
438 static LLVMValueRef get_primitive_id(struct lp_build_tgsi_context *bld_base,
439 unsigned swizzle)
440 {
441 struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
442
443 if (swizzle > 0)
444 return bld_base->uint_bld.zero;
445
446 switch (si_shader_ctx->type) {
447 case TGSI_PROCESSOR_VERTEX:
448 return LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
449 si_shader_ctx->param_vs_prim_id);
450 case TGSI_PROCESSOR_TESS_CTRL:
451 return LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
452 SI_PARAM_PATCH_ID);
453 case TGSI_PROCESSOR_TESS_EVAL:
454 return LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
455 si_shader_ctx->param_tes_patch_id);
456 case TGSI_PROCESSOR_GEOMETRY:
457 return LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
458 SI_PARAM_PRIMITIVE_ID);
459 default:
460 assert(0);
461 return bld_base->uint_bld.zero;
462 }
463 }
464
465 /**
466 * Return the value of tgsi_ind_register for indexing.
467 * This is the indirect index with the constant offset added to it.
468 */
469 static LLVMValueRef get_indirect_index(struct si_shader_context *si_shader_ctx,
470 const struct tgsi_ind_register *ind,
471 int rel_index)
472 {
473 struct gallivm_state *gallivm = si_shader_ctx->radeon_bld.soa.bld_base.base.gallivm;
474 LLVMValueRef result;
475
476 result = si_shader_ctx->radeon_bld.soa.addr[ind->Index][ind->Swizzle];
477 result = LLVMBuildLoad(gallivm->builder, result, "");
478 result = LLVMBuildAdd(gallivm->builder, result,
479 lp_build_const_int32(gallivm, rel_index), "");
480 return result;
481 }
482
/**
 * Calculate a dword address given an input or output register and a stride.
 *
 * Exactly one of \p dst and \p src is non-NULL; the address computation is
 * identical for both.  \p vertex_dw_stride is the dword stride between
 * vertices (used for 2-dimensional registers) and \p base_addr the starting
 * dword offset; the final element address is returned.
 */
static LLVMValueRef get_dw_address(struct si_shader_context *si_shader_ctx,
				   const struct tgsi_full_dst_register *dst,
				   const struct tgsi_full_src_register *src,
				   LLVMValueRef vertex_dw_stride,
				   LLVMValueRef base_addr)
{
	struct gallivm_state *gallivm = si_shader_ctx->radeon_bld.soa.bld_base.base.gallivm;
	struct tgsi_shader_info *info = &si_shader_ctx->shader->selector->info;
	ubyte *name, *index, *array_first;
	int first, param;
	struct tgsi_full_dst_register reg;

	/* Set the register description. The address computation is the same
	 * for sources and destinations. */
	if (src) {
		reg.Register.File = src->Register.File;
		reg.Register.Index = src->Register.Index;
		reg.Register.Indirect = src->Register.Indirect;
		reg.Register.Dimension = src->Register.Dimension;
		reg.Indirect = src->Indirect;
		reg.Dimension = src->Dimension;
		reg.DimIndirect = src->DimIndirect;
	} else
		reg = *dst;

	/* If the register is 2-dimensional (e.g. an array of vertices
	 * in a primitive), calculate the base address of the vertex. */
	if (reg.Register.Dimension) {
		LLVMValueRef index;

		if (reg.Dimension.Indirect)
			index = get_indirect_index(si_shader_ctx, &reg.DimIndirect,
						   reg.Dimension.Index);
		else
			index = lp_build_const_int32(gallivm, reg.Dimension.Index);

		base_addr = LLVMBuildAdd(gallivm->builder, base_addr,
					 LLVMBuildMul(gallivm->builder, index,
						      vertex_dw_stride, ""), "");
	}

	/* Get information about the register. */
	if (reg.Register.File == TGSI_FILE_INPUT) {
		name = info->input_semantic_name;
		index = info->input_semantic_index;
		array_first = info->input_array_first;
	} else if (reg.Register.File == TGSI_FILE_OUTPUT) {
		name = info->output_semantic_name;
		index = info->output_semantic_index;
		array_first = info->output_array_first;
	} else {
		assert(0);
		return NULL;
	}

	if (reg.Register.Indirect) {
		/* Add the relative address of the element. */
		LLVMValueRef ind_index;

		/* With an ArrayID, the indirect index is relative to the
		 * first register of the declared array. */
		if (reg.Indirect.ArrayID)
			first = array_first[reg.Indirect.ArrayID];
		else
			first = reg.Register.Index;

		ind_index = get_indirect_index(si_shader_ctx, &reg.Indirect,
					       reg.Register.Index - first);

		/* Each I/O slot occupies 4 dwords. */
		base_addr = LLVMBuildAdd(gallivm->builder, base_addr,
					 LLVMBuildMul(gallivm->builder, ind_index,
						      lp_build_const_int32(gallivm, 4), ""), "");

		param = si_shader_io_get_unique_index(name[first], index[first]);
	} else {
		param = si_shader_io_get_unique_index(name[reg.Register.Index],
						      index[reg.Register.Index]);
	}

	/* Add the base address of the element. */
	return LLVMBuildAdd(gallivm->builder, base_addr,
			    lp_build_const_int32(gallivm, param * 4), "");
}
567
568 /**
569 * Load from LDS.
570 *
571 * \param type output value type
572 * \param swizzle offset (typically 0..3); it can be ~0, which loads a vec4
573 * \param dw_addr address in dwords
574 */
575 static LLVMValueRef lds_load(struct lp_build_tgsi_context *bld_base,
576 enum tgsi_opcode_type type, unsigned swizzle,
577 LLVMValueRef dw_addr)
578 {
579 struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
580 struct gallivm_state *gallivm = bld_base->base.gallivm;
581 LLVMValueRef value;
582
583 if (swizzle == ~0) {
584 LLVMValueRef values[TGSI_NUM_CHANNELS];
585
586 for (unsigned chan = 0; chan < TGSI_NUM_CHANNELS; chan++)
587 values[chan] = lds_load(bld_base, type, chan, dw_addr);
588
589 return lp_build_gather_values(bld_base->base.gallivm, values,
590 TGSI_NUM_CHANNELS);
591 }
592
593 dw_addr = lp_build_add(&bld_base->uint_bld, dw_addr,
594 lp_build_const_int32(gallivm, swizzle));
595
596 value = build_indexed_load(si_shader_ctx, si_shader_ctx->lds, dw_addr);
597 if (type == TGSI_TYPE_DOUBLE) {
598 LLVMValueRef value2;
599 dw_addr = lp_build_add(&bld_base->uint_bld, dw_addr,
600 lp_build_const_int32(gallivm, swizzle + 1));
601 value2 = build_indexed_load(si_shader_ctx, si_shader_ctx->lds, dw_addr);
602 return radeon_llvm_emit_fetch_double(bld_base, value, value2);
603 }
604
605 return LLVMBuildBitCast(gallivm->builder, value,
606 tgsi2llvmtype(bld_base, type), "");
607 }
608
609 /**
610 * Store to LDS.
611 *
612 * \param swizzle offset (typically 0..3)
613 * \param dw_addr address in dwords
614 * \param value value to store
615 */
616 static void lds_store(struct lp_build_tgsi_context * bld_base,
617 unsigned swizzle, LLVMValueRef dw_addr,
618 LLVMValueRef value)
619 {
620 struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
621 struct gallivm_state *gallivm = bld_base->base.gallivm;
622
623 dw_addr = lp_build_add(&bld_base->uint_bld, dw_addr,
624 lp_build_const_int32(gallivm, swizzle));
625
626 value = LLVMBuildBitCast(gallivm->builder, value,
627 LLVMInt32TypeInContext(gallivm->context), "");
628 build_indexed_store(si_shader_ctx, si_shader_ctx->lds,
629 dw_addr, value);
630 }
631
632 static LLVMValueRef fetch_input_tcs(
633 struct lp_build_tgsi_context *bld_base,
634 const struct tgsi_full_src_register *reg,
635 enum tgsi_opcode_type type, unsigned swizzle)
636 {
637 struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
638 LLVMValueRef dw_addr, stride;
639
640 stride = unpack_param(si_shader_ctx, SI_PARAM_TCS_IN_LAYOUT, 13, 8);
641 dw_addr = get_tcs_in_current_patch_offset(si_shader_ctx);
642 dw_addr = get_dw_address(si_shader_ctx, NULL, reg, stride, dw_addr);
643
644 return lds_load(bld_base, type, swizzle, dw_addr);
645 }
646
647 static LLVMValueRef fetch_output_tcs(
648 struct lp_build_tgsi_context *bld_base,
649 const struct tgsi_full_src_register *reg,
650 enum tgsi_opcode_type type, unsigned swizzle)
651 {
652 struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
653 LLVMValueRef dw_addr, stride;
654
655 if (reg->Register.Dimension) {
656 stride = unpack_param(si_shader_ctx, SI_PARAM_TCS_OUT_LAYOUT, 13, 8);
657 dw_addr = get_tcs_out_current_patch_offset(si_shader_ctx);
658 dw_addr = get_dw_address(si_shader_ctx, NULL, reg, stride, dw_addr);
659 } else {
660 dw_addr = get_tcs_out_current_patch_data_offset(si_shader_ctx);
661 dw_addr = get_dw_address(si_shader_ctx, NULL, reg, NULL, dw_addr);
662 }
663
664 return lds_load(bld_base, type, swizzle, dw_addr);
665 }
666
667 static LLVMValueRef fetch_input_tes(
668 struct lp_build_tgsi_context *bld_base,
669 const struct tgsi_full_src_register *reg,
670 enum tgsi_opcode_type type, unsigned swizzle)
671 {
672 struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
673 LLVMValueRef dw_addr, stride;
674
675 if (reg->Register.Dimension) {
676 stride = unpack_param(si_shader_ctx, SI_PARAM_TCS_OUT_LAYOUT, 13, 8);
677 dw_addr = get_tcs_out_current_patch_offset(si_shader_ctx);
678 dw_addr = get_dw_address(si_shader_ctx, NULL, reg, stride, dw_addr);
679 } else {
680 dw_addr = get_tcs_out_current_patch_data_offset(si_shader_ctx);
681 dw_addr = get_dw_address(si_shader_ctx, NULL, reg, NULL, dw_addr);
682 }
683
684 return lds_load(bld_base, type, swizzle, dw_addr);
685 }
686
/**
 * Store a TCS output to LDS so TES (or the TCS itself) can read it back.
 * Non-output registers and vector stores fall through to the generic
 * radeon_llvm_emit_store.
 */
static void store_output_tcs(struct lp_build_tgsi_context * bld_base,
			     const struct tgsi_full_instruction * inst,
			     const struct tgsi_opcode_info * info,
			     LLVMValueRef dst[4])
{
	struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
	const struct tgsi_full_dst_register *reg = &inst->Dst[0];
	unsigned chan_index;
	LLVMValueRef dw_addr, stride;

	/* Only handle per-patch and per-vertex outputs here.
	 * Vectors will be lowered to scalars and this function will be called again.
	 */
	if (reg->Register.File != TGSI_FILE_OUTPUT ||
	    (dst[0] && LLVMGetTypeKind(LLVMTypeOf(dst[0])) == LLVMVectorTypeKind)) {
		radeon_llvm_emit_store(bld_base, inst, info, dst);
		return;
	}

	if (reg->Register.Dimension) {
		/* Per-vertex output: vertex stride from TCS_OUT_LAYOUT. */
		stride = unpack_param(si_shader_ctx, SI_PARAM_TCS_OUT_LAYOUT, 13, 8);
		dw_addr = get_tcs_out_current_patch_offset(si_shader_ctx);
		dw_addr = get_dw_address(si_shader_ctx, reg, NULL, stride, dw_addr);
	} else {
		/* Per-patch output: no vertex stride. */
		dw_addr = get_tcs_out_current_patch_data_offset(si_shader_ctx);
		dw_addr = get_dw_address(si_shader_ctx, reg, NULL, NULL, dw_addr);
	}

	/* Store each enabled channel, applying saturation if requested. */
	TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(inst, chan_index) {
		LLVMValueRef value = dst[chan_index];

		if (inst->Instruction.Saturate)
			value = radeon_llvm_saturate(bld_base, value);

		lds_store(bld_base, chan_index, dw_addr, value);
	}
}
724
/**
 * Fetch a GS input from the ESGS ring buffer, where the previous stage
 * (ES) wrote its outputs.  PRIMID is handled specially via
 * get_primitive_id; swizzle ~0 fetches all four channels as a vector.
 */
static LLVMValueRef fetch_input_gs(
	struct lp_build_tgsi_context *bld_base,
	const struct tgsi_full_src_register *reg,
	enum tgsi_opcode_type type,
	unsigned swizzle)
{
	struct lp_build_context *base = &bld_base->base;
	struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
	struct si_shader *shader = si_shader_ctx->shader;
	struct lp_build_context *uint =	&si_shader_ctx->radeon_bld.soa.bld_base.uint_bld;
	struct gallivm_state *gallivm = base->gallivm;
	LLVMTypeRef i32 = LLVMInt32TypeInContext(gallivm->context);
	LLVMValueRef vtx_offset;
	LLVMValueRef args[9];
	unsigned vtx_offset_param;
	struct tgsi_shader_info *info = &shader->selector->info;
	unsigned semantic_name = info->input_semantic_name[reg->Register.Index];
	unsigned semantic_index = info->input_semantic_index[reg->Register.Index];
	unsigned param;
	LLVMValueRef value;

	if (swizzle != ~0 && semantic_name == TGSI_SEMANTIC_PRIMID)
		return get_primitive_id(bld_base, swizzle);

	/* GS inputs are always per-vertex (2-dimensional). */
	if (!reg->Register.Dimension)
		return NULL;

	if (swizzle == ~0) {
		/* Fetch each channel separately and gather into a vector. */
		LLVMValueRef values[TGSI_NUM_CHANNELS];
		unsigned chan;
		for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
			values[chan] = fetch_input_gs(bld_base, reg, type, chan);
		}
		return lp_build_gather_values(bld_base->base.gallivm, values,
					      TGSI_NUM_CHANNELS);
	}

	/* Get the vertex offset parameter; vertices 0-1 and 2-5 live in
	 * two separate parameter ranges. */
	vtx_offset_param = reg->Dimension.Index;
	if (vtx_offset_param < 2) {
		vtx_offset_param += SI_PARAM_VTX0_OFFSET;
	} else {
		assert(vtx_offset_param < 6);
		vtx_offset_param += SI_PARAM_VTX2_OFFSET - 2;
	}
	vtx_offset = lp_build_mul_imm(uint,
				      LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
						   vtx_offset_param),
				      4);

	/* Build the buffer-load arguments; each I/O slot is 4 dwords and
	 * the dword offset is scaled by 256. */
	param = si_shader_io_get_unique_index(semantic_name, semantic_index);
	args[0] = si_shader_ctx->esgs_ring;
	args[1] = vtx_offset;
	args[2] = lp_build_const_int32(gallivm, (param * 4 + swizzle) * 256);
	args[3] = uint->zero;
	args[4] = uint->one;  /* OFFEN */
	args[5] = uint->zero; /* IDXEN */
	args[6] = uint->one;  /* GLC */
	args[7] = uint->zero; /* SLC */
	args[8] = uint->zero; /* TFE */

	value = lp_build_intrinsic(gallivm->builder,
				   "llvm.SI.buffer.load.dword.i32.i32",
				   i32, args, 9,
				   LLVMReadOnlyAttribute | LLVMNoUnwindAttribute);
	if (type == TGSI_TYPE_DOUBLE) {
		/* Doubles span two dwords; fetch the second half and combine. */
		LLVMValueRef value2;
		args[2] = lp_build_const_int32(gallivm, (param * 4 + swizzle + 1) * 256);
		value2 = lp_build_intrinsic(gallivm->builder,
					    "llvm.SI.buffer.load.dword.i32.i32",
					    i32, args, 9,
					    LLVMReadOnlyAttribute | LLVMNoUnwindAttribute);
		return radeon_llvm_emit_fetch_double(bld_base,
						     value, value2);
	}
	return LLVMBuildBitCast(gallivm->builder,
				value,
				tgsi2llvmtype(bld_base, type), "");
}
804
805 static int lookup_interp_param_index(unsigned interpolate, unsigned location)
806 {
807 switch (interpolate) {
808 case TGSI_INTERPOLATE_CONSTANT:
809 return 0;
810
811 case TGSI_INTERPOLATE_LINEAR:
812 if (location == TGSI_INTERPOLATE_LOC_SAMPLE)
813 return SI_PARAM_LINEAR_SAMPLE;
814 else if (location == TGSI_INTERPOLATE_LOC_CENTROID)
815 return SI_PARAM_LINEAR_CENTROID;
816 else
817 return SI_PARAM_LINEAR_CENTER;
818 break;
819 case TGSI_INTERPOLATE_COLOR:
820 case TGSI_INTERPOLATE_PERSPECTIVE:
821 if (location == TGSI_INTERPOLATE_LOC_SAMPLE)
822 return SI_PARAM_PERSP_SAMPLE;
823 else if (location == TGSI_INTERPOLATE_LOC_CENTROID)
824 return SI_PARAM_PERSP_CENTROID;
825 else
826 return SI_PARAM_PERSP_CENTER;
827 break;
828 default:
829 fprintf(stderr, "Warning: Unhandled interpolation mode.\n");
830 return -1;
831 }
832 }
833
/* This shouldn't be used by explicit INTERP opcodes. */
/**
 * Return the (i,j) pair to interpolate with for \p param, redirecting
 * center/centroid to the per-sample (i,j) when the shader selector forces
 * per-sample interpolation.
 */
static LLVMValueRef get_interp_param(struct si_shader_context *si_shader_ctx,
				     unsigned param)
{
	struct gallivm_state *gallivm = &si_shader_ctx->radeon_bld.gallivm;
	unsigned sample_param = 0;
	LLVMValueRef default_ij, sample_ij, force_sample;

	default_ij = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, param);

	/* If the shader doesn't use center/centroid, just return the parameter.
	 *
	 * If the shader only uses one set of (i,j), "si_emit_spi_ps_input" can
	 * switch between center/centroid and sample without shader changes.
	 */
	switch (param) {
	case SI_PARAM_PERSP_CENTROID:
	case SI_PARAM_PERSP_CENTER:
		if (!si_shader_ctx->shader->selector->forces_persample_interp_for_persp)
			return default_ij;

		sample_param = SI_PARAM_PERSP_SAMPLE;
		break;

	case SI_PARAM_LINEAR_CENTROID:
	case SI_PARAM_LINEAR_CENTER:
		if (!si_shader_ctx->shader->selector->forces_persample_interp_for_linear)
			return default_ij;

		sample_param = SI_PARAM_LINEAR_SAMPLE;
		break;

	default:
		/* Sample-location params (and anything else) need no redirect. */
		return default_ij;
	}

	/* Otherwise, we have to select (i,j) based on a user data SGPR. */
	sample_ij = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, sample_param);

	/* TODO: this can be done more efficiently by switching between
	 * 2 prologs.
	 */
	/* Bit 0 of PS_STATE_BITS selects sample vs. default (i,j). */
	force_sample = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
				    SI_PARAM_PS_STATE_BITS);
	force_sample = LLVMBuildTrunc(gallivm->builder, force_sample,
				      LLVMInt1TypeInContext(gallivm->context), "");
	return LLVMBuildSelect(gallivm->builder, force_sample,
			       sample_ij, default_ij, "");
}
883
/**
 * Declare one fragment shader input and emit the LLVM IR that loads it
 * into radeon_bld->inputs[] (one value per channel).
 *
 * POSITION and FACE arrive in dedicated shader inputs; everything else
 * goes through the interpolator intrinsics llvm.SI.fs.interp /
 * llvm.SI.fs.constant and consumes a parameter slot (shader->nparam).
 *
 * \param radeon_bld   common radeon LLVM context
 * \param input_index  TGSI input register index being declared
 * \param decl         TGSI declaration (semantic + interpolation info)
 */
static void declare_input_fs(
	struct radeon_llvm_context *radeon_bld,
	unsigned input_index,
	const struct tgsi_full_declaration *decl)
{
	struct lp_build_context *base = &radeon_bld->soa.bld_base.base;
	struct si_shader_context *si_shader_ctx =
		si_shader_context(&radeon_bld->soa.bld_base);
	struct si_shader *shader = si_shader_ctx->shader;
	struct lp_build_context *uint = &radeon_bld->soa.bld_base.uint_bld;
	struct gallivm_state *gallivm = base->gallivm;
	LLVMTypeRef input_type = LLVMFloatTypeInContext(gallivm->context);
	LLVMValueRef main_fn = radeon_bld->main_fn;

	LLVMValueRef interp_param = NULL;
	int interp_param_idx;
	const char * intr_name;

	/* This value is:
	 * [15:0]  NewPrimMask (Bit mask for each quad.  It is set if the
	 *                      quad begins a new primitive.  Bit 0 always
	 *                      needs to be unset)
	 * [32:16] ParamOffset
	 */
	LLVMValueRef params = LLVMGetParam(main_fn, SI_PARAM_PRIM_MASK);
	LLVMValueRef attr_number;

	unsigned chan;

	/* gl_FragCoord: X/Y/Z/W come in as dedicated float inputs. */
	if (decl->Semantic.Name == TGSI_SEMANTIC_POSITION) {
		for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
			unsigned soa_index =
				radeon_llvm_reg_index_soa(input_index, chan);
			radeon_bld->inputs[soa_index] =
				LLVMGetParam(main_fn, SI_PARAM_POS_X_FLOAT + chan);

			if (chan == 3)
				/* RCP for fragcoord.w */
				radeon_bld->inputs[soa_index] =
					LLVMBuildFDiv(gallivm->builder,
						      lp_build_const_float(gallivm, 1.0f),
						      radeon_bld->inputs[soa_index],
						      "");
		}
		return;
	}

	/* gl_FrontFacing: X = the HW face input, Y = Z = 0, W = 1. */
	if (decl->Semantic.Name == TGSI_SEMANTIC_FACE) {
		radeon_bld->inputs[radeon_llvm_reg_index_soa(input_index, 0)] =
			LLVMGetParam(main_fn, SI_PARAM_FRONT_FACE);
		radeon_bld->inputs[radeon_llvm_reg_index_soa(input_index, 1)] =
		radeon_bld->inputs[radeon_llvm_reg_index_soa(input_index, 2)] =
			lp_build_const_float(gallivm, 0.0f);
		radeon_bld->inputs[radeon_llvm_reg_index_soa(input_index, 3)] =
			lp_build_const_float(gallivm, 1.0f);

		return;
	}

	/* All remaining inputs are interpolated parameters: allocate a
	 * parameter slot and record the interpolation mode for the
	 * hardware state. */
	shader->ps_input_param_offset[input_index] = shader->nparam++;
	attr_number = lp_build_const_int32(gallivm,
					   shader->ps_input_param_offset[input_index]);

	shader->ps_input_interpolate[input_index] = decl->Interp.Interpolate;
	interp_param_idx = lookup_interp_param_index(decl->Interp.Interpolate,
						     decl->Interp.Location);
	if (interp_param_idx == -1)
		return;
	else if (interp_param_idx)
		interp_param = get_interp_param(si_shader_ctx, interp_param_idx);

	/* fs.constant returns the param from the middle vertex, so it's not
	 * really useful for flat shading. It's meant to be used for custom
	 * interpolation (but the intrinsic can't fetch from the other two
	 * vertices).
	 *
	 * Luckily, it doesn't matter, because we rely on the FLAT_SHADE state
	 * to do the right thing. The only reason we use fs.constant is that
	 * fs.interp cannot be used on integers, because they can be equal
	 * to NaN.
	 */
	intr_name = interp_param ? "llvm.SI.fs.interp" : "llvm.SI.fs.constant";

	if (decl->Semantic.Name == TGSI_SEMANTIC_COLOR &&
	    si_shader_ctx->shader->key.ps.color_two_side) {
		/* Two-sided lighting: the back-face color lives in the next
		 * parameter slot; select front/back per channel based on
		 * the sign of the FACE input. */
		LLVMValueRef args[4];
		LLVMValueRef face, is_face_positive;
		LLVMValueRef back_attr_number =
			lp_build_const_int32(gallivm,
					     shader->ps_input_param_offset[input_index] + 1);

		face = LLVMGetParam(main_fn, SI_PARAM_FRONT_FACE);

		is_face_positive = LLVMBuildFCmp(gallivm->builder,
						 LLVMRealOGT, face,
						 lp_build_const_float(gallivm, 0.0f),
						 "");

		args[2] = params;
		args[3] = interp_param;
		for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
			LLVMValueRef llvm_chan = lp_build_const_int32(gallivm, chan);
			unsigned soa_index = radeon_llvm_reg_index_soa(input_index, chan);
			LLVMValueRef front, back;

			args[0] = llvm_chan;
			args[1] = attr_number;
			front = lp_build_intrinsic(gallivm->builder, intr_name,
						   input_type, args, args[3] ? 4 : 3,
						   LLVMReadNoneAttribute | LLVMNoUnwindAttribute);

			args[1] = back_attr_number;
			back = lp_build_intrinsic(gallivm->builder, intr_name,
						  input_type, args, args[3] ? 4 : 3,
						  LLVMReadNoneAttribute | LLVMNoUnwindAttribute);

			radeon_bld->inputs[soa_index] =
				LLVMBuildSelect(gallivm->builder,
						is_face_positive,
						front,
						back,
						"");
		}

		/* The back color consumed a second parameter slot. */
		shader->nparam++;
	} else if (decl->Semantic.Name == TGSI_SEMANTIC_FOG) {
		/* Fog: only X is interpolated; Y = Z = 0, W = 1. */
		LLVMValueRef args[4];

		args[0] = uint->zero;
		args[1] = attr_number;
		args[2] = params;
		args[3] = interp_param;
		radeon_bld->inputs[radeon_llvm_reg_index_soa(input_index, 0)] =
			lp_build_intrinsic(gallivm->builder, intr_name,
					   input_type, args, args[3] ? 4 : 3,
					   LLVMReadNoneAttribute | LLVMNoUnwindAttribute);
		radeon_bld->inputs[radeon_llvm_reg_index_soa(input_index, 1)] =
		radeon_bld->inputs[radeon_llvm_reg_index_soa(input_index, 2)] =
			lp_build_const_float(gallivm, 0.0f);
		radeon_bld->inputs[radeon_llvm_reg_index_soa(input_index, 3)] =
			lp_build_const_float(gallivm, 1.0f);
	} else {
		/* Generic case: interpolate all four channels. */
		for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
			LLVMValueRef args[4];
			LLVMValueRef llvm_chan = lp_build_const_int32(gallivm, chan);
			unsigned soa_index = radeon_llvm_reg_index_soa(input_index, chan);
			args[0] = llvm_chan;
			args[1] = attr_number;
			args[2] = params;
			args[3] = interp_param;
			radeon_bld->inputs[soa_index] =
				lp_build_intrinsic(gallivm->builder, intr_name,
						   input_type, args, args[3] ? 4 : 3,
						   LLVMReadNoneAttribute | LLVMNoUnwindAttribute);
		}
	}
}
1042
1043 static LLVMValueRef get_sample_id(struct radeon_llvm_context *radeon_bld)
1044 {
1045 return unpack_param(si_shader_context(&radeon_bld->soa.bld_base),
1046 SI_PARAM_ANCILLARY, 8, 4);
1047 }
1048
1049 /**
1050 * Load a dword from a constant buffer.
1051 */
1052 static LLVMValueRef buffer_load_const(LLVMBuilderRef builder, LLVMValueRef resource,
1053 LLVMValueRef offset, LLVMTypeRef return_type)
1054 {
1055 LLVMValueRef args[2] = {resource, offset};
1056
1057 return lp_build_intrinsic(builder, "llvm.SI.load.const", return_type, args, 2,
1058 LLVMReadNoneAttribute | LLVMNoUnwindAttribute);
1059 }
1060
1061 static LLVMValueRef load_sample_position(struct radeon_llvm_context *radeon_bld, LLVMValueRef sample_id)
1062 {
1063 struct si_shader_context *si_shader_ctx =
1064 si_shader_context(&radeon_bld->soa.bld_base);
1065 struct lp_build_context *uint_bld = &radeon_bld->soa.bld_base.uint_bld;
1066 struct gallivm_state *gallivm = &radeon_bld->gallivm;
1067 LLVMBuilderRef builder = gallivm->builder;
1068 LLVMValueRef desc = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_CONST_BUFFERS);
1069 LLVMValueRef buf_index = lp_build_const_int32(gallivm, SI_DRIVER_STATE_CONST_BUF);
1070 LLVMValueRef resource = build_indexed_load_const(si_shader_ctx, desc, buf_index);
1071
1072 /* offset = sample_id * 8 (8 = 2 floats containing samplepos.xy) */
1073 LLVMValueRef offset0 = lp_build_mul_imm(uint_bld, sample_id, 8);
1074 LLVMValueRef offset1 = LLVMBuildAdd(builder, offset0, lp_build_const_int32(gallivm, 4), "");
1075
1076 LLVMValueRef pos[4] = {
1077 buffer_load_const(builder, resource, offset0, radeon_bld->soa.bld_base.base.elem_type),
1078 buffer_load_const(builder, resource, offset1, radeon_bld->soa.bld_base.base.elem_type),
1079 lp_build_const_float(gallivm, 0),
1080 lp_build_const_float(gallivm, 0)
1081 };
1082
1083 return lp_build_gather_values(gallivm, pos, 4);
1084 }
1085
/**
 * Declare a TGSI system value and store the LLVM value that provides it
 * into radeon_bld->system_values[index].
 *
 * Each semantic maps either to a shader input parameter, an unpacked
 * bitfield of one, or a small computed expression.
 */
static void declare_system_value(
	struct radeon_llvm_context * radeon_bld,
	unsigned index,
	const struct tgsi_full_declaration *decl)
{
	struct si_shader_context *si_shader_ctx =
		si_shader_context(&radeon_bld->soa.bld_base);
	struct lp_build_context *bld = &radeon_bld->soa.bld_base.base;
	struct lp_build_context *uint_bld = &radeon_bld->soa.bld_base.uint_bld;
	struct gallivm_state *gallivm = &radeon_bld->gallivm;
	LLVMValueRef value = 0;

	switch (decl->Semantic.Name) {
	case TGSI_SEMANTIC_INSTANCEID:
		value = LLVMGetParam(radeon_bld->main_fn,
				     si_shader_ctx->param_instance_id);
		break;

	case TGSI_SEMANTIC_VERTEXID:
		/* gl_VertexID includes the base vertex: add it to the
		 * raw HW vertex id. */
		value = LLVMBuildAdd(gallivm->builder,
				     LLVMGetParam(radeon_bld->main_fn,
						  si_shader_ctx->param_vertex_id),
				     LLVMGetParam(radeon_bld->main_fn,
						  SI_PARAM_BASE_VERTEX), "");
		break;

	case TGSI_SEMANTIC_VERTEXID_NOBASE:
		value = LLVMGetParam(radeon_bld->main_fn,
				     si_shader_ctx->param_vertex_id);
		break;

	case TGSI_SEMANTIC_BASEVERTEX:
		value = LLVMGetParam(radeon_bld->main_fn,
				     SI_PARAM_BASE_VERTEX);
		break;

	case TGSI_SEMANTIC_INVOCATIONID:
		/* TCS: bits [12:8] of REL_IDS; GS: a dedicated input.
		 * Other stages don't have this system value. */
		if (si_shader_ctx->type == TGSI_PROCESSOR_TESS_CTRL)
			value = unpack_param(si_shader_ctx, SI_PARAM_REL_IDS, 8, 5);
		else if (si_shader_ctx->type == TGSI_PROCESSOR_GEOMETRY)
			value = LLVMGetParam(radeon_bld->main_fn,
					     SI_PARAM_GS_INSTANCE_ID);
		else
			assert(!"INVOCATIONID not implemented");
		break;

	case TGSI_SEMANTIC_SAMPLEID:
		value = get_sample_id(radeon_bld);
		break;

	case TGSI_SEMANTIC_SAMPLEPOS:
		value = load_sample_position(radeon_bld, get_sample_id(radeon_bld));
		break;

	case TGSI_SEMANTIC_SAMPLEMASK:
		/* Smoothing isn't MSAA in GL, but it's MSAA in hardware.
		 * Therefore, force gl_SampleMaskIn to 1 for GL. */
		if (si_shader_ctx->shader->key.ps.poly_line_smoothing)
			value = uint_bld->one;
		else
			value = LLVMGetParam(radeon_bld->main_fn, SI_PARAM_SAMPLE_COVERAGE);
		break;

	case TGSI_SEMANTIC_TESSCOORD:
	{
		LLVMValueRef coord[4] = {
			LLVMGetParam(radeon_bld->main_fn, si_shader_ctx->param_tes_u),
			LLVMGetParam(radeon_bld->main_fn, si_shader_ctx->param_tes_v),
			bld->zero,
			bld->zero
		};

		/* For triangles, the vector should be (u, v, 1-u-v). */
		if (si_shader_ctx->shader->selector->info.properties[TGSI_PROPERTY_TES_PRIM_MODE] ==
		    PIPE_PRIM_TRIANGLES)
			coord[2] = lp_build_sub(bld, bld->one,
						lp_build_add(bld, coord[0], coord[1]));

		value = lp_build_gather_values(gallivm, coord, 4);
		break;
	}

	case TGSI_SEMANTIC_VERTICESIN:
		/* Patch vertex count: bits [31:26] of TCS_OUT_LAYOUT. */
		value = unpack_param(si_shader_ctx, SI_PARAM_TCS_OUT_LAYOUT, 26, 6);
		break;

	case TGSI_SEMANTIC_TESSINNER:
	case TGSI_SEMANTIC_TESSOUTER:
	{
		/* The tess levels live in LDS with the rest of the
		 * per-patch outputs; compute their dword address. */
		LLVMValueRef dw_addr;
		int param = si_shader_io_get_unique_index(decl->Semantic.Name, 0);

		dw_addr = get_tcs_out_current_patch_data_offset(si_shader_ctx);
		dw_addr = LLVMBuildAdd(gallivm->builder, dw_addr,
				       lp_build_const_int32(gallivm, param * 4), "");

		value = lds_load(&radeon_bld->soa.bld_base, TGSI_TYPE_FLOAT,
				 ~0, dw_addr);
		break;
	}

	case TGSI_SEMANTIC_PRIMID:
		value = get_primitive_id(&radeon_bld->soa.bld_base, 0);
		break;

	default:
		assert(!"unknown system value");
		return;
	}

	radeon_bld->system_values[index] = value;
}
1198
1199 static LLVMValueRef fetch_constant(
1200 struct lp_build_tgsi_context * bld_base,
1201 const struct tgsi_full_src_register *reg,
1202 enum tgsi_opcode_type type,
1203 unsigned swizzle)
1204 {
1205 struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
1206 struct lp_build_context * base = &bld_base->base;
1207 const struct tgsi_ind_register *ireg = &reg->Indirect;
1208 unsigned buf, idx;
1209
1210 LLVMValueRef addr, bufp;
1211 LLVMValueRef result;
1212
1213 if (swizzle == LP_CHAN_ALL) {
1214 unsigned chan;
1215 LLVMValueRef values[4];
1216 for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan)
1217 values[chan] = fetch_constant(bld_base, reg, type, chan);
1218
1219 return lp_build_gather_values(bld_base->base.gallivm, values, 4);
1220 }
1221
1222 buf = reg->Register.Dimension ? reg->Dimension.Index : 0;
1223 idx = reg->Register.Index * 4 + swizzle;
1224
1225 if (!reg->Register.Indirect && !reg->Dimension.Indirect) {
1226 if (type != TGSI_TYPE_DOUBLE)
1227 return bitcast(bld_base, type, si_shader_ctx->constants[buf][idx]);
1228 else {
1229 return radeon_llvm_emit_fetch_double(bld_base,
1230 si_shader_ctx->constants[buf][idx],
1231 si_shader_ctx->constants[buf][idx + 1]);
1232 }
1233 }
1234
1235 if (reg->Register.Dimension && reg->Dimension.Indirect) {
1236 LLVMValueRef ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_CONST_BUFFERS);
1237 LLVMValueRef index;
1238 index = get_indirect_index(si_shader_ctx, &reg->DimIndirect,
1239 reg->Dimension.Index);
1240 bufp = build_indexed_load_const(si_shader_ctx, ptr, index);
1241 } else
1242 bufp = si_shader_ctx->const_buffers[buf];
1243
1244 addr = si_shader_ctx->radeon_bld.soa.addr[ireg->Index][ireg->Swizzle];
1245 addr = LLVMBuildLoad(base->gallivm->builder, addr, "load addr reg");
1246 addr = lp_build_mul_imm(&bld_base->uint_bld, addr, 16);
1247 addr = lp_build_add(&bld_base->uint_bld, addr,
1248 lp_build_const_int32(base->gallivm, idx * 4));
1249
1250 result = buffer_load_const(base->gallivm->builder, bufp,
1251 addr, bld_base->base.elem_type);
1252
1253 if (type != TGSI_TYPE_DOUBLE)
1254 result = bitcast(bld_base, type, result);
1255 else {
1256 LLVMValueRef addr2, result2;
1257 addr2 = si_shader_ctx->radeon_bld.soa.addr[ireg->Index][ireg->Swizzle + 1];
1258 addr2 = LLVMBuildLoad(base->gallivm->builder, addr2, "load addr reg2");
1259 addr2 = lp_build_mul_imm(&bld_base->uint_bld, addr2, 16);
1260 addr2 = lp_build_add(&bld_base->uint_bld, addr2,
1261 lp_build_const_int32(base->gallivm, idx * 4));
1262
1263 result2 = buffer_load_const(base->gallivm->builder, si_shader_ctx->const_buffers[buf],
1264 addr2, bld_base->base.elem_type);
1265
1266 result = radeon_llvm_emit_fetch_double(bld_base,
1267 result, result2);
1268 }
1269 return result;
1270 }
1271
1272 /* Initialize arguments for the shader export intrinsic */
1273 static void si_llvm_init_export_args(struct lp_build_tgsi_context *bld_base,
1274 LLVMValueRef *values,
1275 unsigned target,
1276 LLVMValueRef *args)
1277 {
1278 struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
1279 struct lp_build_context *uint =
1280 &si_shader_ctx->radeon_bld.soa.bld_base.uint_bld;
1281 struct lp_build_context *base = &bld_base->base;
1282 unsigned compressed = 0;
1283 unsigned chan;
1284
1285 /* XXX: This controls which components of the output
1286 * registers actually get exported. (e.g bit 0 means export
1287 * X component, bit 1 means export Y component, etc.) I'm
1288 * hard coding this to 0xf for now. In the future, we might
1289 * want to do something else.
1290 */
1291 args[0] = lp_build_const_int32(base->gallivm, 0xf);
1292
1293 /* Specify whether the EXEC mask represents the valid mask */
1294 args[1] = uint->zero;
1295
1296 /* Specify whether this is the last export */
1297 args[2] = uint->zero;
1298
1299 /* Specify the target we are exporting */
1300 args[3] = lp_build_const_int32(base->gallivm, target);
1301
1302 if (si_shader_ctx->type == TGSI_PROCESSOR_FRAGMENT) {
1303 int cbuf = target - V_008DFC_SQ_EXP_MRT;
1304
1305 if (cbuf >= 0 && cbuf < 8)
1306 compressed = (si_shader_ctx->shader->key.ps.export_16bpc >> cbuf) & 0x1;
1307 }
1308
1309 /* Set COMPR flag */
1310 args[4] = compressed ? uint->one : uint->zero;
1311
1312 if (compressed) {
1313 /* Pixel shader needs to pack output values before export */
1314 for (chan = 0; chan < 2; chan++) {
1315 LLVMValueRef pack_args[2] = {
1316 values[2 * chan],
1317 values[2 * chan + 1]
1318 };
1319 LLVMValueRef packed;
1320
1321 packed = lp_build_intrinsic(base->gallivm->builder,
1322 "llvm.SI.packf16",
1323 LLVMInt32TypeInContext(base->gallivm->context),
1324 pack_args, 2,
1325 LLVMReadNoneAttribute | LLVMNoUnwindAttribute);
1326 args[chan + 7] = args[chan + 5] =
1327 LLVMBuildBitCast(base->gallivm->builder,
1328 packed,
1329 LLVMFloatTypeInContext(base->gallivm->context),
1330 "");
1331 }
1332 } else
1333 memcpy(&args[5], values, sizeof(values[0]) * 4);
1334 }
1335
1336 /* Load from output pointers and initialize arguments for the shader export intrinsic */
1337 static void si_llvm_init_export_args_load(struct lp_build_tgsi_context *bld_base,
1338 LLVMValueRef *out_ptr,
1339 unsigned target,
1340 LLVMValueRef *args)
1341 {
1342 struct gallivm_state *gallivm = bld_base->base.gallivm;
1343 LLVMValueRef values[4];
1344 int i;
1345
1346 for (i = 0; i < 4; i++)
1347 values[i] = LLVMBuildLoad(gallivm->builder, out_ptr[i], "");
1348
1349 si_llvm_init_export_args(bld_base, values, target, args);
1350 }
1351
1352 static void si_alpha_test(struct lp_build_tgsi_context *bld_base,
1353 LLVMValueRef alpha_ptr)
1354 {
1355 struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
1356 struct gallivm_state *gallivm = bld_base->base.gallivm;
1357
1358 if (si_shader_ctx->shader->key.ps.alpha_func != PIPE_FUNC_NEVER) {
1359 LLVMValueRef alpha_ref = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
1360 SI_PARAM_ALPHA_REF);
1361
1362 LLVMValueRef alpha_pass =
1363 lp_build_cmp(&bld_base->base,
1364 si_shader_ctx->shader->key.ps.alpha_func,
1365 LLVMBuildLoad(gallivm->builder, alpha_ptr, ""),
1366 alpha_ref);
1367 LLVMValueRef arg =
1368 lp_build_select(&bld_base->base,
1369 alpha_pass,
1370 lp_build_const_float(gallivm, 1.0f),
1371 lp_build_const_float(gallivm, -1.0f));
1372
1373 lp_build_intrinsic(gallivm->builder,
1374 "llvm.AMDGPU.kill",
1375 LLVMVoidTypeInContext(gallivm->context),
1376 &arg, 1, 0);
1377 } else {
1378 lp_build_intrinsic(gallivm->builder,
1379 "llvm.AMDGPU.kilp",
1380 LLVMVoidTypeInContext(gallivm->context),
1381 NULL, 0, 0);
1382 }
1383 }
1384
1385 static void si_scale_alpha_by_sample_mask(struct lp_build_tgsi_context *bld_base,
1386 LLVMValueRef alpha_ptr)
1387 {
1388 struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
1389 struct gallivm_state *gallivm = bld_base->base.gallivm;
1390 LLVMValueRef coverage, alpha;
1391
1392 /* alpha = alpha * popcount(coverage) / SI_NUM_SMOOTH_AA_SAMPLES */
1393 coverage = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
1394 SI_PARAM_SAMPLE_COVERAGE);
1395 coverage = bitcast(bld_base, TGSI_TYPE_SIGNED, coverage);
1396
1397 coverage = lp_build_intrinsic(gallivm->builder, "llvm.ctpop.i32",
1398 bld_base->int_bld.elem_type,
1399 &coverage, 1, LLVMReadNoneAttribute);
1400
1401 coverage = LLVMBuildUIToFP(gallivm->builder, coverage,
1402 bld_base->base.elem_type, "");
1403
1404 coverage = LLVMBuildFMul(gallivm->builder, coverage,
1405 lp_build_const_float(gallivm,
1406 1.0 / SI_NUM_SMOOTH_AA_SAMPLES), "");
1407
1408 alpha = LLVMBuildLoad(gallivm->builder, alpha_ptr, "");
1409 alpha = LLVMBuildFMul(gallivm->builder, alpha, coverage, "");
1410 LLVMBuildStore(gallivm->builder, alpha, alpha_ptr);
1411 }
1412
/**
 * Compute the user clip distances from a CLIPVERTEX output: dot the
 * position against each of the 8 user clip planes stored in the
 * driver-state constant buffer, and fill the two CLIPDIST position-export
 * argument arrays (pos[2] and pos[3]).  The caller emits the exports.
 *
 * \param pos       the 4 position-export argument arrays (9 args each)
 * \param out_elts  the 4 components of the clip vertex
 */
static void si_llvm_emit_clipvertex(struct lp_build_tgsi_context * bld_base,
				    LLVMValueRef (*pos)[9], LLVMValueRef *out_elts)
{
	struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
	struct lp_build_context *base = &bld_base->base;
	struct lp_build_context *uint = &si_shader_ctx->radeon_bld.soa.bld_base.uint_bld;
	unsigned reg_index;
	unsigned chan;
	unsigned const_chan;
	LLVMValueRef base_elt;
	LLVMValueRef ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_CONST_BUFFERS);
	LLVMValueRef constbuf_index = lp_build_const_int32(base->gallivm, SI_DRIVER_STATE_CONST_BUF);
	LLVMValueRef const_resource = build_indexed_load_const(si_shader_ctx, ptr, constbuf_index);

	/* Two exports cover clip distances 0-3 and 4-7. */
	for (reg_index = 0; reg_index < 2; reg_index ++) {
		LLVMValueRef *args = pos[2 + reg_index];

		/* Accumulate the dot products starting from zero. */
		args[5] =
		args[6] =
		args[7] =
		args[8] = lp_build_const_float(base->gallivm, 0.0f);

		/* Compute dot products of position and user clip plane vectors */
		for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
			for (const_chan = 0; const_chan < TGSI_NUM_CHANNELS; const_chan++) {
				/* args[1] is reused as a scratch byte offset
				 * into the clip-plane table here; it is
				 * overwritten with the real VM bit below. */
				args[1] = lp_build_const_int32(base->gallivm,
							       ((reg_index * 4 + chan) * 4 +
								const_chan) * 4);
				base_elt = buffer_load_const(base->gallivm->builder, const_resource,
							     args[1], base->elem_type);
				args[5 + chan] =
					lp_build_add(base, args[5 + chan],
						     lp_build_mul(base, base_elt,
								  out_elts[const_chan]));
			}
		}

		/* Export header: write all channels, VM/DONE bits off,
		 * target POS+2 / POS+3, no compression. */
		args[0] = lp_build_const_int32(base->gallivm, 0xf);
		args[1] = uint->zero;
		args[2] = uint->zero;
		args[3] = lp_build_const_int32(base->gallivm,
					       V_008DFC_SQ_EXP_POS + 2 + reg_index);
		args[4] = uint->zero;
	}
}
1458
1459 static void si_dump_streamout(struct pipe_stream_output_info *so)
1460 {
1461 unsigned i;
1462
1463 if (so->num_outputs)
1464 fprintf(stderr, "STREAMOUT\n");
1465
1466 for (i = 0; i < so->num_outputs; i++) {
1467 unsigned mask = ((1 << so->output[i].num_components) - 1) <<
1468 so->output[i].start_component;
1469 fprintf(stderr, " %i: BUF%i[%i..%i] <- OUT[%i].%s%s%s%s\n",
1470 i, so->output[i].output_buffer,
1471 so->output[i].dst_offset, so->output[i].dst_offset + so->output[i].num_components - 1,
1472 so->output[i].register_index,
1473 mask & 1 ? "x" : "",
1474 mask & 2 ? "y" : "",
1475 mask & 4 ? "z" : "",
1476 mask & 8 ? "w" : "");
1477 }
1478 }
1479
1480 /* TBUFFER_STORE_FORMAT_{X,XY,XYZ,XYZW} <- the suffix is selected by num_channels=1..4.
1481 * The type of vdata must be one of i32 (num_channels=1), v2i32 (num_channels=2),
1482 * or v4i32 (num_channels=3,4). */
1483 static void build_tbuffer_store(struct si_shader_context *shader,
1484 LLVMValueRef rsrc,
1485 LLVMValueRef vdata,
1486 unsigned num_channels,
1487 LLVMValueRef vaddr,
1488 LLVMValueRef soffset,
1489 unsigned inst_offset,
1490 unsigned dfmt,
1491 unsigned nfmt,
1492 unsigned offen,
1493 unsigned idxen,
1494 unsigned glc,
1495 unsigned slc,
1496 unsigned tfe)
1497 {
1498 struct gallivm_state *gallivm = &shader->radeon_bld.gallivm;
1499 LLVMTypeRef i32 = LLVMInt32TypeInContext(gallivm->context);
1500 LLVMValueRef args[] = {
1501 rsrc,
1502 vdata,
1503 LLVMConstInt(i32, num_channels, 0),
1504 vaddr,
1505 soffset,
1506 LLVMConstInt(i32, inst_offset, 0),
1507 LLVMConstInt(i32, dfmt, 0),
1508 LLVMConstInt(i32, nfmt, 0),
1509 LLVMConstInt(i32, offen, 0),
1510 LLVMConstInt(i32, idxen, 0),
1511 LLVMConstInt(i32, glc, 0),
1512 LLVMConstInt(i32, slc, 0),
1513 LLVMConstInt(i32, tfe, 0)
1514 };
1515
1516 /* The instruction offset field has 12 bits */
1517 assert(offen || inst_offset < (1 << 12));
1518
1519 /* The intrinsic is overloaded, we need to add a type suffix for overloading to work. */
1520 unsigned func = CLAMP(num_channels, 1, 3) - 1;
1521 const char *types[] = {"i32", "v2i32", "v4i32"};
1522 char name[256];
1523 snprintf(name, sizeof(name), "llvm.SI.tbuffer.store.%s", types[func]);
1524
1525 lp_build_intrinsic(gallivm->builder, name,
1526 LLVMVoidTypeInContext(gallivm->context),
1527 args, Elements(args), 0);
1528 }
1529
1530 static void build_tbuffer_store_dwords(struct si_shader_context *shader,
1531 LLVMValueRef rsrc,
1532 LLVMValueRef vdata,
1533 unsigned num_channels,
1534 LLVMValueRef vaddr,
1535 LLVMValueRef soffset,
1536 unsigned inst_offset)
1537 {
1538 static unsigned dfmt[] = {
1539 V_008F0C_BUF_DATA_FORMAT_32,
1540 V_008F0C_BUF_DATA_FORMAT_32_32,
1541 V_008F0C_BUF_DATA_FORMAT_32_32_32,
1542 V_008F0C_BUF_DATA_FORMAT_32_32_32_32
1543 };
1544 assert(num_channels >= 1 && num_channels <= 4);
1545
1546 build_tbuffer_store(shader, rsrc, vdata, num_channels, vaddr, soffset,
1547 inst_offset, dfmt[num_channels-1],
1548 V_008F0C_BUF_NUM_FORMAT_UINT, 1, 0, 1, 1, 0);
1549 }
1550
/* On SI, the vertex shader is responsible for writing streamout data
 * to buffers.
 *
 * Emits a conditional block: only the threads the hardware marks as
 * allowed to emit (via the streamout-config SGPR) perform the buffer
 * stores, and each output is additionally guarded by a stream-id check.
 */
static void si_llvm_emit_streamout(struct si_shader_context *shader,
				   struct si_shader_output_values *outputs,
				   unsigned noutput)
{
	struct pipe_stream_output_info *so = &shader->shader->selector->so;
	struct gallivm_state *gallivm = &shader->radeon_bld.gallivm;
	LLVMBuilderRef builder = gallivm->builder;
	int i, j;
	struct lp_build_if_state if_ctx;

	LLVMTypeRef i32 = LLVMInt32TypeInContext(gallivm->context);

	/* Get bits [22:16], i.e. (so_param >> 16) & 127; */
	LLVMValueRef so_vtx_count =
		unpack_param(shader, shader->param_streamout_config, 16, 7);

	LLVMValueRef tid = lp_build_intrinsic(builder, "llvm.SI.tid", i32,
					      NULL, 0, LLVMReadNoneAttribute);

	/* can_emit = tid < so_vtx_count; */
	LLVMValueRef can_emit =
		LLVMBuildICmp(builder, LLVMIntULT, tid, so_vtx_count, "");

	/* Stream id: bits [25:24] of the streamout config. */
	LLVMValueRef stream_id =
		unpack_param(shader, shader->param_streamout_config, 24, 2);

	/* Emit the streamout code conditionally. This actually avoids
	 * out-of-bounds buffer access. The hw tells us via the SGPR
	 * (so_vtx_count) which threads are allowed to emit streamout data. */
	lp_build_if(&if_ctx, gallivm, can_emit);
	{
		/* The buffer offset is computed as follows:
		 *   ByteOffset = streamout_offset[buffer_id]*4 +
		 *                (streamout_write_index + thread_id)*stride[buffer_id] +
		 *                attrib_offset
		 */

		LLVMValueRef so_write_index =
			LLVMGetParam(shader->radeon_bld.main_fn,
				     shader->param_streamout_write_index);

		/* Compute (streamout_write_index + thread_id). */
		so_write_index = LLVMBuildAdd(builder, so_write_index, tid, "");

		/* Compute the write offset for each enabled buffer. */
		LLVMValueRef so_write_offset[4] = {};
		for (i = 0; i < 4; i++) {
			if (!so->stride[i])
				continue;

			LLVMValueRef so_offset = LLVMGetParam(shader->radeon_bld.main_fn,
							      shader->param_streamout_offset[i]);
			/* The offset SGPR counts in dwords; convert to bytes. */
			so_offset = LLVMBuildMul(builder, so_offset, LLVMConstInt(i32, 4, 0), "");

			so_write_offset[i] = LLVMBuildMul(builder, so_write_index,
							  LLVMConstInt(i32, so->stride[i]*4, 0), "");
			so_write_offset[i] = LLVMBuildAdd(builder, so_write_offset[i], so_offset, "");
		}

		/* Write streamout data. */
		for (i = 0; i < so->num_outputs; i++) {
			unsigned buf_idx = so->output[i].output_buffer;
			unsigned reg = so->output[i].register_index;
			unsigned start = so->output[i].start_component;
			unsigned num_comps = so->output[i].num_components;
			unsigned stream = so->output[i].stream;
			LLVMValueRef out[4];
			struct lp_build_if_state if_ctx_stream;

			/* Defensive checks against malformed state. */
			assert(num_comps && num_comps <= 4);
			if (!num_comps || num_comps > 4)
				continue;

			if (reg >= noutput)
				continue;

			/* Load the output as int. */
			for (j = 0; j < num_comps; j++) {
				out[j] = LLVMBuildBitCast(builder,
							  outputs[reg].values[start+j],
							  i32, "");
			}

			/* Pack the output. */
			LLVMValueRef vdata = NULL;

			switch (num_comps) {
			case 1: /* as i32 */
				vdata = out[0];
				break;
			case 2: /* as v2i32 */
			case 3: /* as v4i32 (aligned to 4) */
			case 4: /* as v4i32 */
				vdata = LLVMGetUndef(LLVMVectorType(i32, util_next_power_of_two(num_comps)));
				for (j = 0; j < num_comps; j++) {
					vdata = LLVMBuildInsertElement(builder, vdata, out[j],
								       LLVMConstInt(i32, j, 0), "");
				}
				break;
			}

			/* Only the vertices of the currently selected
			 * stream are written out. */
			LLVMValueRef can_emit_stream =
				LLVMBuildICmp(builder, LLVMIntEQ,
					      stream_id,
					      lp_build_const_int32(gallivm, stream), "");

			lp_build_if(&if_ctx_stream, gallivm, can_emit_stream);
			build_tbuffer_store_dwords(shader, shader->so_buffers[buf_idx],
						   vdata, num_comps,
						   so_write_offset[buf_idx],
						   LLVMConstInt(i32, 0, 0),
						   so->output[i].dst_offset*4);
			lp_build_endif(&if_ctx_stream);
		}
	}
	lp_build_endif(&if_ctx);
}
1670
/* Generate export instructions for hardware VS shader stage.
 *
 * Emits streamout stores first, then one llvm.SI.export per output.
 * Position-type exports (POS, CLIPDIST, misc vector) are collected in
 * pos_args[] and emitted at the end so the DONE bit can be set on the
 * last one; parameter exports are emitted immediately.
 */
static void si_llvm_export_vs(struct lp_build_tgsi_context *bld_base,
			      struct si_shader_output_values *outputs,
			      unsigned noutput)
{
	struct si_shader_context * si_shader_ctx = si_shader_context(bld_base);
	struct si_shader * shader = si_shader_ctx->shader;
	struct lp_build_context * base = &bld_base->base;
	struct lp_build_context * uint =
		&si_shader_ctx->radeon_bld.soa.bld_base.uint_bld;
	LLVMValueRef args[9];
	LLVMValueRef pos_args[4][9] = { { 0 } };
	LLVMValueRef psize_value = NULL, edgeflag_value = NULL, layer_value = NULL, viewport_index_value = NULL;
	unsigned semantic_name, semantic_index;
	unsigned target;
	unsigned param_count = 0;
	unsigned pos_idx;
	int i;

	if (outputs && si_shader_ctx->shader->selector->so.num_outputs) {
		si_llvm_emit_streamout(si_shader_ctx, outputs, noutput);
	}

	for (i = 0; i < noutput; i++) {
		semantic_name = outputs[i].name;
		semantic_index = outputs[i].sid;

handle_semantic:
		/* Select the correct target */
		switch(semantic_name) {
		case TGSI_SEMANTIC_PSIZE:
			/* Saved for the misc vector (POS+1) below. */
			psize_value = outputs[i].values[0];
			continue;
		case TGSI_SEMANTIC_EDGEFLAG:
			/* Saved for the misc vector (POS+1) below. */
			edgeflag_value = outputs[i].values[0];
			continue;
		case TGSI_SEMANTIC_LAYER:
			/* Saved for the misc vector; also exported as a
			 * generic parameter via the goto. */
			layer_value = outputs[i].values[0];
			semantic_name = TGSI_SEMANTIC_GENERIC;
			goto handle_semantic;
		case TGSI_SEMANTIC_VIEWPORT_INDEX:
			/* Saved for the misc vector; also exported as a
			 * generic parameter via the goto. */
			viewport_index_value = outputs[i].values[0];
			semantic_name = TGSI_SEMANTIC_GENERIC;
			goto handle_semantic;
		case TGSI_SEMANTIC_POSITION:
			target = V_008DFC_SQ_EXP_POS;
			break;
		case TGSI_SEMANTIC_COLOR:
		case TGSI_SEMANTIC_BCOLOR:
			target = V_008DFC_SQ_EXP_PARAM + param_count;
			shader->vs_output_param_offset[i] = param_count;
			param_count++;
			break;
		case TGSI_SEMANTIC_CLIPDIST:
			target = V_008DFC_SQ_EXP_POS + 2 + semantic_index;
			break;
		case TGSI_SEMANTIC_CLIPVERTEX:
			/* Expanded into the two CLIPDIST pos exports. */
			si_llvm_emit_clipvertex(bld_base, pos_args, outputs[i].values);
			continue;
		case TGSI_SEMANTIC_PRIMID:
		case TGSI_SEMANTIC_FOG:
		case TGSI_SEMANTIC_TEXCOORD:
		case TGSI_SEMANTIC_GENERIC:
			target = V_008DFC_SQ_EXP_PARAM + param_count;
			shader->vs_output_param_offset[i] = param_count;
			param_count++;
			break;
		default:
			target = 0;
			fprintf(stderr,
				"Warning: SI unhandled vs output type:%d\n",
				semantic_name);
		}

		si_llvm_init_export_args(bld_base, outputs[i].values, target, args);

		if (target >= V_008DFC_SQ_EXP_POS &&
		    target <= (V_008DFC_SQ_EXP_POS + 3)) {
			/* Defer position exports until the end. */
			memcpy(pos_args[target - V_008DFC_SQ_EXP_POS],
			       args, sizeof(args));
		} else {
			lp_build_intrinsic(base->gallivm->builder,
					   "llvm.SI.export",
					   LLVMVoidTypeInContext(base->gallivm->context),
					   args, 9, 0);
		}

		/* CLIPDIST is also exported as a generic parameter. */
		if (semantic_name == TGSI_SEMANTIC_CLIPDIST) {
			semantic_name = TGSI_SEMANTIC_GENERIC;
			goto handle_semantic;
		}
	}

	shader->nr_param_exports = param_count;

	/* We need to add the position output manually if it's missing. */
	if (!pos_args[0][0]) {
		pos_args[0][0] = lp_build_const_int32(base->gallivm, 0xf); /* writemask */
		pos_args[0][1] = uint->zero; /* EXEC mask */
		pos_args[0][2] = uint->zero; /* last export? */
		pos_args[0][3] = lp_build_const_int32(base->gallivm, V_008DFC_SQ_EXP_POS);
		pos_args[0][4] = uint->zero; /* COMPR flag */
		pos_args[0][5] = base->zero; /* X */
		pos_args[0][6] = base->zero; /* Y */
		pos_args[0][7] = base->zero; /* Z */
		pos_args[0][8] = base->one;  /* W */
	}

	/* Write the misc vector (point size, edgeflag, layer, viewport). */
	if (shader->selector->info.writes_psize ||
	    shader->selector->info.writes_edgeflag ||
	    shader->selector->info.writes_viewport_index ||
	    shader->selector->info.writes_layer) {
		pos_args[1][0] = lp_build_const_int32(base->gallivm, /* writemask */
						      shader->selector->info.writes_psize |
						      (shader->selector->info.writes_edgeflag << 1) |
						      (shader->selector->info.writes_layer << 2) |
						      (shader->selector->info.writes_viewport_index << 3));
		pos_args[1][1] = uint->zero; /* EXEC mask */
		pos_args[1][2] = uint->zero; /* last export? */
		pos_args[1][3] = lp_build_const_int32(base->gallivm, V_008DFC_SQ_EXP_POS + 1);
		pos_args[1][4] = uint->zero; /* COMPR flag */
		pos_args[1][5] = base->zero; /* X */
		pos_args[1][6] = base->zero; /* Y */
		pos_args[1][7] = base->zero; /* Z */
		pos_args[1][8] = base->zero; /* W */

		if (shader->selector->info.writes_psize)
			pos_args[1][5] = psize_value;

		if (shader->selector->info.writes_edgeflag) {
			/* The output is a float, but the hw expects an integer
			 * with the first bit containing the edge flag. */
			edgeflag_value = LLVMBuildFPToUI(base->gallivm->builder,
							 edgeflag_value,
							 bld_base->uint_bld.elem_type, "");
			edgeflag_value = lp_build_min(&bld_base->int_bld,
						      edgeflag_value,
						      bld_base->int_bld.one);

			/* The LLVM intrinsic expects a float. */
			pos_args[1][6] = LLVMBuildBitCast(base->gallivm->builder,
							  edgeflag_value,
							  base->elem_type, "");
		}

		if (shader->selector->info.writes_layer)
			pos_args[1][7] = layer_value;

		if (shader->selector->info.writes_viewport_index)
			pos_args[1][8] = viewport_index_value;
	}

	/* Count position exports so the last one gets the DONE bit. */
	for (i = 0; i < 4; i++)
		if (pos_args[i][0])
			shader->nr_pos_exports++;

	pos_idx = 0;
	for (i = 0; i < 4; i++) {
		if (!pos_args[i][0])
			continue;

		/* Specify the target we are exporting */
		pos_args[i][3] = lp_build_const_int32(base->gallivm, V_008DFC_SQ_EXP_POS + pos_idx++);

		if (pos_idx == shader->nr_pos_exports)
			/* Specify that this is the last export */
			pos_args[i][2] = uint->one;

		lp_build_intrinsic(base->gallivm->builder,
				   "llvm.SI.export",
				   LLVMVoidTypeInContext(base->gallivm->context),
				   pos_args[i], 9, 0);
	}
}
1847
/* This only writes the tessellation factor levels.
 *
 * TCS epilogue: re-read TESSINNER/TESSOUTER from LDS (any invocation may
 * have written them, so the SSA temporaries can't be trusted) and store
 * them to the tess-factor ring buffer for the fixed-function tessellator.
 * Only invocation 0 performs the store.
 */
static void si_llvm_emit_tcs_epilogue(struct lp_build_tgsi_context *bld_base)
{
	struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
	struct gallivm_state *gallivm = bld_base->base.gallivm;
	struct si_shader *shader = si_shader_ctx->shader;
	unsigned tess_inner_index, tess_outer_index;
	LLVMValueRef lds_base, lds_inner, lds_outer;
	LLVMValueRef tf_base, rel_patch_id, byteoffset, buffer, rw_buffers;
	LLVMValueRef out[6], vec0, vec1, invocation_id;
	unsigned stride, outer_comps, inner_comps, i;
	struct lp_build_if_state if_ctx;

	/* invocation_id = bits [8:12] of SI_PARAM_REL_IDS. */
	invocation_id = unpack_param(si_shader_ctx, SI_PARAM_REL_IDS, 8, 5);

	/* Do this only for invocation 0, because the tess levels are per-patch,
	 * not per-vertex.
	 *
	 * This can't jump, because invocation 0 executes this. It should
	 * at least mask out the loads and stores for other invocations.
	 */
	lp_build_if(&if_ctx, gallivm,
		    LLVMBuildICmp(gallivm->builder, LLVMIntEQ,
				  invocation_id, bld_base->uint_bld.zero, ""));

	/* Determine the layout of one tess factor element in the buffer. */
	switch (shader->key.tcs.prim_mode) {
	case PIPE_PRIM_LINES:
		stride = 2; /* 2 dwords, 1 vec2 store */
		outer_comps = 2;
		inner_comps = 0;
		break;
	case PIPE_PRIM_TRIANGLES:
		stride = 4; /* 4 dwords, 1 vec4 store */
		outer_comps = 3;
		inner_comps = 1;
		break;
	case PIPE_PRIM_QUADS:
		stride = 6; /* 6 dwords, 2 stores (vec4 + vec2) */
		outer_comps = 4;
		inner_comps = 2;
		break;
	default:
		assert(0);
		return;
	}

	/* Load tess_inner and tess_outer from LDS.
	 * Any invocation can write them, so we can't get them from a temporary.
	 */
	tess_inner_index = si_shader_io_get_unique_index(TGSI_SEMANTIC_TESSINNER, 0);
	tess_outer_index = si_shader_io_get_unique_index(TGSI_SEMANTIC_TESSOUTER, 0);

	/* Each I/O slot is 4 dwords wide, hence the "* 4" offsets. */
	lds_base = get_tcs_out_current_patch_data_offset(si_shader_ctx);
	lds_inner = LLVMBuildAdd(gallivm->builder, lds_base,
				 lp_build_const_int32(gallivm,
						      tess_inner_index * 4), "");
	lds_outer = LLVMBuildAdd(gallivm->builder, lds_base,
				 lp_build_const_int32(gallivm,
						      tess_outer_index * 4), "");

	/* Outer factors first, then inner factors, packed contiguously. */
	for (i = 0; i < outer_comps; i++)
		out[i] = lds_load(bld_base, TGSI_TYPE_SIGNED, i, lds_outer);
	for (i = 0; i < inner_comps; i++)
		out[outer_comps+i] = lds_load(bld_base, TGSI_TYPE_SIGNED, i, lds_inner);

	/* Convert the outputs to vectors for stores. */
	vec0 = lp_build_gather_values(gallivm, out, MIN2(stride, 4));
	vec1 = NULL;

	/* The quads case needs a second vec2 store (stride == 6). */
	if (stride > 4)
		vec1 = lp_build_gather_values(gallivm, out+4, stride - 4);

	/* Get the buffer. */
	rw_buffers = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
				  SI_PARAM_RW_BUFFERS);
	buffer = build_indexed_load_const(si_shader_ctx, rw_buffers,
			lp_build_const_int32(gallivm, SI_RING_TESS_FACTOR));

	/* Get the offset. */
	tf_base = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
			       SI_PARAM_TESS_FACTOR_OFFSET);
	/* Each patch occupies "stride" dwords in the tess-factor ring. */
	rel_patch_id = get_rel_patch_id(si_shader_ctx);
	byteoffset = LLVMBuildMul(gallivm->builder, rel_patch_id,
				  lp_build_const_int32(gallivm, 4 * stride), "");

	/* Store the outputs. */
	build_tbuffer_store_dwords(si_shader_ctx, buffer, vec0,
				   MIN2(stride, 4), byteoffset, tf_base, 0);
	if (vec1)
		build_tbuffer_store_dwords(si_shader_ctx, buffer, vec1,
					   stride - 4, byteoffset, tf_base, 16);
	lp_build_endif(&if_ctx);
}
1942
1943 static void si_llvm_emit_ls_epilogue(struct lp_build_tgsi_context * bld_base)
1944 {
1945 struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
1946 struct si_shader *shader = si_shader_ctx->shader;
1947 struct tgsi_shader_info *info = &shader->selector->info;
1948 struct gallivm_state *gallivm = bld_base->base.gallivm;
1949 unsigned i, chan;
1950 LLVMValueRef vertex_id = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
1951 si_shader_ctx->param_rel_auto_id);
1952 LLVMValueRef vertex_dw_stride =
1953 unpack_param(si_shader_ctx, SI_PARAM_LS_OUT_LAYOUT, 13, 8);
1954 LLVMValueRef base_dw_addr = LLVMBuildMul(gallivm->builder, vertex_id,
1955 vertex_dw_stride, "");
1956
1957 /* Write outputs to LDS. The next shader (TCS aka HS) will read
1958 * its inputs from it. */
1959 for (i = 0; i < info->num_outputs; i++) {
1960 LLVMValueRef *out_ptr = si_shader_ctx->radeon_bld.soa.outputs[i];
1961 unsigned name = info->output_semantic_name[i];
1962 unsigned index = info->output_semantic_index[i];
1963 int param = si_shader_io_get_unique_index(name, index);
1964 LLVMValueRef dw_addr = LLVMBuildAdd(gallivm->builder, base_dw_addr,
1965 lp_build_const_int32(gallivm, param * 4), "");
1966
1967 for (chan = 0; chan < 4; chan++) {
1968 lds_store(bld_base, chan, dw_addr,
1969 LLVMBuildLoad(gallivm->builder, out_ptr[chan], ""));
1970 }
1971 }
1972 }
1973
/* ES epilogue: store all outputs to the ESGS ring buffer, from which the
 * geometry shader reads its inputs. Each output slot occupies 16 bytes
 * (4 dwords), stored one dword at a time.
 */
static void si_llvm_emit_es_epilogue(struct lp_build_tgsi_context * bld_base)
{
	struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
	struct gallivm_state *gallivm = bld_base->base.gallivm;
	struct si_shader *es = si_shader_ctx->shader;
	struct tgsi_shader_info *info = &es->selector->info;
	LLVMTypeRef i32 = LLVMInt32TypeInContext(gallivm->context);
	/* Per-wave base offset into the ESGS ring. */
	LLVMValueRef soffset = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
					    si_shader_ctx->param_es2gs_offset);
	unsigned chan;
	int i;

	for (i = 0; i < info->num_outputs; i++) {
		LLVMValueRef *out_ptr =
			si_shader_ctx->radeon_bld.soa.outputs[i];
		int param_index;

		/* Not stored in the ring — presumably these are handled by a
		 * later stage; they have no unique I/O slot here. TODO confirm. */
		if (info->output_semantic_name[i] == TGSI_SEMANTIC_VIEWPORT_INDEX ||
		    info->output_semantic_name[i] == TGSI_SEMANTIC_LAYER)
			continue;

		param_index = si_shader_io_get_unique_index(info->output_semantic_name[i],
							    info->output_semantic_index[i]);

		for (chan = 0; chan < 4; chan++) {
			LLVMValueRef out_val = LLVMBuildLoad(gallivm->builder, out_ptr[chan], "");
			/* The ring is written as raw 32-bit data. */
			out_val = LLVMBuildBitCast(gallivm->builder, out_val, i32, "");

			build_tbuffer_store(si_shader_ctx,
					    si_shader_ctx->esgs_ring,
					    out_val, 1,
					    LLVMGetUndef(i32), soffset,
					    (4 * param_index + chan) * 4,
					    V_008F0C_BUF_DATA_FORMAT_32,
					    V_008F0C_BUF_NUM_FORMAT_UINT,
					    0, 0, 1, 1, 0);
		}
	}
}
2013
2014 static void si_llvm_emit_gs_epilogue(struct lp_build_tgsi_context *bld_base)
2015 {
2016 struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
2017 struct gallivm_state *gallivm = bld_base->base.gallivm;
2018 LLVMValueRef args[2];
2019
2020 args[0] = lp_build_const_int32(gallivm, SENDMSG_GS_OP_NOP | SENDMSG_GS_DONE);
2021 args[1] = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_GS_WAVE_ID);
2022 lp_build_intrinsic(gallivm->builder, "llvm.SI.sendmsg",
2023 LLVMVoidTypeInContext(gallivm->context), args, 2,
2024 LLVMNoUnwindAttribute);
2025 }
2026
2027 static void si_llvm_emit_vs_epilogue(struct lp_build_tgsi_context * bld_base)
2028 {
2029 struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
2030 struct gallivm_state *gallivm = bld_base->base.gallivm;
2031 struct tgsi_shader_info *info = &si_shader_ctx->shader->selector->info;
2032 struct si_shader_output_values *outputs = NULL;
2033 int i,j;
2034
2035 outputs = MALLOC((info->num_outputs + 1) * sizeof(outputs[0]));
2036
2037 /* Vertex color clamping.
2038 *
2039 * This uses a state constant loaded in a user data SGPR and
2040 * an IF statement is added that clamps all colors if the constant
2041 * is true.
2042 */
2043 if (si_shader_ctx->type == TGSI_PROCESSOR_VERTEX &&
2044 !si_shader_ctx->shader->is_gs_copy_shader) {
2045 struct lp_build_if_state if_ctx;
2046 LLVMValueRef cond = NULL;
2047 LLVMValueRef addr, val;
2048
2049 for (i = 0; i < info->num_outputs; i++) {
2050 if (info->output_semantic_name[i] != TGSI_SEMANTIC_COLOR &&
2051 info->output_semantic_name[i] != TGSI_SEMANTIC_BCOLOR)
2052 continue;
2053
2054 /* We've found a color. */
2055 if (!cond) {
2056 /* The state is in the first bit of the user SGPR. */
2057 cond = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
2058 SI_PARAM_VS_STATE_BITS);
2059 cond = LLVMBuildTrunc(gallivm->builder, cond,
2060 LLVMInt1TypeInContext(gallivm->context), "");
2061 lp_build_if(&if_ctx, gallivm, cond);
2062 }
2063
2064 for (j = 0; j < 4; j++) {
2065 addr = si_shader_ctx->radeon_bld.soa.outputs[i][j];
2066 val = LLVMBuildLoad(gallivm->builder, addr, "");
2067 val = radeon_llvm_saturate(bld_base, val);
2068 LLVMBuildStore(gallivm->builder, val, addr);
2069 }
2070 }
2071
2072 if (cond)
2073 lp_build_endif(&if_ctx);
2074 }
2075
2076 for (i = 0; i < info->num_outputs; i++) {
2077 outputs[i].name = info->output_semantic_name[i];
2078 outputs[i].sid = info->output_semantic_index[i];
2079
2080 for (j = 0; j < 4; j++)
2081 outputs[i].values[j] =
2082 LLVMBuildLoad(gallivm->builder,
2083 si_shader_ctx->radeon_bld.soa.outputs[i][j],
2084 "");
2085 }
2086
2087 /* Export PrimitiveID when PS needs it. */
2088 if (si_vs_exports_prim_id(si_shader_ctx->shader)) {
2089 outputs[i].name = TGSI_SEMANTIC_PRIMID;
2090 outputs[i].sid = 0;
2091 outputs[i].values[0] = bitcast(bld_base, TGSI_TYPE_FLOAT,
2092 get_primitive_id(bld_base, 0));
2093 outputs[i].values[1] = bld_base->base.undef;
2094 outputs[i].values[2] = bld_base->base.undef;
2095 outputs[i].values[3] = bld_base->base.undef;
2096 i++;
2097 }
2098
2099 si_llvm_export_vs(bld_base, outputs, i);
2100 FREE(outputs);
2101 }
2102
2103 static void si_export_mrt_z(struct lp_build_tgsi_context *bld_base,
2104 LLVMValueRef depth, LLVMValueRef stencil,
2105 LLVMValueRef samplemask)
2106 {
2107 struct si_screen *sscreen = si_shader_context(bld_base)->screen;
2108 struct lp_build_context *base = &bld_base->base;
2109 struct lp_build_context *uint = &bld_base->uint_bld;
2110 LLVMValueRef args[9];
2111 unsigned mask = 0;
2112
2113 assert(depth || stencil || samplemask);
2114
2115 args[1] = uint->one; /* whether the EXEC mask is valid */
2116 args[2] = uint->one; /* DONE bit */
2117
2118 /* Specify the target we are exporting */
2119 args[3] = lp_build_const_int32(base->gallivm, V_008DFC_SQ_EXP_MRTZ);
2120
2121 args[4] = uint->zero; /* COMP flag */
2122 args[5] = base->zero; /* R, depth */
2123 args[6] = base->zero; /* G, stencil test value[0:7], stencil op value[8:15] */
2124 args[7] = base->zero; /* B, sample mask */
2125 args[8] = base->zero; /* A, alpha to mask */
2126
2127 if (depth) {
2128 args[5] = depth;
2129 mask |= 0x1;
2130 }
2131
2132 if (stencil) {
2133 args[6] = stencil;
2134 mask |= 0x2;
2135 }
2136
2137 if (samplemask) {
2138 args[7] = samplemask;
2139 mask |= 0x4;
2140 }
2141
2142 /* SI (except OLAND) has a bug that it only looks
2143 * at the X writemask component. */
2144 if (sscreen->b.chip_class == SI &&
2145 sscreen->b.family != CHIP_OLAND)
2146 mask |= 0x1;
2147
2148 /* Specify which components to enable */
2149 args[0] = lp_build_const_int32(base->gallivm, mask);
2150
2151 lp_build_intrinsic(base->gallivm->builder, "llvm.SI.export",
2152 LLVMVoidTypeInContext(base->gallivm->context),
2153 args, 9, 0);
2154 }
2155
/* Pixel shader epilogue: export all color outputs (applying color clamp,
 * alpha-to-one, alpha test and line-smoothing alpha scaling as requested
 * by the shader key), then export depth/stencil/samplemask via MRTZ last.
 * Exactly one export carries the DONE bit: MRTZ if present, otherwise the
 * last color export.
 */
static void si_llvm_emit_fs_epilogue(struct lp_build_tgsi_context * bld_base)
{
	struct si_shader_context * si_shader_ctx = si_shader_context(bld_base);
	struct si_shader * shader = si_shader_ctx->shader;
	struct lp_build_context * base = &bld_base->base;
	struct lp_build_context * uint = &bld_base->uint_bld;
	struct tgsi_shader_info *info = &shader->selector->info;
	LLVMBuilderRef builder = base->gallivm->builder;
	LLVMValueRef args[9];
	LLVMValueRef depth = NULL, stencil = NULL, samplemask = NULL;
	int last_color_export = -1;
	int i;

	/* If there are no outputs, add a dummy export. */
	if (!info->num_outputs) {
		args[0] = lp_build_const_int32(base->gallivm, 0x0); /* enabled channels */
		args[1] = uint->one; /* whether the EXEC mask is valid */
		args[2] = uint->one; /* DONE bit */
		args[3] = lp_build_const_int32(base->gallivm, V_008DFC_SQ_EXP_MRT);
		args[4] = uint->zero; /* COMPR flag (0 = 32-bit export) */
		args[5] = uint->zero; /* R */
		args[6] = uint->zero; /* G */
		args[7] = uint->zero; /* B */
		args[8] = uint->zero; /* A */

		lp_build_intrinsic(base->gallivm->builder, "llvm.SI.export",
				   LLVMVoidTypeInContext(base->gallivm->context),
				   args, 9, 0);
		return;
	}

	/* Determine the last export. If MRTZ is present, it's always last.
	 * Otherwise, find the last color export.
	 */
	if (!info->writes_z && !info->writes_stencil && !info->writes_samplemask)
		for (i = 0; i < info->num_outputs; i++)
			if (info->output_semantic_name[i] == TGSI_SEMANTIC_COLOR)
				last_color_export = i;

	for (i = 0; i < info->num_outputs; i++) {
		unsigned semantic_name = info->output_semantic_name[i];
		unsigned semantic_index = info->output_semantic_index[i];
		unsigned target;
		LLVMValueRef alpha_ptr;

		/* Select the correct target */
		switch (semantic_name) {
		case TGSI_SEMANTIC_POSITION:
			/* gl_FragDepth: only the Z channel is used. */
			depth = LLVMBuildLoad(builder,
					      si_shader_ctx->radeon_bld.soa.outputs[i][2], "");
			continue;
		case TGSI_SEMANTIC_STENCIL:
			/* Stencil ref value lives in the Y channel. */
			stencil = LLVMBuildLoad(builder,
						si_shader_ctx->radeon_bld.soa.outputs[i][1], "");
			continue;
		case TGSI_SEMANTIC_SAMPLEMASK:
			samplemask = LLVMBuildLoad(builder,
						   si_shader_ctx->radeon_bld.soa.outputs[i][0], "");
			continue;
		case TGSI_SEMANTIC_COLOR:
			target = V_008DFC_SQ_EXP_MRT + semantic_index;
			alpha_ptr = si_shader_ctx->radeon_bld.soa.outputs[i][3];

			/* Clamp all four channels to [0,1] if requested. */
			if (si_shader_ctx->shader->key.ps.clamp_color) {
				for (int j = 0; j < 4; j++) {
					LLVMValueRef ptr = si_shader_ctx->radeon_bld.soa.outputs[i][j];
					LLVMValueRef result = LLVMBuildLoad(builder, ptr, "");

					result = radeon_llvm_saturate(bld_base, result);
					LLVMBuildStore(builder, result, ptr);
				}
			}

			/* Force alpha to 1.0 (alpha-to-one MSAA state). */
			if (si_shader_ctx->shader->key.ps.alpha_to_one)
				LLVMBuildStore(base->gallivm->builder,
					       base->one, alpha_ptr);

			/* Alpha test only applies to COLOR0. */
			if (semantic_index == 0 &&
			    si_shader_ctx->shader->key.ps.alpha_func != PIPE_FUNC_ALWAYS)
				si_alpha_test(bld_base, alpha_ptr);

			if (si_shader_ctx->shader->key.ps.poly_line_smoothing)
				si_scale_alpha_by_sample_mask(bld_base, alpha_ptr);

			break;
		default:
			fprintf(stderr,
				"Warning: SI unhandled fs output type:%d\n",
				semantic_name);
			continue;
		}

		/* If last_cbuf > 0, FS_COLOR0_WRITES_ALL_CBUFS is true. */
		if (semantic_index == 0 &&
		    si_shader_ctx->shader->key.ps.last_cbuf > 0) {
			/* Replicate COLOR0 to MRT1..last_cbuf. */
			for (int c = 1; c <= si_shader_ctx->shader->key.ps.last_cbuf; c++) {
				si_llvm_init_export_args_load(bld_base,
							      si_shader_ctx->radeon_bld.soa.outputs[i],
							      V_008DFC_SQ_EXP_MRT + c, args);
				lp_build_intrinsic(base->gallivm->builder, "llvm.SI.export",
						   LLVMVoidTypeInContext(base->gallivm->context),
						   args, 9, 0);
			}
		}

		si_llvm_init_export_args_load(bld_base,
					      si_shader_ctx->radeon_bld.soa.outputs[i],
					      target, args);
		if (last_color_export == i) {
			args[1] = uint->one; /* whether the EXEC mask is valid */
			args[2] = uint->one; /* DONE bit */
		}
		lp_build_intrinsic(base->gallivm->builder, "llvm.SI.export",
				   LLVMVoidTypeInContext(base->gallivm->context),
				   args, 9, 0);
	}

	/* MRTZ goes last; si_export_mrt_z sets the DONE bit itself. */
	if (depth || stencil || samplemask)
		si_export_mrt_z(bld_base, depth, stencil, samplemask);
}
2276
2277 static void build_tex_intrinsic(const struct lp_build_tgsi_action * action,
2278 struct lp_build_tgsi_context * bld_base,
2279 struct lp_build_emit_data * emit_data);
2280
2281 static bool tgsi_is_array_sampler(unsigned target)
2282 {
2283 return target == TGSI_TEXTURE_1D_ARRAY ||
2284 target == TGSI_TEXTURE_SHADOW1D_ARRAY ||
2285 target == TGSI_TEXTURE_2D_ARRAY ||
2286 target == TGSI_TEXTURE_SHADOW2D_ARRAY ||
2287 target == TGSI_TEXTURE_CUBE_ARRAY ||
2288 target == TGSI_TEXTURE_SHADOWCUBE_ARRAY ||
2289 target == TGSI_TEXTURE_2D_ARRAY_MSAA;
2290 }
2291
/* Fill in emit_data->args for the SI image intrinsics:
 *   [0] coordinate vector (padded to a power-of-two length),
 *   [1] resource descriptor,
 *   [2] sampler state (omitted for TXF/TXQ),
 *   then dmask, unorm, r128, da, glc, slc, tfe, lwe flags.
 * Also sets emit_data->dst_type: v4i32 for TXF/TXQ, v4f32 otherwise.
 */
static void set_tex_fetch_args(struct gallivm_state *gallivm,
			       struct lp_build_emit_data *emit_data,
			       unsigned opcode, unsigned target,
			       LLVMValueRef res_ptr, LLVMValueRef samp_ptr,
			       LLVMValueRef *param, unsigned count,
			       unsigned dmask)
{
	unsigned num_args;
	unsigned is_rect = target == TGSI_TEXTURE_RECT;
	LLVMTypeRef i32 = LLVMInt32TypeInContext(gallivm->context);

	/* Pad to power of two vector */
	while (count < util_next_power_of_two(count))
		param[count++] = LLVMGetUndef(i32);

	/* Texture coordinates. */
	if (count > 1)
		emit_data->args[0] = lp_build_gather_values(gallivm, param, count);
	else
		emit_data->args[0] = param[0];

	/* Resource. */
	emit_data->args[1] = res_ptr;
	num_args = 2;

	/* TXF/TXQ return raw integers and take no sampler; everything else
	 * returns floats and needs the sampler state. */
	if (opcode == TGSI_OPCODE_TXF || opcode == TGSI_OPCODE_TXQ)
		emit_data->dst_type = LLVMVectorType(i32, 4);
	else {
		emit_data->dst_type = LLVMVectorType(
			LLVMFloatTypeInContext(gallivm->context), 4);

		emit_data->args[num_args++] = samp_ptr;
	}

	emit_data->args[num_args++] = lp_build_const_int32(gallivm, dmask);
	emit_data->args[num_args++] = lp_build_const_int32(gallivm, is_rect); /* unorm */
	emit_data->args[num_args++] = lp_build_const_int32(gallivm, 0); /* r128 */
	emit_data->args[num_args++] = lp_build_const_int32(gallivm,
					tgsi_is_array_sampler(target)); /* da */
	emit_data->args[num_args++] = lp_build_const_int32(gallivm, 0); /* glc */
	emit_data->args[num_args++] = lp_build_const_int32(gallivm, 0); /* slc */
	emit_data->args[num_args++] = lp_build_const_int32(gallivm, 0); /* tfe */
	emit_data->args[num_args++] = lp_build_const_int32(gallivm, 0); /* lwe */

	emit_data->arg_count = num_args;
}
2338
2339 static const struct lp_build_tgsi_action tex_action;
2340
/* Fetch the resource (texture view), sampler-state and — for MSAA
 * targets — FMASK descriptors for a texture instruction, handling both
 * direct and indirect sampler indexing.
 */
static void tex_fetch_ptrs(
	struct lp_build_tgsi_context * bld_base,
	struct lp_build_emit_data * emit_data,
	LLVMValueRef *res_ptr, LLVMValueRef *samp_ptr, LLVMValueRef *fmask_ptr)
{
	struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
	struct gallivm_state *gallivm = bld_base->base.gallivm;
	const struct tgsi_full_instruction * inst = emit_data->inst;
	unsigned target = inst->Texture.Texture;
	unsigned sampler_src;
	unsigned sampler_index;

	/* The sampler is always the last source register. */
	sampler_src = emit_data->inst->Instruction.NumSrcRegs - 1;
	sampler_index = emit_data->inst->Src[sampler_src].Register.Index;

	if (emit_data->inst->Src[sampler_src].Register.Indirect) {
		/* Indirect index: load the descriptors from the descriptor
		 * arrays at a runtime-computed index. */
		const struct tgsi_full_src_register *reg = &emit_data->inst->Src[sampler_src];
		LLVMValueRef ind_index;

		ind_index = get_indirect_index(si_shader_ctx, &reg->Indirect, reg->Register.Index);

		*res_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_SAMPLER_VIEWS);
		*res_ptr = build_indexed_load_const(si_shader_ctx, *res_ptr, ind_index);

		*samp_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_SAMPLER_STATES);
		*samp_ptr = build_indexed_load_const(si_shader_ctx, *samp_ptr, ind_index);

		if (target == TGSI_TEXTURE_2D_MSAA ||
		    target == TGSI_TEXTURE_2D_ARRAY_MSAA) {
			/* FMASK views live SI_FMASK_TEX_OFFSET slots past the
			 * color views in the same array. */
			ind_index = LLVMBuildAdd(gallivm->builder, ind_index,
						 lp_build_const_int32(gallivm,
								      SI_FMASK_TEX_OFFSET), "");
			*fmask_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_SAMPLER_VIEWS);
			*fmask_ptr = build_indexed_load_const(si_shader_ctx, *fmask_ptr, ind_index);
		}
	} else {
		/* Direct index: use the preloaded descriptor values. */
		*res_ptr = si_shader_ctx->sampler_views[sampler_index];
		*samp_ptr = si_shader_ctx->sampler_states[sampler_index];
		*fmask_ptr = si_shader_ctx->sampler_views[SI_FMASK_TEX_OFFSET + sampler_index];
	}
}
2382
/* Build the argument list for a texture instruction: fetch and project
 * coordinates, pack offsets / LOD bias / depth-compare / derivatives /
 * coordinates / LOD or sample index into the "address" vector (in that
 * order), apply the FMASK indirection for MSAA fetches, and hand
 * everything to set_tex_fetch_args(). TXQ and buffer textures take
 * special early-return paths.
 */
static void tex_fetch_args(
	struct lp_build_tgsi_context * bld_base,
	struct lp_build_emit_data * emit_data)
{
	struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
	struct gallivm_state *gallivm = bld_base->base.gallivm;
	LLVMBuilderRef builder = gallivm->builder;
	const struct tgsi_full_instruction * inst = emit_data->inst;
	unsigned opcode = inst->Instruction.Opcode;
	unsigned target = inst->Texture.Texture;
	LLVMValueRef coords[5], derivs[6];
	LLVMValueRef address[16];
	int ref_pos;
	unsigned num_coords = tgsi_util_get_texture_coord_dim(target, &ref_pos);
	unsigned count = 0;
	unsigned chan;
	unsigned num_deriv_channels = 0;
	bool has_offset = inst->Texture.NumOffsets > 0;
	LLVMValueRef res_ptr, samp_ptr, fmask_ptr = NULL;
	LLVMTypeRef i32 = LLVMInt32TypeInContext(gallivm->context);
	unsigned dmask = 0xf;

	tex_fetch_ptrs(bld_base, emit_data, &res_ptr, &samp_ptr, &fmask_ptr);

	if (opcode == TGSI_OPCODE_TXQ) {
		if (target == TGSI_TEXTURE_BUFFER) {
			LLVMTypeRef v8i32 = LLVMVectorType(i32, 8);

			/* Read the size from the buffer descriptor directly. */
			LLVMValueRef res = LLVMBuildBitCast(builder, res_ptr, v8i32, "");
			LLVMValueRef size = LLVMBuildExtractElement(builder, res,
							lp_build_const_int32(gallivm, 6), "");

			if (si_shader_ctx->screen->b.chip_class >= VI) {
				/* On VI, the descriptor contains the size in bytes,
				 * but TXQ must return the size in elements.
				 * The stride is always non-zero for resources using TXQ.
				 */
				LLVMValueRef stride =
					LLVMBuildExtractElement(builder, res,
								lp_build_const_int32(gallivm, 5), "");
				stride = LLVMBuildLShr(builder, stride,
						       lp_build_const_int32(gallivm, 16), "");
				stride = LLVMBuildAnd(builder, stride,
						      lp_build_const_int32(gallivm, 0x3FFF), "");

				size = LLVMBuildUDiv(builder, size, stride, "");
			}

			emit_data->args[0] = size;
			return;
		}

		/* Textures - set the mip level. */
		address[count++] = lp_build_emit_fetch(bld_base, inst, 0, TGSI_CHAN_X);

		set_tex_fetch_args(gallivm, emit_data, opcode, target, res_ptr,
				   NULL, address, count, 0xf);
		return;
	}

	if (target == TGSI_TEXTURE_BUFFER) {
		LLVMTypeRef i128 = LLVMIntTypeInContext(gallivm->context, 128);
		LLVMTypeRef v2i128 = LLVMVectorType(i128, 2);
		LLVMTypeRef i8 = LLVMInt8TypeInContext(gallivm->context);
		LLVMTypeRef v16i8 = LLVMVectorType(i8, 16);

		/* Bitcast and truncate v8i32 to v16i8. */
		LLVMValueRef res = res_ptr;
		res = LLVMBuildBitCast(gallivm->builder, res, v2i128, "");
		res = LLVMBuildExtractElement(gallivm->builder, res, bld_base->uint_bld.one, "");
		res = LLVMBuildBitCast(gallivm->builder, res, v16i8, "");

		/* Buffer fetches go through llvm.SI.vs.load.input (see
		 * build_tex_intrinsic) with the element index as arg 2. */
		emit_data->dst_type = LLVMVectorType(bld_base->base.elem_type, 4);
		emit_data->args[0] = res;
		emit_data->args[1] = bld_base->uint_bld.zero;
		emit_data->args[2] = lp_build_emit_fetch(bld_base, emit_data->inst, 0, 0);
		emit_data->arg_count = 3;
		return;
	}

	/* Fetch and project texture coordinates */
	coords[3] = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_W);
	for (chan = 0; chan < 3; chan++ ) {
		coords[chan] = lp_build_emit_fetch(bld_base,
						   emit_data->inst, 0,
						   chan);
		/* TXP: divide each coordinate by W. */
		if (opcode == TGSI_OPCODE_TXP)
			coords[chan] = lp_build_emit_llvm_binary(bld_base,
								 TGSI_OPCODE_DIV,
								 coords[chan],
								 coords[3]);
	}

	if (opcode == TGSI_OPCODE_TXP)
		coords[3] = bld_base->base.one;

	/* Pack offsets. */
	if (has_offset && opcode != TGSI_OPCODE_TXF) {
		/* The offsets are six-bit signed integers packed like this:
		 * X=[5:0], Y=[13:8], and Z=[21:16].
		 */
		LLVMValueRef offset[3], pack;

		assert(inst->Texture.NumOffsets == 1);

		for (chan = 0; chan < 3; chan++) {
			offset[chan] = lp_build_emit_fetch_texoffset(bld_base,
								     emit_data->inst, 0, chan);
			offset[chan] = LLVMBuildAnd(gallivm->builder, offset[chan],
						    lp_build_const_int32(gallivm, 0x3f), "");
			if (chan)
				offset[chan] = LLVMBuildShl(gallivm->builder, offset[chan],
							    lp_build_const_int32(gallivm, chan*8), "");
		}

		pack = LLVMBuildOr(gallivm->builder, offset[0], offset[1], "");
		pack = LLVMBuildOr(gallivm->builder, pack, offset[2], "");
		address[count++] = pack;
	}

	/* Pack LOD bias value */
	if (opcode == TGSI_OPCODE_TXB)
		address[count++] = coords[3];
	if (opcode == TGSI_OPCODE_TXB2)
		address[count++] = lp_build_emit_fetch(bld_base, inst, 1, 0);

	/* Pack depth comparison value */
	if (tgsi_is_shadow_target(target) && opcode != TGSI_OPCODE_LODQ) {
		if (target == TGSI_TEXTURE_SHADOWCUBE_ARRAY) {
			/* Shadow cube arrays carry the compare value in src1.x
			 * because all four coordinate channels are used. */
			address[count++] = lp_build_emit_fetch(bld_base, inst, 1, 0);
		} else {
			assert(ref_pos >= 0);
			address[count++] = coords[ref_pos];
		}
	}

	/* Pack user derivatives */
	if (opcode == TGSI_OPCODE_TXD) {
		int param, num_src_deriv_channels;

		switch (target) {
		case TGSI_TEXTURE_3D:
			num_src_deriv_channels = 3;
			num_deriv_channels = 3;
			break;
		case TGSI_TEXTURE_2D:
		case TGSI_TEXTURE_SHADOW2D:
		case TGSI_TEXTURE_RECT:
		case TGSI_TEXTURE_SHADOWRECT:
		case TGSI_TEXTURE_2D_ARRAY:
		case TGSI_TEXTURE_SHADOW2D_ARRAY:
			num_src_deriv_channels = 2;
			num_deriv_channels = 2;
			break;
		case TGSI_TEXTURE_CUBE:
		case TGSI_TEXTURE_SHADOWCUBE:
		case TGSI_TEXTURE_CUBE_ARRAY:
		case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
			/* Cube derivatives will be converted to 2D. */
			num_src_deriv_channels = 3;
			num_deriv_channels = 2;
			break;
		case TGSI_TEXTURE_1D:
		case TGSI_TEXTURE_SHADOW1D:
		case TGSI_TEXTURE_1D_ARRAY:
		case TGSI_TEXTURE_SHADOW1D_ARRAY:
			num_src_deriv_channels = 1;
			num_deriv_channels = 1;
			break;
		default:
			unreachable("invalid target");
		}

		/* src1 = ddx, src2 = ddy. */
		for (param = 0; param < 2; param++)
			for (chan = 0; chan < num_src_deriv_channels; chan++)
				derivs[param * num_src_deriv_channels + chan] =
					lp_build_emit_fetch(bld_base, inst, param+1, chan);
	}

	/* May rewrite coords[] and derivs[] in place for cube mapping. */
	if (target == TGSI_TEXTURE_CUBE ||
	    target == TGSI_TEXTURE_CUBE_ARRAY ||
	    target == TGSI_TEXTURE_SHADOWCUBE ||
	    target == TGSI_TEXTURE_SHADOWCUBE_ARRAY)
		radeon_llvm_emit_prepare_cube_coords(bld_base, emit_data, coords, derivs);

	if (opcode == TGSI_OPCODE_TXD)
		for (int i = 0; i < num_deriv_channels * 2; i++)
			address[count++] = derivs[i];

	/* Pack texture coordinates */
	address[count++] = coords[0];
	if (num_coords > 1)
		address[count++] = coords[1];
	if (num_coords > 2)
		address[count++] = coords[2];

	/* Pack LOD or sample index */
	if (opcode == TGSI_OPCODE_TXL || opcode == TGSI_OPCODE_TXF)
		address[count++] = coords[3];
	else if (opcode == TGSI_OPCODE_TXL2)
		address[count++] = lp_build_emit_fetch(bld_base, inst, 1, 0);

	if (count > 16) {
		assert(!"Cannot handle more than 16 texture address parameters");
		count = 16;
	}

	/* The intrinsic takes the whole address as i32 lanes. */
	for (chan = 0; chan < count; chan++ ) {
		address[chan] = LLVMBuildBitCast(gallivm->builder,
						 address[chan], i32, "");
	}

	/* Adjust the sample index according to FMASK.
	 *
	 * For uncompressed MSAA surfaces, FMASK should return 0x76543210,
	 * which is the identity mapping. Each nibble says which physical sample
	 * should be fetched to get that sample.
	 *
	 * For example, 0x11111100 means there are only 2 samples stored and
	 * the second sample covers 3/4 of the pixel. When reading samples 0
	 * and 1, return physical sample 0 (determined by the first two 0s
	 * in FMASK), otherwise return physical sample 1.
	 *
	 * The sample index should be adjusted as follows:
	 *   sample_index = (fmask >> (sample_index * 4)) & 0xF;
	 */
	if (target == TGSI_TEXTURE_2D_MSAA ||
	    target == TGSI_TEXTURE_2D_ARRAY_MSAA) {
		struct lp_build_context *uint_bld = &bld_base->uint_bld;
		struct lp_build_emit_data txf_emit_data = *emit_data;
		LLVMValueRef txf_address[4];
		unsigned txf_count = count;
		/* NOTE: shadows the outer "inst" pointer for this block. */
		struct tgsi_full_instruction inst = {};

		memcpy(txf_address, address, sizeof(txf_address));

		/* Zero out layer (2D only) and sample-index channels for the
		 * FMASK fetch. */
		if (target == TGSI_TEXTURE_2D_MSAA) {
			txf_address[2] = bld_base->uint_bld.zero;
		}
		txf_address[3] = bld_base->uint_bld.zero;

		/* Read FMASK using TXF. */
		inst.Instruction.Opcode = TGSI_OPCODE_TXF;
		inst.Texture.Texture = target;
		txf_emit_data.inst = &inst;
		txf_emit_data.chan = 0;
		set_tex_fetch_args(gallivm, &txf_emit_data, TGSI_OPCODE_TXF,
				   target, fmask_ptr, NULL,
				   txf_address, txf_count, 0xf);
		build_tex_intrinsic(&tex_action, bld_base, &txf_emit_data);

		/* Initialize some constants. */
		LLVMValueRef four = LLVMConstInt(uint_bld->elem_type, 4, 0);
		LLVMValueRef F = LLVMConstInt(uint_bld->elem_type, 0xF, 0);

		/* Apply the formula. */
		LLVMValueRef fmask =
			LLVMBuildExtractElement(gallivm->builder,
						txf_emit_data.output[0],
						uint_bld->zero, "");

		/* The sample index channel: Z for 2D, W for 2D array. */
		unsigned sample_chan = target == TGSI_TEXTURE_2D_MSAA ? 2 : 3;

		LLVMValueRef sample_index4 =
			LLVMBuildMul(gallivm->builder, address[sample_chan], four, "");

		LLVMValueRef shifted_fmask =
			LLVMBuildLShr(gallivm->builder, fmask, sample_index4, "");

		LLVMValueRef final_sample =
			LLVMBuildAnd(gallivm->builder, shifted_fmask, F, "");

		/* Don't rewrite the sample index if WORD1.DATA_FORMAT of the FMASK
		 * resource descriptor is 0 (invalid),
		 */
		LLVMValueRef fmask_desc =
			LLVMBuildBitCast(gallivm->builder, fmask_ptr,
					 LLVMVectorType(uint_bld->elem_type, 8), "");

		LLVMValueRef fmask_word1 =
			LLVMBuildExtractElement(gallivm->builder, fmask_desc,
						uint_bld->one, "");

		LLVMValueRef word1_is_nonzero =
			LLVMBuildICmp(gallivm->builder, LLVMIntNE,
				      fmask_word1, uint_bld->zero, "");

		/* Replace the MSAA sample index. */
		address[sample_chan] =
			LLVMBuildSelect(gallivm->builder, word1_is_nonzero,
					final_sample, address[sample_chan], "");
	}

	if (opcode == TGSI_OPCODE_TXF) {
		/* add tex offsets */
		if (inst->Texture.NumOffsets) {
			struct lp_build_context *uint_bld = &bld_base->uint_bld;
			struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
			const struct tgsi_texture_offset * off = inst->TexOffsets;

			assert(inst->Texture.NumOffsets == 1);

			/* Offsets are immediates here; add them directly to the
			 * integer texel coordinates (cases fall through from the
			 * highest dimension down). */
			switch (target) {
			case TGSI_TEXTURE_3D:
				address[2] = lp_build_add(uint_bld, address[2],
						bld->immediates[off->Index][off->SwizzleZ]);
				/* fall through */
			case TGSI_TEXTURE_2D:
			case TGSI_TEXTURE_SHADOW2D:
			case TGSI_TEXTURE_RECT:
			case TGSI_TEXTURE_SHADOWRECT:
			case TGSI_TEXTURE_2D_ARRAY:
			case TGSI_TEXTURE_SHADOW2D_ARRAY:
				address[1] =
					lp_build_add(uint_bld, address[1],
						bld->immediates[off->Index][off->SwizzleY]);
				/* fall through */
			case TGSI_TEXTURE_1D:
			case TGSI_TEXTURE_SHADOW1D:
			case TGSI_TEXTURE_1D_ARRAY:
			case TGSI_TEXTURE_SHADOW1D_ARRAY:
				address[0] =
					lp_build_add(uint_bld, address[0],
						bld->immediates[off->Index][off->SwizzleX]);
				break;
				/* texture offsets do not apply to other texture targets */
			}
		}
	}

	if (opcode == TGSI_OPCODE_TG4) {
		unsigned gather_comp = 0;

		/* DMASK was repurposed for GATHER4. 4 components are always
		 * returned and DMASK works like a swizzle - it selects
		 * the component to fetch. The only valid DMASK values are
		 * 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
		 * (red,red,red,red) etc.) The ISA document doesn't mention
		 * this.
		 */

		/* Get the component index from src1.x for Gather4. */
		if (!tgsi_is_shadow_target(target)) {
			LLVMValueRef (*imms)[4] = lp_soa_context(bld_base)->immediates;
			LLVMValueRef comp_imm;
			struct tgsi_src_register src1 = inst->Src[1].Register;

			assert(src1.File == TGSI_FILE_IMMEDIATE);

			comp_imm = imms[src1.Index][src1.SwizzleX];
			gather_comp = LLVMConstIntGetZExtValue(comp_imm);
			gather_comp = CLAMP(gather_comp, 0, 3);
		}

		dmask = 1 << gather_comp;
	}

	set_tex_fetch_args(gallivm, emit_data, opcode, target, res_ptr,
			   samp_ptr, address, count, dmask);
}
2744
/* Choose and emit the LLVM intrinsic that implements a TGSI texture
 * opcode, using the arguments prepared earlier by tex_fetch_args.
 *
 * For sampling opcodes the intrinsic name is assembled as
 *   <base>[.c]<infix>[.o].<coord type>
 * where ".c" marks shadow (depth-compare) targets, the infix selects the
 * LOD source (.b = bias, .l = explicit lod, .d = derivatives) and ".o"
 * marks the presence of texel offsets.
 */
static void build_tex_intrinsic(const struct lp_build_tgsi_action * action,
				struct lp_build_tgsi_context * bld_base,
				struct lp_build_emit_data * emit_data)
{
	struct lp_build_context * base = &bld_base->base;
	unsigned opcode = emit_data->inst->Instruction.Opcode;
	unsigned target = emit_data->inst->Texture.Texture;
	char intr_name[127];
	bool has_offset = emit_data->inst->Texture.NumOffsets > 0;
	bool is_shadow = tgsi_is_shadow_target(target);
	char type[64];
	const char *name = "llvm.SI.image.sample";
	const char *infix = "";

	if (opcode == TGSI_OPCODE_TXQ && target == TGSI_TEXTURE_BUFFER) {
		/* Just return the buffer size. */
		emit_data->output[emit_data->chan] = emit_data->args[0];
		return;
	}

	if (target == TGSI_TEXTURE_BUFFER) {
		/* Buffer fetches go through the vertex-fetch path rather
		 * than the image sampling path. */
		emit_data->output[emit_data->chan] = lp_build_intrinsic(
			base->gallivm->builder,
			"llvm.SI.vs.load.input", emit_data->dst_type,
			emit_data->args, emit_data->arg_count,
			LLVMReadNoneAttribute | LLVMNoUnwindAttribute);
		return;
	}

	/* Pick the base intrinsic name.  TXF/TXQ/LODQ never use the shadow
	 * or offset name suffixes, so force those flags off for them. */
	switch (opcode) {
	case TGSI_OPCODE_TXF:
		/* MSAA images have no mip levels; use the non-mip load. */
		name = target == TGSI_TEXTURE_2D_MSAA ||
		       target == TGSI_TEXTURE_2D_ARRAY_MSAA ?
			       "llvm.SI.image.load" :
			       "llvm.SI.image.load.mip";
		is_shadow = false;
		has_offset = false;
		break;
	case TGSI_OPCODE_TXQ:
		name = "llvm.SI.getresinfo";
		is_shadow = false;
		has_offset = false;
		break;
	case TGSI_OPCODE_LODQ:
		name = "llvm.SI.getlod";
		is_shadow = false;
		has_offset = false;
		break;
	case TGSI_OPCODE_TEX:
	case TGSI_OPCODE_TEX2:
	case TGSI_OPCODE_TXP:
		break;
	case TGSI_OPCODE_TXB:
	case TGSI_OPCODE_TXB2:
		infix = ".b";	/* LOD bias */
		break;
	case TGSI_OPCODE_TXL:
	case TGSI_OPCODE_TXL2:
		infix = ".l";	/* explicit LOD */
		break;
	case TGSI_OPCODE_TXD:
		infix = ".d";	/* explicit derivatives */
		break;
	case TGSI_OPCODE_TG4:
		name = "llvm.SI.gather4";
		break;
	default:
		assert(0);
		return;
	}

	/* The coordinate type suffix reflects the (vector) type of args[0]. */
	if (LLVMGetTypeKind(LLVMTypeOf(emit_data->args[0])) == LLVMVectorTypeKind)
		sprintf(type, ".v%ui32",
			LLVMGetVectorSize(LLVMTypeOf(emit_data->args[0])));
	else
		strcpy(type, ".i32");

	/* Add the type and suffixes .c, .o if needed. */
	sprintf(intr_name, "%s%s%s%s%s",
		name, is_shadow ? ".c" : "", infix,
		has_offset ? ".o" : "", type);

	emit_data->output[emit_data->chan] = lp_build_intrinsic(
		base->gallivm->builder, intr_name, emit_data->dst_type,
		emit_data->args, emit_data->arg_count,
		LLVMReadNoneAttribute | LLVMNoUnwindAttribute);

	/* Divide the number of layers by 6 to get the number of cubes. */
	if (opcode == TGSI_OPCODE_TXQ &&
	    (target == TGSI_TEXTURE_CUBE_ARRAY ||
	     target == TGSI_TEXTURE_SHADOWCUBE_ARRAY)) {
		LLVMBuilderRef builder = bld_base->base.gallivm->builder;
		LLVMValueRef two = lp_build_const_int32(bld_base->base.gallivm, 2);
		LLVMValueRef six = lp_build_const_int32(bld_base->base.gallivm, 6);

		/* The layer count is in the .z channel of the resinfo result. */
		LLVMValueRef v4 = emit_data->output[emit_data->chan];
		LLVMValueRef z = LLVMBuildExtractElement(builder, v4, two, "");
		z = LLVMBuildSDiv(builder, z, six, "");

		emit_data->output[emit_data->chan] =
			LLVMBuildInsertElement(builder, v4, z, two, "");
	}
}
2848
2849 static void si_llvm_emit_txqs(
2850 const struct lp_build_tgsi_action * action,
2851 struct lp_build_tgsi_context * bld_base,
2852 struct lp_build_emit_data * emit_data)
2853 {
2854 struct gallivm_state *gallivm = bld_base->base.gallivm;
2855 LLVMBuilderRef builder = gallivm->builder;
2856 LLVMTypeRef i32 = LLVMInt32TypeInContext(gallivm->context);
2857 LLVMTypeRef v8i32 = LLVMVectorType(i32, 8);
2858 LLVMValueRef res, samples;
2859 LLVMValueRef res_ptr, samp_ptr, fmask_ptr = NULL;
2860
2861 tex_fetch_ptrs(bld_base, emit_data, &res_ptr, &samp_ptr, &fmask_ptr);
2862
2863
2864 /* Read the samples from the descriptor directly. */
2865 res = LLVMBuildBitCast(builder, res_ptr, v8i32, "");
2866 samples = LLVMBuildExtractElement(
2867 builder, res,
2868 lp_build_const_int32(gallivm, 3), "");
2869 samples = LLVMBuildLShr(builder, samples,
2870 lp_build_const_int32(gallivm, 16), "");
2871 samples = LLVMBuildAnd(builder, samples,
2872 lp_build_const_int32(gallivm, 0xf), "");
2873 samples = LLVMBuildShl(builder, lp_build_const_int32(gallivm, 1),
2874 samples, "");
2875
2876 emit_data->output[emit_data->chan] = samples;
2877 }
2878
2879 /*
2880 * SI implements derivatives using the local data store (LDS)
2881 * All writes to the LDS happen in all executing threads at
2882 * the same time. TID is the Thread ID for the current
2883 * thread and is a value between 0 and 63, representing
2884 * the thread's position in the wavefront.
2885 *
2886 * For the pixel shader threads are grouped into quads of four pixels.
2887 * The TIDs of the pixels of a quad are:
2888 *
2889 * +------+------+
2890 * |4n + 0|4n + 1|
2891 * +------+------+
2892 * |4n + 2|4n + 3|
2893 * +------+------+
2894 *
2895 * So, masking the TID with 0xfffffffc yields the TID of the top left pixel
2896 * of the quad, masking with 0xfffffffd yields the TID of the top pixel of
2897 * the current pixel's column, and masking with 0xfffffffe yields the TID
2898 * of the left pixel of the current pixel's row.
2899 *
2900 * Adding 1 yields the TID of the pixel to the right of the left pixel, and
2901 * adding 2 yields the TID of the pixel below the top pixel.
2902 */
2903 /* masks for thread ID. */
2904 #define TID_MASK_TOP_LEFT 0xfffffffc
2905 #define TID_MASK_TOP 0xfffffffd
2906 #define TID_MASK_LEFT 0xfffffffe
2907
/* Emit DDX/DDY (and their _FINE variants) by exchanging values between
 * the pixels of a quad through LDS: every thread stores its value at
 * its TID, then reads back the values of two neighboring quad pixels
 * and subtracts them (see the TID layout comment above).
 */
static void si_llvm_emit_ddxy(
	const struct lp_build_tgsi_action * action,
	struct lp_build_tgsi_context * bld_base,
	struct lp_build_emit_data * emit_data)
{
	struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
	struct gallivm_state *gallivm = bld_base->base.gallivm;
	struct lp_build_context * base = &bld_base->base;
	const struct tgsi_full_instruction *inst = emit_data->inst;
	unsigned opcode = inst->Instruction.Opcode;
	LLVMValueRef indices[2];
	LLVMValueRef store_ptr, load_ptr0, load_ptr1;
	LLVMValueRef tl, trbl, result[4];
	LLVMTypeRef i32;
	unsigned swizzle[4];
	unsigned c;
	int idx;
	unsigned mask;

	i32 = LLVMInt32TypeInContext(gallivm->context);

	/* This thread's own LDS slot, indexed by TID. */
	indices[0] = bld_base->uint_bld.zero;
	indices[1] = lp_build_intrinsic(gallivm->builder, "llvm.SI.tid", i32,
				NULL, 0, LLVMReadNoneAttribute);
	store_ptr = LLVMBuildGEP(gallivm->builder, si_shader_ctx->lds,
				 indices, 2, "");

	/* Coarse derivatives use the top-left pixel of the quad as the
	 * base; fine derivatives use the pixel's own row/column. */
	if (opcode == TGSI_OPCODE_DDX_FINE)
		mask = TID_MASK_LEFT;
	else if (opcode == TGSI_OPCODE_DDY_FINE)
		mask = TID_MASK_TOP;
	else
		mask = TID_MASK_TOP_LEFT;

	indices[1] = LLVMBuildAnd(gallivm->builder, indices[1],
				  lp_build_const_int32(gallivm, mask), "");
	load_ptr0 = LLVMBuildGEP(gallivm->builder, si_shader_ctx->lds,
				 indices, 2, "");

	/* for DDX we want the next X pixel, for DDY the next Y pixel. */
	idx = (opcode == TGSI_OPCODE_DDX || opcode == TGSI_OPCODE_DDX_FINE) ? 1 : 2;
	indices[1] = LLVMBuildAdd(gallivm->builder, indices[1],
				  lp_build_const_int32(gallivm, idx), "");
	load_ptr1 = LLVMBuildGEP(gallivm->builder, si_shader_ctx->lds,
				 indices, 2, "");

	for (c = 0; c < 4; ++c) {
		unsigned i;

		/* If an earlier channel used the same source swizzle,
		 * reuse its result instead of recomputing it. */
		swizzle[c] = tgsi_util_get_full_src_register_swizzle(&inst->Src[0], c);
		for (i = 0; i < c; ++i) {
			if (swizzle[i] == swizzle[c]) {
				result[c] = result[i];
				break;
			}
		}
		if (i != c)
			continue;

		/* Publish this thread's value, then read the two quad
		 * neighbors back (all threads execute this in lockstep). */
		LLVMBuildStore(gallivm->builder,
			       LLVMBuildBitCast(gallivm->builder,
						lp_build_emit_fetch(bld_base, inst, 0, c),
						i32, ""),
			       store_ptr);

		tl = LLVMBuildLoad(gallivm->builder, load_ptr0, "");
		tl = LLVMBuildBitCast(gallivm->builder, tl, base->elem_type, "");

		trbl = LLVMBuildLoad(gallivm->builder, load_ptr1, "");
		trbl = LLVMBuildBitCast(gallivm->builder, trbl, base->elem_type, "");

		result[c] = LLVMBuildFSub(gallivm->builder, trbl, tl, "");
	}

	emit_data->output[0] = lp_build_gather_values(gallivm, result, 4);
}
2984
/*
 * this takes an I,J coordinate pair,
 * and works out the X and Y derivatives.
 * it returns DDX(I), DDX(J), DDY(I), DDY(J).
 *
 * Like si_llvm_emit_ddxy, this exchanges values between the pixels of a
 * quad through LDS (see the TID layout comment above).
 */
static LLVMValueRef si_llvm_emit_ddxy_interp(
	struct lp_build_tgsi_context *bld_base,
	LLVMValueRef interp_ij)
{
	struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
	struct gallivm_state *gallivm = bld_base->base.gallivm;
	struct lp_build_context *base = &bld_base->base;
	LLVMValueRef indices[2];
	LLVMValueRef store_ptr, load_ptr_x, load_ptr_y, load_ptr_ddx, load_ptr_ddy, temp, temp2;
	LLVMValueRef tl, tr, bl, result[4];
	LLVMTypeRef i32;
	unsigned c;

	i32 = LLVMInt32TypeInContext(gallivm->context);

	/* This thread's own LDS slot, indexed by TID. */
	indices[0] = bld_base->uint_bld.zero;
	indices[1] = lp_build_intrinsic(gallivm->builder, "llvm.SI.tid", i32,
					NULL, 0, LLVMReadNoneAttribute);
	store_ptr = LLVMBuildGEP(gallivm->builder, si_shader_ctx->lds,
				 indices, 2, "");

	/* temp  = TID of the left pixel of this pixel's row.
	 * temp2 = TID of the top pixel of this pixel's column. */
	temp = LLVMBuildAnd(gallivm->builder, indices[1],
			    lp_build_const_int32(gallivm, TID_MASK_LEFT), "");

	temp2 = LLVMBuildAnd(gallivm->builder, indices[1],
			     lp_build_const_int32(gallivm, TID_MASK_TOP), "");

	indices[1] = temp;
	load_ptr_x = LLVMBuildGEP(gallivm->builder, si_shader_ctx->lds,
				  indices, 2, "");

	indices[1] = temp2;
	load_ptr_y = LLVMBuildGEP(gallivm->builder, si_shader_ctx->lds,
				  indices, 2, "");

	/* left + 1 = the pixel to the right of the left pixel. */
	indices[1] = LLVMBuildAdd(gallivm->builder, temp,
				  lp_build_const_int32(gallivm, 1), "");
	load_ptr_ddx = LLVMBuildGEP(gallivm->builder, si_shader_ctx->lds,
				   indices, 2, "");

	/* top + 2 = the pixel below the top pixel. */
	indices[1] = LLVMBuildAdd(gallivm->builder, temp2,
				  lp_build_const_int32(gallivm, 2), "");
	load_ptr_ddy = LLVMBuildGEP(gallivm->builder, si_shader_ctx->lds,
				   indices, 2, "");

	/* c = 0 handles I, c = 1 handles J. */
	for (c = 0; c < 2; ++c) {
		LLVMValueRef store_val;
		LLVMValueRef c_ll = lp_build_const_int32(gallivm, c);

		/* Publish this thread's value; all threads run in lockstep. */
		store_val = LLVMBuildExtractElement(gallivm->builder,
						    interp_ij, c_ll, "");
		LLVMBuildStore(gallivm->builder,
			       store_val,
			       store_ptr);

		/* DDX = right neighbor - left pixel of the row. */
		tl = LLVMBuildLoad(gallivm->builder, load_ptr_x, "");
		tl = LLVMBuildBitCast(gallivm->builder, tl, base->elem_type, "");

		tr = LLVMBuildLoad(gallivm->builder, load_ptr_ddx, "");
		tr = LLVMBuildBitCast(gallivm->builder, tr, base->elem_type, "");

		result[c] = LLVMBuildFSub(gallivm->builder, tr, tl, "");

		/* DDY = lower neighbor - top pixel of the column. */
		tl = LLVMBuildLoad(gallivm->builder, load_ptr_y, "");
		tl = LLVMBuildBitCast(gallivm->builder, tl, base->elem_type, "");

		bl = LLVMBuildLoad(gallivm->builder, load_ptr_ddy, "");
		bl = LLVMBuildBitCast(gallivm->builder, bl, base->elem_type, "");

		result[c + 2] = LLVMBuildFSub(gallivm->builder, bl, tl, "");
	}

	return lp_build_gather_values(gallivm, result, 4);
}
3064
/* Fetch the extra operands of the INTERP_* opcodes into emit_data->args:
 * - INTERP_OFFSET: args[0..1] = the x/y offset from src1.xy.
 * - INTERP_SAMPLE: args[0..1] = the position of the sample whose ID is
 *   in src1.x, made relative to the pixel center.
 * INTERP_CENTROID needs no extra arguments.
 */
static void interp_fetch_args(
	struct lp_build_tgsi_context *bld_base,
	struct lp_build_emit_data *emit_data)
{
	struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
	struct gallivm_state *gallivm = bld_base->base.gallivm;
	const struct tgsi_full_instruction *inst = emit_data->inst;

	if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET) {
		/* offset is in second src, first two channels */
		emit_data->args[0] = lp_build_emit_fetch(bld_base,
							 emit_data->inst, 1,
							 0);
		emit_data->args[1] = lp_build_emit_fetch(bld_base,
							 emit_data->inst, 1,
							 1);
		emit_data->arg_count = 2;
	} else if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) {
		LLVMValueRef sample_position;
		LLVMValueRef sample_id;
		LLVMValueRef halfval = lp_build_const_float(gallivm, 0.5f);

		/* fetch sample ID, then fetch its sample position,
		 * and place into first two channels.
		 */
		sample_id = lp_build_emit_fetch(bld_base,
						emit_data->inst, 1, 0);
		sample_id = LLVMBuildBitCast(gallivm->builder, sample_id,
					     LLVMInt32TypeInContext(gallivm->context),
					     "");
		sample_position = load_sample_position(&si_shader_ctx->radeon_bld, sample_id);

		/* Subtract 0.5 to turn the [0,1] sample position into an
		 * offset from the pixel center. */
		emit_data->args[0] = LLVMBuildExtractElement(gallivm->builder,
							     sample_position,
							     lp_build_const_int32(gallivm, 0), "");

		emit_data->args[0] = LLVMBuildFSub(gallivm->builder, emit_data->args[0], halfval, "");
		emit_data->args[1] = LLVMBuildExtractElement(gallivm->builder,
							     sample_position,
							     lp_build_const_int32(gallivm, 1), "");
		emit_data->args[1] = LLVMBuildFSub(gallivm->builder, emit_data->args[1], halfval, "");
		emit_data->arg_count = 2;
	}
}
3109
3110 static void build_interp_intrinsic(const struct lp_build_tgsi_action *action,
3111 struct lp_build_tgsi_context *bld_base,
3112 struct lp_build_emit_data *emit_data)
3113 {
3114 struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
3115 struct si_shader *shader = si_shader_ctx->shader;
3116 struct gallivm_state *gallivm = bld_base->base.gallivm;
3117 LLVMValueRef interp_param;
3118 const struct tgsi_full_instruction *inst = emit_data->inst;
3119 const char *intr_name;
3120 int input_index;
3121 int chan;
3122 int i;
3123 LLVMValueRef attr_number;
3124 LLVMTypeRef input_type = LLVMFloatTypeInContext(gallivm->context);
3125 LLVMValueRef params = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_PRIM_MASK);
3126 int interp_param_idx;
3127 unsigned location;
3128
3129 assert(inst->Src[0].Register.File == TGSI_FILE_INPUT);
3130 input_index = inst->Src[0].Register.Index;
3131
3132 if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET ||
3133 inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE)
3134 location = TGSI_INTERPOLATE_LOC_CENTER;
3135 else
3136 location = TGSI_INTERPOLATE_LOC_CENTROID;
3137
3138 interp_param_idx = lookup_interp_param_index(shader->ps_input_interpolate[input_index],
3139 location);
3140 if (interp_param_idx == -1)
3141 return;
3142 else if (interp_param_idx)
3143 interp_param = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, interp_param_idx);
3144 else
3145 interp_param = NULL;
3146
3147 attr_number = lp_build_const_int32(gallivm,
3148 shader->ps_input_param_offset[input_index]);
3149
3150 if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET ||
3151 inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) {
3152 LLVMValueRef ij_out[2];
3153 LLVMValueRef ddxy_out = si_llvm_emit_ddxy_interp(bld_base, interp_param);
3154
3155 /*
3156 * take the I then J parameters, and the DDX/Y for it, and
3157 * calculate the IJ inputs for the interpolator.
3158 * temp1 = ddx * offset/sample.x + I;
3159 * interp_param.I = ddy * offset/sample.y + temp1;
3160 * temp1 = ddx * offset/sample.x + J;
3161 * interp_param.J = ddy * offset/sample.y + temp1;
3162 */
3163 for (i = 0; i < 2; i++) {
3164 LLVMValueRef ix_ll = lp_build_const_int32(gallivm, i);
3165 LLVMValueRef iy_ll = lp_build_const_int32(gallivm, i + 2);
3166 LLVMValueRef ddx_el = LLVMBuildExtractElement(gallivm->builder,
3167 ddxy_out, ix_ll, "");
3168 LLVMValueRef ddy_el = LLVMBuildExtractElement(gallivm->builder,
3169 ddxy_out, iy_ll, "");
3170 LLVMValueRef interp_el = LLVMBuildExtractElement(gallivm->builder,
3171 interp_param, ix_ll, "");
3172 LLVMValueRef temp1, temp2;
3173
3174 interp_el = LLVMBuildBitCast(gallivm->builder, interp_el,
3175 LLVMFloatTypeInContext(gallivm->context), "");
3176
3177 temp1 = LLVMBuildFMul(gallivm->builder, ddx_el, emit_data->args[0], "");
3178
3179 temp1 = LLVMBuildFAdd(gallivm->builder, temp1, interp_el, "");
3180
3181 temp2 = LLVMBuildFMul(gallivm->builder, ddy_el, emit_data->args[1], "");
3182
3183 temp2 = LLVMBuildFAdd(gallivm->builder, temp2, temp1, "");
3184
3185 ij_out[i] = LLVMBuildBitCast(gallivm->builder,
3186 temp2,
3187 LLVMIntTypeInContext(gallivm->context, 32), "");
3188 }
3189 interp_param = lp_build_gather_values(bld_base->base.gallivm, ij_out, 2);
3190 }
3191
3192 intr_name = interp_param ? "llvm.SI.fs.interp" : "llvm.SI.fs.constant";
3193 for (chan = 0; chan < 2; chan++) {
3194 LLVMValueRef args[4];
3195 LLVMValueRef llvm_chan;
3196 unsigned schan;
3197
3198 schan = tgsi_util_get_full_src_register_swizzle(&inst->Src[0], chan);
3199 llvm_chan = lp_build_const_int32(gallivm, schan);
3200
3201 args[0] = llvm_chan;
3202 args[1] = attr_number;
3203 args[2] = params;
3204 args[3] = interp_param;
3205
3206 emit_data->output[chan] =
3207 lp_build_intrinsic(gallivm->builder, intr_name,
3208 input_type, args, args[3] ? 4 : 3,
3209 LLVMReadNoneAttribute | LLVMNoUnwindAttribute);
3210 }
3211 }
3212
3213 static unsigned si_llvm_get_stream(struct lp_build_tgsi_context *bld_base,
3214 struct lp_build_emit_data *emit_data)
3215 {
3216 LLVMValueRef (*imms)[4] = lp_soa_context(bld_base)->immediates;
3217 struct tgsi_src_register src0 = emit_data->inst->Src[0].Register;
3218 unsigned stream;
3219
3220 assert(src0.File == TGSI_FILE_IMMEDIATE);
3221
3222 stream = LLVMConstIntGetZExtValue(imms[src0.Index][src0.SwizzleX]) & 0x3;
3223 return stream;
3224 }
3225
3226 /* Emit one vertex from the geometry shader */
3227 static void si_llvm_emit_vertex(
3228 const struct lp_build_tgsi_action *action,
3229 struct lp_build_tgsi_context *bld_base,
3230 struct lp_build_emit_data *emit_data)
3231 {
3232 struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
3233 struct lp_build_context *uint = &bld_base->uint_bld;
3234 struct si_shader *shader = si_shader_ctx->shader;
3235 struct tgsi_shader_info *info = &shader->selector->info;
3236 struct gallivm_state *gallivm = bld_base->base.gallivm;
3237 LLVMTypeRef i32 = LLVMInt32TypeInContext(gallivm->context);
3238 LLVMValueRef soffset = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
3239 SI_PARAM_GS2VS_OFFSET);
3240 LLVMValueRef gs_next_vertex;
3241 LLVMValueRef can_emit, kill;
3242 LLVMValueRef args[2];
3243 unsigned chan;
3244 int i;
3245 unsigned stream;
3246
3247 stream = si_llvm_get_stream(bld_base, emit_data);
3248
3249 /* Write vertex attribute values to GSVS ring */
3250 gs_next_vertex = LLVMBuildLoad(gallivm->builder,
3251 si_shader_ctx->gs_next_vertex[stream],
3252 "");
3253
3254 /* If this thread has already emitted the declared maximum number of
3255 * vertices, kill it: excessive vertex emissions are not supposed to
3256 * have any effect, and GS threads have no externally observable
3257 * effects other than emitting vertices.
3258 */
3259 can_emit = LLVMBuildICmp(gallivm->builder, LLVMIntULE, gs_next_vertex,
3260 lp_build_const_int32(gallivm,
3261 shader->selector->gs_max_out_vertices), "");
3262 kill = lp_build_select(&bld_base->base, can_emit,
3263 lp_build_const_float(gallivm, 1.0f),
3264 lp_build_const_float(gallivm, -1.0f));
3265
3266 lp_build_intrinsic(gallivm->builder, "llvm.AMDGPU.kill",
3267 LLVMVoidTypeInContext(gallivm->context), &kill, 1, 0);
3268
3269 for (i = 0; i < info->num_outputs; i++) {
3270 LLVMValueRef *out_ptr =
3271 si_shader_ctx->radeon_bld.soa.outputs[i];
3272
3273 for (chan = 0; chan < 4; chan++) {
3274 LLVMValueRef out_val = LLVMBuildLoad(gallivm->builder, out_ptr[chan], "");
3275 LLVMValueRef voffset =
3276 lp_build_const_int32(gallivm, (i * 4 + chan) *
3277 shader->selector->gs_max_out_vertices);
3278
3279 voffset = lp_build_add(uint, voffset, gs_next_vertex);
3280 voffset = lp_build_mul_imm(uint, voffset, 4);
3281
3282 out_val = LLVMBuildBitCast(gallivm->builder, out_val, i32, "");
3283
3284 build_tbuffer_store(si_shader_ctx,
3285 si_shader_ctx->gsvs_ring[stream],
3286 out_val, 1,
3287 voffset, soffset, 0,
3288 V_008F0C_BUF_DATA_FORMAT_32,
3289 V_008F0C_BUF_NUM_FORMAT_UINT,
3290 1, 0, 1, 1, 0);
3291 }
3292 }
3293 gs_next_vertex = lp_build_add(uint, gs_next_vertex,
3294 lp_build_const_int32(gallivm, 1));
3295
3296 LLVMBuildStore(gallivm->builder, gs_next_vertex, si_shader_ctx->gs_next_vertex[stream]);
3297
3298 /* Signal vertex emission */
3299 args[0] = lp_build_const_int32(gallivm, SENDMSG_GS_OP_EMIT | SENDMSG_GS | (stream << 8));
3300 args[1] = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_GS_WAVE_ID);
3301 lp_build_intrinsic(gallivm->builder, "llvm.SI.sendmsg",
3302 LLVMVoidTypeInContext(gallivm->context), args, 2,
3303 LLVMNoUnwindAttribute);
3304 }
3305
3306 /* Cut one primitive from the geometry shader */
3307 static void si_llvm_emit_primitive(
3308 const struct lp_build_tgsi_action *action,
3309 struct lp_build_tgsi_context *bld_base,
3310 struct lp_build_emit_data *emit_data)
3311 {
3312 struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
3313 struct gallivm_state *gallivm = bld_base->base.gallivm;
3314 LLVMValueRef args[2];
3315 unsigned stream;
3316
3317 /* Signal primitive cut */
3318 stream = si_llvm_get_stream(bld_base, emit_data);
3319 args[0] = lp_build_const_int32(gallivm, SENDMSG_GS_OP_CUT | SENDMSG_GS | (stream << 8));
3320 args[1] = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_GS_WAVE_ID);
3321 lp_build_intrinsic(gallivm->builder, "llvm.SI.sendmsg",
3322 LLVMVoidTypeInContext(gallivm->context), args, 2,
3323 LLVMNoUnwindAttribute);
3324 }
3325
3326 static void si_llvm_emit_barrier(const struct lp_build_tgsi_action *action,
3327 struct lp_build_tgsi_context *bld_base,
3328 struct lp_build_emit_data *emit_data)
3329 {
3330 struct gallivm_state *gallivm = bld_base->base.gallivm;
3331
3332 lp_build_intrinsic(gallivm->builder, "llvm.AMDGPU.barrier.local",
3333 LLVMVoidTypeInContext(gallivm->context), NULL, 0,
3334 LLVMNoUnwindAttribute);
3335 }
3336
/* Dispatch entry for the TGSI texture opcodes (TEX, TXB, TXL, TXD, TXF,
 * TXQ, TG4, LODQ, ...). */
static const struct lp_build_tgsi_action tex_action = {
	.fetch_args = tex_fetch_args,
	.emit = build_tex_intrinsic,
};
3341
/* Dispatch entry for the TGSI INTERP_* opcodes. */
static const struct lp_build_tgsi_action interp_action = {
	.fetch_args = interp_fetch_args,
	.emit = build_interp_intrinsic,
};
3346
3347 static void create_meta_data(struct si_shader_context *si_shader_ctx)
3348 {
3349 struct gallivm_state *gallivm = si_shader_ctx->radeon_bld.soa.bld_base.base.gallivm;
3350 LLVMValueRef args[3];
3351
3352 args[0] = LLVMMDStringInContext(gallivm->context, "const", 5);
3353 args[1] = 0;
3354 args[2] = lp_build_const_int32(gallivm, 1);
3355
3356 si_shader_ctx->const_md = LLVMMDNodeInContext(gallivm->context, args, 3);
3357 }
3358
3359 static LLVMTypeRef const_array(LLVMTypeRef elem_type, int num_elements)
3360 {
3361 return LLVMPointerType(LLVMArrayType(elem_type, num_elements),
3362 CONST_ADDR_SPACE);
3363 }
3364
3365 static void declare_streamout_params(struct si_shader_context *si_shader_ctx,
3366 struct pipe_stream_output_info *so,
3367 LLVMTypeRef *params, LLVMTypeRef i32,
3368 unsigned *num_params)
3369 {
3370 int i;
3371
3372 /* Streamout SGPRs. */
3373 if (so->num_outputs) {
3374 params[si_shader_ctx->param_streamout_config = (*num_params)++] = i32;
3375 params[si_shader_ctx->param_streamout_write_index = (*num_params)++] = i32;
3376 }
3377 /* A streamout buffer offset is loaded if the stride is non-zero. */
3378 for (i = 0; i < 4; i++) {
3379 if (!so->stride[i])
3380 continue;
3381
3382 params[si_shader_ctx->param_streamout_offset[i] = (*num_params)++] = i32;
3383 }
3384 }
3385
/* Declare the parameter list of the shader's main LLVM function (SGPR
 * inputs first, then VGPR inputs, matching the hardware's user-data and
 * system-value register layout for each shader stage), create the
 * function, and allocate any LDS globals the shader needs.
 */
static void create_function(struct si_shader_context *si_shader_ctx)
{
	struct lp_build_tgsi_context *bld_base = &si_shader_ctx->radeon_bld.soa.bld_base;
	struct gallivm_state *gallivm = bld_base->base.gallivm;
	struct si_shader *shader = si_shader_ctx->shader;
	LLVMTypeRef params[SI_NUM_PARAMS], f32, i8, i32, v2i32, v3i32, v16i8, v4i32, v8i32;
	unsigned i, last_array_pointer, last_sgpr, num_params;

	/* Scalar/vector types used by the declarations below. */
	i8 = LLVMInt8TypeInContext(gallivm->context);
	i32 = LLVMInt32TypeInContext(gallivm->context);
	f32 = LLVMFloatTypeInContext(gallivm->context);
	v2i32 = LLVMVectorType(i32, 2);
	v3i32 = LLVMVectorType(i32, 3);
	v4i32 = LLVMVectorType(i32, 4);
	v8i32 = LLVMVectorType(i32, 8);
	v16i8 = LLVMVectorType(i8, 16);

	/* Descriptor array pointers shared by all shader stages. */
	params[SI_PARAM_RW_BUFFERS] = const_array(v16i8, SI_NUM_RW_BUFFERS);
	params[SI_PARAM_CONST_BUFFERS] = const_array(v16i8, SI_NUM_CONST_BUFFERS);
	params[SI_PARAM_SAMPLER_STATES] = const_array(v4i32, SI_NUM_SAMPLER_STATES);
	params[SI_PARAM_SAMPLER_VIEWS] = const_array(v8i32, SI_NUM_SAMPLER_VIEWS);
	last_array_pointer = SI_PARAM_SAMPLER_VIEWS;

	/* Stage-specific SGPRs, then VGPRs.  last_sgpr marks the boundary
	 * so the attribute loop below can tag them correctly. */
	switch (si_shader_ctx->type) {
	case TGSI_PROCESSOR_VERTEX:
		params[SI_PARAM_VERTEX_BUFFERS] = const_array(v16i8, SI_NUM_VERTEX_BUFFERS);
		last_array_pointer = SI_PARAM_VERTEX_BUFFERS;
		params[SI_PARAM_BASE_VERTEX] = i32;
		params[SI_PARAM_START_INSTANCE] = i32;
		num_params = SI_PARAM_START_INSTANCE+1;

		if (shader->key.vs.as_es) {
			/* VS as ES stage (before a GS): ESGS ring offset. */
			params[si_shader_ctx->param_es2gs_offset = num_params++] = i32;
		} else if (shader->key.vs.as_ls) {
			/* VS as LS stage (before tessellation). */
			params[SI_PARAM_LS_OUT_LAYOUT] = i32;
			num_params = SI_PARAM_LS_OUT_LAYOUT+1;
		} else {
			if (shader->is_gs_copy_shader) {
				/* The GS copy shader only reads constants. */
				last_array_pointer = SI_PARAM_CONST_BUFFERS;
				num_params = SI_PARAM_CONST_BUFFERS+1;
			} else {
				params[SI_PARAM_VS_STATE_BITS] = i32;
				num_params = SI_PARAM_VS_STATE_BITS+1;
			}

			/* The locations of the other parameters are assigned dynamically. */
			declare_streamout_params(si_shader_ctx, &shader->selector->so,
						 params, i32, &num_params);
		}

		last_sgpr = num_params-1;

		/* VGPRs */
		params[si_shader_ctx->param_vertex_id = num_params++] = i32;
		params[si_shader_ctx->param_rel_auto_id = num_params++] = i32;
		params[si_shader_ctx->param_vs_prim_id = num_params++] = i32;
		params[si_shader_ctx->param_instance_id = num_params++] = i32;
		break;

	case TGSI_PROCESSOR_TESS_CTRL:
		params[SI_PARAM_TCS_OUT_OFFSETS] = i32;
		params[SI_PARAM_TCS_OUT_LAYOUT] = i32;
		params[SI_PARAM_TCS_IN_LAYOUT] = i32;
		params[SI_PARAM_TESS_FACTOR_OFFSET] = i32;
		last_sgpr = SI_PARAM_TESS_FACTOR_OFFSET;

		/* VGPRs */
		params[SI_PARAM_PATCH_ID] = i32;
		params[SI_PARAM_REL_IDS] = i32;
		num_params = SI_PARAM_REL_IDS+1;
		break;

	case TGSI_PROCESSOR_TESS_EVAL:
		params[SI_PARAM_TCS_OUT_OFFSETS] = i32;
		params[SI_PARAM_TCS_OUT_LAYOUT] = i32;
		num_params = SI_PARAM_TCS_OUT_LAYOUT+1;

		if (shader->key.tes.as_es) {
			/* TES as ES stage (before a GS). */
			params[si_shader_ctx->param_es2gs_offset = num_params++] = i32;
		} else {
			declare_streamout_params(si_shader_ctx, &shader->selector->so,
						 params, i32, &num_params);
		}
		last_sgpr = num_params - 1;

		/* VGPRs */
		params[si_shader_ctx->param_tes_u = num_params++] = f32;
		params[si_shader_ctx->param_tes_v = num_params++] = f32;
		params[si_shader_ctx->param_tes_rel_patch_id = num_params++] = i32;
		params[si_shader_ctx->param_tes_patch_id = num_params++] = i32;
		break;

	case TGSI_PROCESSOR_GEOMETRY:
		params[SI_PARAM_GS2VS_OFFSET] = i32;
		params[SI_PARAM_GS_WAVE_ID] = i32;
		last_sgpr = SI_PARAM_GS_WAVE_ID;

		/* VGPRs: one ESGS ring offset per input vertex. */
		params[SI_PARAM_VTX0_OFFSET] = i32;
		params[SI_PARAM_VTX1_OFFSET] = i32;
		params[SI_PARAM_PRIMITIVE_ID] = i32;
		params[SI_PARAM_VTX2_OFFSET] = i32;
		params[SI_PARAM_VTX3_OFFSET] = i32;
		params[SI_PARAM_VTX4_OFFSET] = i32;
		params[SI_PARAM_VTX5_OFFSET] = i32;
		params[SI_PARAM_GS_INSTANCE_ID] = i32;
		num_params = SI_PARAM_GS_INSTANCE_ID+1;
		break;

	case TGSI_PROCESSOR_FRAGMENT:
		params[SI_PARAM_ALPHA_REF] = f32;
		params[SI_PARAM_PS_STATE_BITS] = i32;
		params[SI_PARAM_PRIM_MASK] = i32;
		last_sgpr = SI_PARAM_PRIM_MASK;
		/* VGPRs: interpolation barycentrics and system values. */
		params[SI_PARAM_PERSP_SAMPLE] = v2i32;
		params[SI_PARAM_PERSP_CENTER] = v2i32;
		params[SI_PARAM_PERSP_CENTROID] = v2i32;
		params[SI_PARAM_PERSP_PULL_MODEL] = v3i32;
		params[SI_PARAM_LINEAR_SAMPLE] = v2i32;
		params[SI_PARAM_LINEAR_CENTER] = v2i32;
		params[SI_PARAM_LINEAR_CENTROID] = v2i32;
		params[SI_PARAM_LINE_STIPPLE_TEX] = f32;
		params[SI_PARAM_POS_X_FLOAT] = f32;
		params[SI_PARAM_POS_Y_FLOAT] = f32;
		params[SI_PARAM_POS_Z_FLOAT] = f32;
		params[SI_PARAM_POS_W_FLOAT] = f32;
		params[SI_PARAM_FRONT_FACE] = f32;
		params[SI_PARAM_ANCILLARY] = i32;
		params[SI_PARAM_SAMPLE_COVERAGE] = f32;
		params[SI_PARAM_POS_FIXED_PT] = f32;
		num_params = SI_PARAM_POS_FIXED_PT+1;
		break;

	default:
		assert(0 && "unimplemented shader");
		return;
	}

	assert(num_params <= Elements(params));
	radeon_llvm_create_func(&si_shader_ctx->radeon_bld, params, num_params);
	radeon_llvm_shader_type(si_shader_ctx->radeon_bld.main_fn, si_shader_ctx->type);

	if (shader->dx10_clamp_mode)
		LLVMAddTargetDependentFunctionAttr(si_shader_ctx->radeon_bld.main_fn,
						   "enable-no-nans-fp-math", "true");

	for (i = 0; i <= last_sgpr; ++i) {
		LLVMValueRef P = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, i);

		/* We tell llvm that array inputs are passed by value to allow Sinking pass
		 * to move load. Inputs are constant so this is fine. */
		if (i <= last_array_pointer)
			LLVMAddAttribute(P, LLVMByValAttribute);
		else
			LLVMAddAttribute(P, LLVMInRegAttribute);
	}

	/* LDS scratch area for the quad-exchange used by derivative and
	 * interpolation opcodes (see si_llvm_emit_ddxy). */
	if (bld_base->info &&
	    (bld_base->info->opcode_count[TGSI_OPCODE_DDX] > 0 ||
	     bld_base->info->opcode_count[TGSI_OPCODE_DDY] > 0 ||
	     bld_base->info->opcode_count[TGSI_OPCODE_DDX_FINE] > 0 ||
	     bld_base->info->opcode_count[TGSI_OPCODE_DDY_FINE] > 0 ||
	     bld_base->info->opcode_count[TGSI_OPCODE_INTERP_OFFSET] > 0 ||
	     bld_base->info->opcode_count[TGSI_OPCODE_INTERP_SAMPLE] > 0))
		si_shader_ctx->lds =
			LLVMAddGlobalInAddressSpace(gallivm->module,
						    LLVMArrayType(i32, 64),
						    "ddxy_lds",
						    LOCAL_ADDR_SPACE);

	if ((si_shader_ctx->type == TGSI_PROCESSOR_VERTEX && shader->key.vs.as_ls) ||
	    si_shader_ctx->type == TGSI_PROCESSOR_TESS_CTRL ||
	    si_shader_ctx->type == TGSI_PROCESSOR_TESS_EVAL) {
		/* This is the upper bound, maximum is 32 inputs times 32 vertices */
		unsigned vertex_data_dw_size = 32*32*4;
		unsigned patch_data_dw_size = 32*4;
		/* The formula is: TCS inputs + TCS outputs + TCS patch outputs. */
		unsigned patch_dw_size = vertex_data_dw_size*2 + patch_data_dw_size;
		unsigned lds_dwords = patch_dw_size;

		/* The actual size is computed outside of the shader to reduce
		 * the number of shader variants. */
		si_shader_ctx->lds =
			LLVMAddGlobalInAddressSpace(gallivm->module,
						    LLVMArrayType(i32, lds_dwords),
						    "tess_lds",
						    LOCAL_ADDR_SPACE);
	}
}
3575
3576 static void preload_constants(struct si_shader_context *si_shader_ctx)
3577 {
3578 struct lp_build_tgsi_context * bld_base = &si_shader_ctx->radeon_bld.soa.bld_base;
3579 struct gallivm_state * gallivm = bld_base->base.gallivm;
3580 const struct tgsi_shader_info * info = bld_base->info;
3581 unsigned buf;
3582 LLVMValueRef ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_CONST_BUFFERS);
3583
3584 for (buf = 0; buf < SI_NUM_CONST_BUFFERS; buf++) {
3585 unsigned i, num_const = info->const_file_max[buf] + 1;
3586
3587 if (num_const == 0)
3588 continue;
3589
3590 /* Allocate space for the constant values */
3591 si_shader_ctx->constants[buf] = CALLOC(num_const * 4, sizeof(LLVMValueRef));
3592
3593 /* Load the resource descriptor */
3594 si_shader_ctx->const_buffers[buf] =
3595 build_indexed_load_const(si_shader_ctx, ptr, lp_build_const_int32(gallivm, buf));
3596
3597 /* Load the constants, we rely on the code sinking to do the rest */
3598 for (i = 0; i < num_const * 4; ++i) {
3599 si_shader_ctx->constants[buf][i] =
3600 buffer_load_const(gallivm->builder,
3601 si_shader_ctx->const_buffers[buf],
3602 lp_build_const_int32(gallivm, i * 4),
3603 bld_base->base.elem_type);
3604 }
3605 }
3606 }
3607
3608 static void preload_samplers(struct si_shader_context *si_shader_ctx)
3609 {
3610 struct lp_build_tgsi_context * bld_base = &si_shader_ctx->radeon_bld.soa.bld_base;
3611 struct gallivm_state * gallivm = bld_base->base.gallivm;
3612 const struct tgsi_shader_info * info = bld_base->info;
3613
3614 unsigned i, num_samplers = info->file_max[TGSI_FILE_SAMPLER] + 1;
3615
3616 LLVMValueRef res_ptr, samp_ptr;
3617 LLVMValueRef offset;
3618
3619 if (num_samplers == 0)
3620 return;
3621
3622 res_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_SAMPLER_VIEWS);
3623 samp_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_SAMPLER_STATES);
3624
3625 /* Load the resources and samplers, we rely on the code sinking to do the rest */
3626 for (i = 0; i < num_samplers; ++i) {
3627 /* Resource */
3628 offset = lp_build_const_int32(gallivm, i);
3629 si_shader_ctx->sampler_views[i] = build_indexed_load_const(si_shader_ctx, res_ptr, offset);
3630
3631 /* Sampler */
3632 offset = lp_build_const_int32(gallivm, i);
3633 si_shader_ctx->sampler_states[i] = build_indexed_load_const(si_shader_ctx, samp_ptr, offset);
3634
3635 /* FMASK resource */
3636 if (info->is_msaa_sampler[i]) {
3637 offset = lp_build_const_int32(gallivm, SI_FMASK_TEX_OFFSET + i);
3638 si_shader_ctx->sampler_views[SI_FMASK_TEX_OFFSET + i] =
3639 build_indexed_load_const(si_shader_ctx, res_ptr, offset);
3640 }
3641 }
3642 }
3643
3644 static void preload_streamout_buffers(struct si_shader_context *si_shader_ctx)
3645 {
3646 struct lp_build_tgsi_context * bld_base = &si_shader_ctx->radeon_bld.soa.bld_base;
3647 struct gallivm_state * gallivm = bld_base->base.gallivm;
3648 unsigned i;
3649
3650 /* Streamout can only be used if the shader is compiled as VS. */
3651 if (!si_shader_ctx->shader->selector->so.num_outputs ||
3652 (si_shader_ctx->type == TGSI_PROCESSOR_VERTEX &&
3653 (si_shader_ctx->shader->key.vs.as_es ||
3654 si_shader_ctx->shader->key.vs.as_ls)) ||
3655 (si_shader_ctx->type == TGSI_PROCESSOR_TESS_EVAL &&
3656 si_shader_ctx->shader->key.tes.as_es))
3657 return;
3658
3659 LLVMValueRef buf_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
3660 SI_PARAM_RW_BUFFERS);
3661
3662 /* Load the resources, we rely on the code sinking to do the rest */
3663 for (i = 0; i < 4; ++i) {
3664 if (si_shader_ctx->shader->selector->so.stride[i]) {
3665 LLVMValueRef offset = lp_build_const_int32(gallivm,
3666 SI_SO_BUF_OFFSET + i);
3667
3668 si_shader_ctx->so_buffers[i] = build_indexed_load_const(si_shader_ctx, buf_ptr, offset);
3669 }
3670 }
3671 }
3672
3673 /**
3674 * Load ESGS and GSVS ring buffer resource descriptors and save the variables
3675 * for later use.
3676 */
3677 static void preload_ring_buffers(struct si_shader_context *si_shader_ctx)
3678 {
3679 struct gallivm_state *gallivm =
3680 si_shader_ctx->radeon_bld.soa.bld_base.base.gallivm;
3681
3682 LLVMValueRef buf_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
3683 SI_PARAM_RW_BUFFERS);
3684
3685 if ((si_shader_ctx->type == TGSI_PROCESSOR_VERTEX &&
3686 si_shader_ctx->shader->key.vs.as_es) ||
3687 (si_shader_ctx->type == TGSI_PROCESSOR_TESS_EVAL &&
3688 si_shader_ctx->shader->key.tes.as_es) ||
3689 si_shader_ctx->type == TGSI_PROCESSOR_GEOMETRY) {
3690 LLVMValueRef offset = lp_build_const_int32(gallivm, SI_RING_ESGS);
3691
3692 si_shader_ctx->esgs_ring =
3693 build_indexed_load_const(si_shader_ctx, buf_ptr, offset);
3694 }
3695
3696 if (si_shader_ctx->shader->is_gs_copy_shader) {
3697 LLVMValueRef offset = lp_build_const_int32(gallivm, SI_RING_GSVS);
3698
3699 si_shader_ctx->gsvs_ring[0] =
3700 build_indexed_load_const(si_shader_ctx, buf_ptr, offset);
3701 }
3702 if (si_shader_ctx->type == TGSI_PROCESSOR_GEOMETRY) {
3703 int i;
3704 for (i = 0; i < 4; i++) {
3705 LLVMValueRef offset = lp_build_const_int32(gallivm, SI_RING_GSVS + i);
3706
3707 si_shader_ctx->gsvs_ring[i] =
3708 build_indexed_load_const(si_shader_ctx, buf_ptr, offset);
3709 }
3710 }
3711 }
3712
/**
 * Parse the (register, value) pairs that the compiler wrote into the
 * binary's config section and record the interesting fields in \p shader.
 *
 * \param symbol_offset  offset of the symbol whose config block to read
 *                       (binaries may contain config data for several symbols).
 */
void si_shader_binary_read_config(struct si_shader *shader,
				unsigned symbol_offset)
{
	unsigned i;
	const unsigned char *config =
		radeon_shader_binary_config_start(&shader->binary,
						symbol_offset);

	/* XXX: We may be able to emit some of these values directly rather than
	 * extracting fields to be emitted later.
	 */

	/* Each entry is 8 bytes: a little-endian register offset followed by
	 * its little-endian value. */
	for (i = 0; i < shader->binary.config_size_per_symbol; i+= 8) {
		unsigned reg = util_le32_to_cpu(*(uint32_t*)(config + i));
		unsigned value = util_le32_to_cpu(*(uint32_t*)(config + i + 4));
		switch (reg) {
		case R_00B028_SPI_SHADER_PGM_RSRC1_PS:
		case R_00B128_SPI_SHADER_PGM_RSRC1_VS:
		case R_00B228_SPI_SHADER_PGM_RSRC1_GS:
		case R_00B848_COMPUTE_PGM_RSRC1:
			/* The SGPRS/VGPRS fields are encoded in units of
			 * 8 SGPRs / 4 VGPRs minus one; MAX2 keeps the largest
			 * value seen across all parsed config blocks. */
			shader->num_sgprs = MAX2(shader->num_sgprs, (G_00B028_SGPRS(value) + 1) * 8);
			shader->num_vgprs = MAX2(shader->num_vgprs, (G_00B028_VGPRS(value) + 1) * 4);
			shader->float_mode = G_00B028_FLOAT_MODE(value);
			shader->rsrc1 = value;
			break;
		case R_00B02C_SPI_SHADER_PGM_RSRC2_PS:
			shader->lds_size = MAX2(shader->lds_size, G_00B02C_EXTRA_LDS_SIZE(value));
			break;
		case R_00B84C_COMPUTE_PGM_RSRC2:
			shader->lds_size = MAX2(shader->lds_size, G_00B84C_LDS_SIZE(value));
			shader->rsrc2 = value;
			break;
		case R_0286CC_SPI_PS_INPUT_ENA:
			shader->spi_ps_input_ena = value;
			break;
		case R_0286E8_SPI_TMPRING_SIZE:
		case R_00B860_COMPUTE_TMPRING_SIZE:
			/* WAVESIZE is in units of 256 dwords. */
			shader->scratch_bytes_per_wave =
				G_00B860_WAVESIZE(value) * 256 * 4 * 1;
			break;
		default:
			/* Harmless, but points at a compiler/driver mismatch. */
			fprintf(stderr, "Warning: Compiler emitted unknown "
				"config register: 0x%x\n", reg);
			break;
		}
	}
}
3761
3762 void si_shader_apply_scratch_relocs(struct si_context *sctx,
3763 struct si_shader *shader,
3764 uint64_t scratch_va)
3765 {
3766 unsigned i;
3767 uint32_t scratch_rsrc_dword0 = scratch_va;
3768 uint32_t scratch_rsrc_dword1 =
3769 S_008F04_BASE_ADDRESS_HI(scratch_va >> 32)
3770 | S_008F04_STRIDE(shader->scratch_bytes_per_wave / 64);
3771
3772 for (i = 0 ; i < shader->binary.reloc_count; i++) {
3773 const struct radeon_shader_reloc *reloc =
3774 &shader->binary.relocs[i];
3775 if (!strcmp(scratch_rsrc_dword0_symbol, reloc->name)) {
3776 util_memcpy_cpu_to_le32(shader->binary.code + reloc->offset,
3777 &scratch_rsrc_dword0, 4);
3778 } else if (!strcmp(scratch_rsrc_dword1_symbol, reloc->name)) {
3779 util_memcpy_cpu_to_le32(shader->binary.code + reloc->offset,
3780 &scratch_rsrc_dword1, 4);
3781 }
3782 }
3783 }
3784
3785 int si_shader_binary_upload(struct si_screen *sscreen, struct si_shader *shader)
3786 {
3787 const struct radeon_shader_binary *binary = &shader->binary;
3788 unsigned code_size = binary->code_size + binary->rodata_size;
3789 unsigned char *ptr;
3790
3791 r600_resource_reference(&shader->bo, NULL);
3792 shader->bo = si_resource_create_custom(&sscreen->b.b,
3793 PIPE_USAGE_IMMUTABLE,
3794 code_size);
3795 if (!shader->bo)
3796 return -ENOMEM;
3797
3798 ptr = sscreen->b.ws->buffer_map(shader->bo->buf, NULL,
3799 PIPE_TRANSFER_READ_WRITE);
3800 util_memcpy_cpu_to_le32(ptr, binary->code, binary->code_size);
3801 if (binary->rodata_size > 0) {
3802 ptr += binary->code_size;
3803 util_memcpy_cpu_to_le32(ptr, binary->rodata,
3804 binary->rodata_size);
3805 }
3806
3807 sscreen->b.ws->buffer_unmap(shader->bo->buf);
3808 return 0;
3809 }
3810
3811 static void si_shader_dump_disassembly(const struct radeon_shader_binary *binary,
3812 struct pipe_debug_callback *debug)
3813 {
3814 char *line, *p;
3815 unsigned i, count;
3816
3817 if (binary->disasm_string) {
3818 fprintf(stderr, "\nShader Disassembly:\n\n");
3819 fprintf(stderr, "%s\n", binary->disasm_string);
3820
3821 if (debug && debug->debug_message) {
3822 /* Very long debug messages are cut off, so send the
3823 * disassembly one line at a time. This causes more
3824 * overhead, but on the plus side it simplifies
3825 * parsing of resulting logs.
3826 */
3827 pipe_debug_message(debug, SHADER_INFO,
3828 "Shader Disassembly Begin");
3829
3830 line = binary->disasm_string;
3831 while (*line) {
3832 p = strchrnul(line, '\n');
3833 count = p - line;
3834
3835 if (count) {
3836 pipe_debug_message(debug, SHADER_INFO,
3837 "%.*s", count, line);
3838 }
3839
3840 if (!*p)
3841 break;
3842 line = p + 1;
3843 }
3844
3845 pipe_debug_message(debug, SHADER_INFO,
3846 "Shader Disassembly End");
3847 }
3848 } else {
3849 fprintf(stderr, "SI CODE:\n");
3850 for (i = 0; i < binary->code_size; i += 4) {
3851 fprintf(stderr, "@0x%x: %02x%02x%02x%02x\n", i,
3852 binary->code[i + 3], binary->code[i + 2],
3853 binary->code[i + 1], binary->code[i]);
3854 }
3855 }
3856 }
3857
/**
 * Post-process a freshly compiled shader binary: parse the config
 * registers and dump the disassembly and statistics when shader dumping
 * is enabled for \p processor.  Stats are always sent to the debug
 * callback regardless of the dump flags.
 */
void si_shader_binary_read(struct si_screen *sscreen, struct si_shader *shader,
			   struct pipe_debug_callback *debug, unsigned processor)
{
	const struct radeon_shader_binary *binary = &shader->binary;

	/* Fill num_sgprs/num_vgprs/lds_size/scratch_bytes_per_wave etc. */
	si_shader_binary_read_config(shader, 0);

	if (r600_can_dump_shader(&sscreen->b, processor)) {
		if (!(sscreen->b.debug_flags & DBG_NO_ASM))
			si_shader_dump_disassembly(binary, debug);

		fprintf(stderr, "*** SHADER STATS ***\n"
			"SGPRS: %d\nVGPRS: %d\nCode Size: %d bytes\nLDS: %d blocks\n"
			"Scratch: %d bytes per wave\n********************\n",
			shader->num_sgprs, shader->num_vgprs, binary->code_size,
			shader->lds_size, shader->scratch_bytes_per_wave);
	}

	/* One-line summary for debug-message consumers (e.g. shader-db). */
	pipe_debug_message(debug, SHADER_INFO,
			   "Shader Stats: SGPRS: %d VGPRS: %d Code Size: %d LDS: %d Scratch: %d",
			   shader->num_sgprs, shader->num_vgprs, binary->code_size,
			   shader->lds_size, shader->scratch_bytes_per_wave);
}
3881
/**
 * Compile the LLVM module \p mod into machine code, read back the config
 * registers and upload the result to a buffer object.
 *
 * On success, the heap copies of the binary metadata (and, when no scratch
 * is used, the code itself) are freed since they are no longer needed.
 *
 * \return 0 on success, a negative error code otherwise.
 */
int si_compile_llvm(struct si_screen *sscreen, struct si_shader *shader,
		    LLVMTargetMachineRef tm, LLVMModuleRef mod,
		    struct pipe_debug_callback *debug, unsigned processor)
{
	int r = 0;
	/* Global compile counter; also used to match a shader against a
	 * replacement binary via si_replace_shader(). */
	unsigned count = p_atomic_inc_return(&sscreen->b.num_compilations);

	if (r600_can_dump_shader(&sscreen->b, processor)) {
		fprintf(stderr, "radeonsi: Compiling shader %d\n", count);

		if (!(sscreen->b.debug_flags & DBG_NO_IR))
			LLVMDumpModule(mod);
	}

	/* si_replace_shader() lets a developer substitute a pre-built binary
	 * for this compile; only run LLVM when no replacement was found. */
	if (!si_replace_shader(count, &shader->binary)) {
		r = radeon_llvm_compile(mod, &shader->binary,
			r600_get_llvm_processor_name(sscreen->b.family), tm,
			debug);
		if (r)
			return r;
	}

	si_shader_binary_read(sscreen, shader, debug, processor);

	r = si_shader_binary_upload(sscreen, shader);
	if (r)
		return r;

	FREE(shader->binary.config);
	FREE(shader->binary.rodata);
	FREE(shader->binary.global_symbol_offsets);
	if (shader->scratch_bytes_per_wave == 0) {
		/* No scratch relocations will ever be applied, so the CPU
		 * copy of the code is no longer needed.  Zero only the
		 * members laid out before disasm_string, keeping the
		 * disassembly (and presumably anything after it) alive for
		 * later dumping. */
		FREE(shader->binary.code);
		FREE(shader->binary.relocs);
		memset(&shader->binary, 0,
		       offsetof(struct radeon_shader_binary, disasm_string));
	}
	return r;
}
3921
3922 /* Generate code for the hardware VS shader stage to go with a geometry shader */
3923 static int si_generate_gs_copy_shader(struct si_screen *sscreen,
3924 struct si_shader_context *si_shader_ctx,
3925 struct si_shader *gs, bool dump,
3926 struct pipe_debug_callback *debug)
3927 {
3928 struct gallivm_state *gallivm = &si_shader_ctx->radeon_bld.gallivm;
3929 struct lp_build_tgsi_context *bld_base = &si_shader_ctx->radeon_bld.soa.bld_base;
3930 struct lp_build_context *base = &bld_base->base;
3931 struct lp_build_context *uint = &bld_base->uint_bld;
3932 struct si_shader *shader = si_shader_ctx->shader;
3933 struct si_shader_output_values *outputs;
3934 struct tgsi_shader_info *gsinfo = &gs->selector->info;
3935 LLVMValueRef args[9];
3936 int i, r;
3937
3938 outputs = MALLOC(gsinfo->num_outputs * sizeof(outputs[0]));
3939
3940 si_shader_ctx->type = TGSI_PROCESSOR_VERTEX;
3941 shader->is_gs_copy_shader = true;
3942
3943 radeon_llvm_context_init(&si_shader_ctx->radeon_bld);
3944
3945 create_meta_data(si_shader_ctx);
3946 create_function(si_shader_ctx);
3947 preload_streamout_buffers(si_shader_ctx);
3948 preload_ring_buffers(si_shader_ctx);
3949
3950 args[0] = si_shader_ctx->gsvs_ring[0];
3951 args[1] = lp_build_mul_imm(uint,
3952 LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
3953 si_shader_ctx->param_vertex_id),
3954 4);
3955 args[3] = uint->zero;
3956 args[4] = uint->one; /* OFFEN */
3957 args[5] = uint->zero; /* IDXEN */
3958 args[6] = uint->one; /* GLC */
3959 args[7] = uint->one; /* SLC */
3960 args[8] = uint->zero; /* TFE */
3961
3962 /* Fetch vertex data from GSVS ring */
3963 for (i = 0; i < gsinfo->num_outputs; ++i) {
3964 unsigned chan;
3965
3966 outputs[i].name = gsinfo->output_semantic_name[i];
3967 outputs[i].sid = gsinfo->output_semantic_index[i];
3968
3969 for (chan = 0; chan < 4; chan++) {
3970 args[2] = lp_build_const_int32(gallivm,
3971 (i * 4 + chan) *
3972 gs->selector->gs_max_out_vertices * 16 * 4);
3973
3974 outputs[i].values[chan] =
3975 LLVMBuildBitCast(gallivm->builder,
3976 lp_build_intrinsic(gallivm->builder,
3977 "llvm.SI.buffer.load.dword.i32.i32",
3978 LLVMInt32TypeInContext(gallivm->context),
3979 args, 9,
3980 LLVMReadOnlyAttribute | LLVMNoUnwindAttribute),
3981 base->elem_type, "");
3982 }
3983 }
3984
3985 si_llvm_export_vs(bld_base, outputs, gsinfo->num_outputs);
3986
3987 radeon_llvm_finalize_module(&si_shader_ctx->radeon_bld);
3988
3989 if (dump)
3990 fprintf(stderr, "Copy Vertex Shader for Geometry Shader:\n\n");
3991
3992 r = si_compile_llvm(sscreen, si_shader_ctx->shader,
3993 si_shader_ctx->tm, bld_base->base.gallivm->module,
3994 debug, TGSI_PROCESSOR_GEOMETRY);
3995
3996 radeon_llvm_dispose(&si_shader_ctx->radeon_bld);
3997
3998 FREE(outputs);
3999 return r;
4000 }
4001
/**
 * Print the state-dependent part of the shader key to \p f for debugging.
 *
 * \param shader  the PIPE_SHADER_* stage the key belongs to
 * \param key     key to dump
 * \param f       output stream (typically stderr)
 */
void si_dump_shader_key(unsigned shader, union si_shader_key *key, FILE *f)
{
	int i;

	fprintf(f, "SHADER KEY\n");

	switch (shader) {
	case PIPE_SHADER_VERTEX:
		fprintf(f, " instance_divisors = {");
		for (i = 0; i < Elements(key->vs.instance_divisors); i++)
			fprintf(f, !i ? "%u" : ", %u",
				key->vs.instance_divisors[i]);
		fprintf(f, "}\n");
		fprintf(f, " as_es = %u\n", key->vs.as_es);
		fprintf(f, " as_ls = %u\n", key->vs.as_ls);
		fprintf(f, " export_prim_id = %u\n", key->vs.export_prim_id);
		break;

	case PIPE_SHADER_TESS_CTRL:
		fprintf(f, " prim_mode = %u\n", key->tcs.prim_mode);
		break;

	case PIPE_SHADER_TESS_EVAL:
		fprintf(f, " as_es = %u\n", key->tes.as_es);
		fprintf(f, " export_prim_id = %u\n", key->tes.export_prim_id);
		break;

	case PIPE_SHADER_GEOMETRY:
		/* The GS key currently has no state to dump. */
		break;

	case PIPE_SHADER_FRAGMENT:
		fprintf(f, " export_16bpc = 0x%X\n", key->ps.export_16bpc);
		fprintf(f, " last_cbuf = %u\n", key->ps.last_cbuf);
		fprintf(f, " color_two_side = %u\n", key->ps.color_two_side);
		fprintf(f, " alpha_func = %u\n", key->ps.alpha_func);
		fprintf(f, " alpha_to_one = %u\n", key->ps.alpha_to_one);
		fprintf(f, " poly_stipple = %u\n", key->ps.poly_stipple);
		fprintf(f, " clamp_color = %u\n", key->ps.clamp_color);
		break;

	default:
		assert(0);
	}
}
4046
4047 int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
4048 struct si_shader *shader,
4049 struct pipe_debug_callback *debug)
4050 {
4051 struct si_shader_selector *sel = shader->selector;
4052 struct tgsi_token *tokens = sel->tokens;
4053 struct si_shader_context si_shader_ctx;
4054 struct lp_build_tgsi_context * bld_base;
4055 struct tgsi_shader_info stipple_shader_info;
4056 LLVMModuleRef mod;
4057 int r = 0;
4058 bool poly_stipple = sel->type == PIPE_SHADER_FRAGMENT &&
4059 shader->key.ps.poly_stipple;
4060 bool dump = r600_can_dump_shader(&sscreen->b, sel->info.processor);
4061
4062 if (poly_stipple) {
4063 tokens = util_pstipple_create_fragment_shader(tokens, NULL,
4064 SI_POLY_STIPPLE_SAMPLER);
4065 tgsi_scan_shader(tokens, &stipple_shader_info);
4066 }
4067
4068 /* Dump TGSI code before doing TGSI->LLVM conversion in case the
4069 * conversion fails. */
4070 if (dump && !(sscreen->b.debug_flags & DBG_NO_TGSI)) {
4071 si_dump_shader_key(sel->type, &shader->key, stderr);
4072 tgsi_dump(tokens, 0);
4073 si_dump_streamout(&sel->so);
4074 }
4075
4076 assert(shader->nparam == 0);
4077
4078 memset(&si_shader_ctx, 0, sizeof(si_shader_ctx));
4079 radeon_llvm_context_init(&si_shader_ctx.radeon_bld);
4080 bld_base = &si_shader_ctx.radeon_bld.soa.bld_base;
4081
4082 if (sel->type != PIPE_SHADER_COMPUTE)
4083 shader->dx10_clamp_mode = true;
4084
4085 shader->uses_instanceid = sel->info.uses_instanceid;
4086 bld_base->info = poly_stipple ? &stipple_shader_info : &sel->info;
4087 bld_base->emit_fetch_funcs[TGSI_FILE_CONSTANT] = fetch_constant;
4088
4089 bld_base->op_actions[TGSI_OPCODE_INTERP_CENTROID] = interp_action;
4090 bld_base->op_actions[TGSI_OPCODE_INTERP_SAMPLE] = interp_action;
4091 bld_base->op_actions[TGSI_OPCODE_INTERP_OFFSET] = interp_action;
4092
4093 bld_base->op_actions[TGSI_OPCODE_TEX] = tex_action;
4094 bld_base->op_actions[TGSI_OPCODE_TEX2] = tex_action;
4095 bld_base->op_actions[TGSI_OPCODE_TXB] = tex_action;
4096 bld_base->op_actions[TGSI_OPCODE_TXB2] = tex_action;
4097 bld_base->op_actions[TGSI_OPCODE_TXD] = tex_action;
4098 bld_base->op_actions[TGSI_OPCODE_TXF] = tex_action;
4099 bld_base->op_actions[TGSI_OPCODE_TXL] = tex_action;
4100 bld_base->op_actions[TGSI_OPCODE_TXL2] = tex_action;
4101 bld_base->op_actions[TGSI_OPCODE_TXP] = tex_action;
4102 bld_base->op_actions[TGSI_OPCODE_TXQ] = tex_action;
4103 bld_base->op_actions[TGSI_OPCODE_TG4] = tex_action;
4104 bld_base->op_actions[TGSI_OPCODE_LODQ] = tex_action;
4105 bld_base->op_actions[TGSI_OPCODE_TXQS].emit = si_llvm_emit_txqs;
4106
4107 bld_base->op_actions[TGSI_OPCODE_DDX].emit = si_llvm_emit_ddxy;
4108 bld_base->op_actions[TGSI_OPCODE_DDY].emit = si_llvm_emit_ddxy;
4109 bld_base->op_actions[TGSI_OPCODE_DDX_FINE].emit = si_llvm_emit_ddxy;
4110 bld_base->op_actions[TGSI_OPCODE_DDY_FINE].emit = si_llvm_emit_ddxy;
4111
4112 bld_base->op_actions[TGSI_OPCODE_EMIT].emit = si_llvm_emit_vertex;
4113 bld_base->op_actions[TGSI_OPCODE_ENDPRIM].emit = si_llvm_emit_primitive;
4114 bld_base->op_actions[TGSI_OPCODE_BARRIER].emit = si_llvm_emit_barrier;
4115
4116 if (HAVE_LLVM >= 0x0306) {
4117 bld_base->op_actions[TGSI_OPCODE_MAX].emit = build_tgsi_intrinsic_nomem;
4118 bld_base->op_actions[TGSI_OPCODE_MAX].intr_name = "llvm.maxnum.f32";
4119 bld_base->op_actions[TGSI_OPCODE_MIN].emit = build_tgsi_intrinsic_nomem;
4120 bld_base->op_actions[TGSI_OPCODE_MIN].intr_name = "llvm.minnum.f32";
4121 }
4122
4123 si_shader_ctx.radeon_bld.load_system_value = declare_system_value;
4124 si_shader_ctx.shader = shader;
4125 si_shader_ctx.type = tgsi_get_processor_type(tokens);
4126 si_shader_ctx.screen = sscreen;
4127 si_shader_ctx.tm = tm;
4128
4129 switch (si_shader_ctx.type) {
4130 case TGSI_PROCESSOR_VERTEX:
4131 si_shader_ctx.radeon_bld.load_input = declare_input_vs;
4132 if (shader->key.vs.as_ls)
4133 bld_base->emit_epilogue = si_llvm_emit_ls_epilogue;
4134 else if (shader->key.vs.as_es)
4135 bld_base->emit_epilogue = si_llvm_emit_es_epilogue;
4136 else
4137 bld_base->emit_epilogue = si_llvm_emit_vs_epilogue;
4138 break;
4139 case TGSI_PROCESSOR_TESS_CTRL:
4140 bld_base->emit_fetch_funcs[TGSI_FILE_INPUT] = fetch_input_tcs;
4141 bld_base->emit_fetch_funcs[TGSI_FILE_OUTPUT] = fetch_output_tcs;
4142 bld_base->emit_store = store_output_tcs;
4143 bld_base->emit_epilogue = si_llvm_emit_tcs_epilogue;
4144 break;
4145 case TGSI_PROCESSOR_TESS_EVAL:
4146 bld_base->emit_fetch_funcs[TGSI_FILE_INPUT] = fetch_input_tes;
4147 if (shader->key.tes.as_es)
4148 bld_base->emit_epilogue = si_llvm_emit_es_epilogue;
4149 else
4150 bld_base->emit_epilogue = si_llvm_emit_vs_epilogue;
4151 break;
4152 case TGSI_PROCESSOR_GEOMETRY:
4153 bld_base->emit_fetch_funcs[TGSI_FILE_INPUT] = fetch_input_gs;
4154 bld_base->emit_epilogue = si_llvm_emit_gs_epilogue;
4155 break;
4156 case TGSI_PROCESSOR_FRAGMENT:
4157 si_shader_ctx.radeon_bld.load_input = declare_input_fs;
4158 bld_base->emit_epilogue = si_llvm_emit_fs_epilogue;
4159 break;
4160 default:
4161 assert(!"Unsupported shader type");
4162 return -1;
4163 }
4164
4165 create_meta_data(&si_shader_ctx);
4166 create_function(&si_shader_ctx);
4167 preload_constants(&si_shader_ctx);
4168 preload_samplers(&si_shader_ctx);
4169 preload_streamout_buffers(&si_shader_ctx);
4170 preload_ring_buffers(&si_shader_ctx);
4171
4172 if (si_shader_ctx.type == TGSI_PROCESSOR_GEOMETRY) {
4173 int i;
4174 for (i = 0; i < 4; i++) {
4175 si_shader_ctx.gs_next_vertex[i] =
4176 lp_build_alloca(bld_base->base.gallivm,
4177 bld_base->uint_bld.elem_type, "");
4178 }
4179 }
4180
4181 if (!lp_build_tgsi_llvm(bld_base, tokens)) {
4182 fprintf(stderr, "Failed to translate shader from TGSI to LLVM\n");
4183 goto out;
4184 }
4185
4186 radeon_llvm_finalize_module(&si_shader_ctx.radeon_bld);
4187
4188 mod = bld_base->base.gallivm->module;
4189 r = si_compile_llvm(sscreen, shader, tm, mod, debug, si_shader_ctx.type);
4190 if (r) {
4191 fprintf(stderr, "LLVM failed to compile shader\n");
4192 goto out;
4193 }
4194
4195 radeon_llvm_dispose(&si_shader_ctx.radeon_bld);
4196
4197 if (si_shader_ctx.type == TGSI_PROCESSOR_GEOMETRY) {
4198 shader->gs_copy_shader = CALLOC_STRUCT(si_shader);
4199 shader->gs_copy_shader->selector = shader->selector;
4200 shader->gs_copy_shader->key = shader->key;
4201 si_shader_ctx.shader = shader->gs_copy_shader;
4202 if ((r = si_generate_gs_copy_shader(sscreen, &si_shader_ctx,
4203 shader, dump, debug))) {
4204 free(shader->gs_copy_shader);
4205 shader->gs_copy_shader = NULL;
4206 goto out;
4207 }
4208 }
4209
4210 out:
4211 for (int i = 0; i < SI_NUM_CONST_BUFFERS; i++)
4212 FREE(si_shader_ctx.constants[i]);
4213 if (poly_stipple)
4214 tgsi_free_tokens(tokens);
4215 return r;
4216 }
4217
4218 void si_shader_destroy(struct si_shader *shader)
4219 {
4220 if (shader->gs_copy_shader) {
4221 si_shader_destroy(shader->gs_copy_shader);
4222 FREE(shader->gs_copy_shader);
4223 }
4224
4225 if (shader->scratch_bo)
4226 r600_resource_reference(&shader->scratch_bo, NULL);
4227
4228 r600_resource_reference(&shader->bo, NULL);
4229
4230 FREE(shader->binary.code);
4231 FREE(shader->binary.relocs);
4232 FREE(shader->binary.disasm_string);
4233 }