2 * Copyright 2020 Advanced Micro Devices, Inc.
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * on the rights to use, copy, modify, merge, publish, distribute, sub
9 * license, and/or sell copies of the Software, and to permit persons to whom
10 * the Software is furnished to do so, subject to the following conditions:
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22 * USE OR OTHER DEALINGS IN THE SOFTWARE.
25 #include "si_shader_internal.h"
/* Return the relative patch ID for the current shader stage.
 *
 * Visible cases:
 * - PIPE_SHADER_TESS_CTRL: unpacked from args.tcs_rel_ids with
 *   si_unpack_param(..., 0, 8).
 * - PIPE_SHADER_TESS_EVAL: the tes_rel_patch_id shader argument.
 *
 * NOTE(review): the switch header and whatever follows the last case
 * (e.g. a default branch) are missing from this listing - confirm against
 * the full file.
 */
29 static LLVMValueRef
get_rel_patch_id(struct si_shader_context
*ctx
)
32 case PIPE_SHADER_TESS_CTRL
:
33 return si_unpack_param(ctx
, ctx
->args
.tcs_rel_ids
, 0, 8);
35 case PIPE_SHADER_TESS_EVAL
:
36 return ac_get_arg(&ctx
->ac
, ctx
->tes_rel_patch_id
);
/* Tessellation shaders pass outputs to the next shader using LDS.
 *
 * LS outputs = TCS inputs
 * TCS outputs = TES inputs
 *
 * - TCS inputs for patch 0
 * - TCS inputs for patch 1
 * - TCS inputs for patch 2 = get_tcs_in_current_patch_offset (if RelPatchID==2)
 *
 * - TCS outputs for patch 0 = get_tcs_out_patch0_offset
 * - Per-patch TCS outputs for patch 0 = get_tcs_out_patch0_patch_data_offset
 * - TCS outputs for patch 1
 * - Per-patch TCS outputs for patch 1
 * - TCS outputs for patch 2 = get_tcs_out_current_patch_offset (if RelPatchID==2)
 * - Per-patch TCS outputs for patch 2 = get_tcs_out_current_patch_data_offset (if RelPatchID==2)
 *
 * All three shaders VS(LS), TCS, TES share the same LDS space.
 */
66 get_tcs_in_patch_stride(struct si_shader_context
*ctx
)
68 return si_unpack_param(ctx
, ctx
->vs_state_bits
, 11, 13);
71 static unsigned get_tcs_out_vertex_dw_stride_constant(struct si_shader_context
*ctx
)
73 assert(ctx
->type
== PIPE_SHADER_TESS_CTRL
);
75 if (ctx
->shader
->key
.mono
.u
.ff_tcs_inputs_to_copy
)
76 return util_last_bit64(ctx
->shader
->key
.mono
.u
.ff_tcs_inputs_to_copy
) * 4;
78 return util_last_bit64(ctx
->shader
->selector
->outputs_written
) * 4;
81 static LLVMValueRef
get_tcs_out_vertex_dw_stride(struct si_shader_context
*ctx
)
83 unsigned stride
= get_tcs_out_vertex_dw_stride_constant(ctx
);
85 return LLVMConstInt(ctx
->i32
, stride
, 0);
88 static LLVMValueRef
get_tcs_out_patch_stride(struct si_shader_context
*ctx
)
90 if (ctx
->shader
->key
.mono
.u
.ff_tcs_inputs_to_copy
)
91 return si_unpack_param(ctx
, ctx
->tcs_out_lds_layout
, 0, 13);
93 const struct si_shader_info
*info
= &ctx
->shader
->selector
->info
;
94 unsigned tcs_out_vertices
= info
->properties
[TGSI_PROPERTY_TCS_VERTICES_OUT
];
95 unsigned vertex_dw_stride
= get_tcs_out_vertex_dw_stride_constant(ctx
);
96 unsigned num_patch_outputs
= util_last_bit64(ctx
->shader
->selector
->patch_outputs_written
);
97 unsigned patch_dw_stride
= tcs_out_vertices
* vertex_dw_stride
+
98 num_patch_outputs
* 4;
99 return LLVMConstInt(ctx
->i32
, patch_dw_stride
, 0);
103 get_tcs_out_patch0_offset(struct si_shader_context
*ctx
)
105 return LLVMBuildMul(ctx
->ac
.builder
,
106 si_unpack_param(ctx
, ctx
->tcs_out_lds_offsets
, 0, 16),
107 LLVMConstInt(ctx
->i32
, 4, 0), "");
111 get_tcs_out_patch0_patch_data_offset(struct si_shader_context
*ctx
)
113 return LLVMBuildMul(ctx
->ac
.builder
,
114 si_unpack_param(ctx
, ctx
->tcs_out_lds_offsets
, 16, 16),
115 LLVMConstInt(ctx
->i32
, 4, 0), "");
119 get_tcs_in_current_patch_offset(struct si_shader_context
*ctx
)
121 LLVMValueRef patch_stride
= get_tcs_in_patch_stride(ctx
);
122 LLVMValueRef rel_patch_id
= get_rel_patch_id(ctx
);
124 return LLVMBuildMul(ctx
->ac
.builder
, patch_stride
, rel_patch_id
, "");
128 get_tcs_out_current_patch_offset(struct si_shader_context
*ctx
)
130 LLVMValueRef patch0_offset
= get_tcs_out_patch0_offset(ctx
);
131 LLVMValueRef patch_stride
= get_tcs_out_patch_stride(ctx
);
132 LLVMValueRef rel_patch_id
= get_rel_patch_id(ctx
);
134 return ac_build_imad(&ctx
->ac
, patch_stride
, rel_patch_id
, patch0_offset
);
138 get_tcs_out_current_patch_data_offset(struct si_shader_context
*ctx
)
140 LLVMValueRef patch0_patch_data_offset
=
141 get_tcs_out_patch0_patch_data_offset(ctx
);
142 LLVMValueRef patch_stride
= get_tcs_out_patch_stride(ctx
);
143 LLVMValueRef rel_patch_id
= get_rel_patch_id(ctx
);
145 return ac_build_imad(&ctx
->ac
, patch_stride
, rel_patch_id
, patch0_patch_data_offset
);
148 static LLVMValueRef
get_num_tcs_out_vertices(struct si_shader_context
*ctx
)
150 unsigned tcs_out_vertices
=
151 ctx
->shader
->selector
?
152 ctx
->shader
->selector
->info
.properties
[TGSI_PROPERTY_TCS_VERTICES_OUT
] : 0;
154 /* If !tcs_out_vertices, it's either the fixed-func TCS or the TCS epilog. */
155 if (ctx
->type
== PIPE_SHADER_TESS_CTRL
&& tcs_out_vertices
)
156 return LLVMConstInt(ctx
->i32
, tcs_out_vertices
, 0);
158 return si_unpack_param(ctx
, ctx
->tcs_offchip_layout
, 6, 6);
/* Return the per-vertex stride of TCS inputs in dwords.
 *
 * Visible cases:
 * - PIPE_SHADER_VERTEX: i32 constant selector->lshs_vertex_stride / 4.
 * - PIPE_SHADER_TESS_CTRL: on GFX9+ with a monolithic shader, i32 constant
 *   key.part.tcs.ls->lshs_vertex_stride / 4; otherwise unpacked from
 *   vs_state_bits with si_unpack_param(..., 24, 8).
 *
 * NOTE(review): the declaration of "stride", the switch header, closing
 * braces and any default branch are missing from this listing - confirm
 * against the full file.
 */
161 static LLVMValueRef
get_tcs_in_vertex_dw_stride(struct si_shader_context
*ctx
)
166 case PIPE_SHADER_VERTEX
:
167 stride
= ctx
->shader
->selector
->lshs_vertex_stride
/ 4;
168 return LLVMConstInt(ctx
->i32
, stride
, 0);
170 case PIPE_SHADER_TESS_CTRL
:
171 if (ctx
->screen
->info
.chip_class
>= GFX9
&&
172 ctx
->shader
->is_monolithic
) {
173 stride
= ctx
->shader
->key
.part
.tcs
.ls
->lshs_vertex_stride
/ 4;
174 return LLVMConstInt(ctx
->i32
, stride
, 0);
176 return si_unpack_param(ctx
, ctx
->vs_state_bits
, 24, 8);
/* Compute an LDS dword address from a base address and generic indices.
 *
 * Visible steps:
 * - if vertex_dw_stride is non-NULL, base_addr is advanced with
 *   ac_build_imad(vertex_index, vertex_dw_stride, base_addr);
 * - base_addr is advanced with ac_build_imad(param_index, 4, base_addr);
 * - the unique I/O index of (name, index) is looked up - through
 *   si_shader_io_get_unique_index_patch() for PATCH/TESSINNER/TESSOUTER,
 *   si_shader_io_get_unique_index() otherwise - and index * 4 is added.
 *
 * NOTE(review): the lines around the second imad are missing from this
 * listing, so it may be guarded by a condition (e.g. on param_index) -
 * confirm against the full file.
 */
184 static LLVMValueRef
get_dw_address_from_generic_indices(struct si_shader_context
*ctx
,
185 LLVMValueRef vertex_dw_stride
,
186 LLVMValueRef base_addr
,
187 LLVMValueRef vertex_index
,
188 LLVMValueRef param_index
,
189 ubyte name
, ubyte index
)
191 if (vertex_dw_stride
) {
192 base_addr
= ac_build_imad(&ctx
->ac
, vertex_index
,
193 vertex_dw_stride
, base_addr
);
197 base_addr
= ac_build_imad(&ctx
->ac
, param_index
,
198 LLVMConstInt(ctx
->i32
, 4, 0), base_addr
);
201 int param
= name
== TGSI_SEMANTIC_PATCH
||
202 name
== TGSI_SEMANTIC_TESSINNER
||
203 name
== TGSI_SEMANTIC_TESSOUTER
?
204 si_shader_io_get_unique_index_patch(name
, index
) :
205 si_shader_io_get_unique_index(name
, index
, false);
207 /* Add the base address of the element. */
208 return LLVMBuildAdd(ctx
->ac
.builder
, base_addr
,
209 LLVMConstInt(ctx
->i32
, param
* 4, 0), "");
/* The offchip buffer layout for TCS->TES is
 *
 * - attribute 0 of patch 0 vertex 0
 * - attribute 0 of patch 0 vertex 1
 * - attribute 0 of patch 0 vertex 2
 *
 * - attribute 0 of patch 1 vertex 0
 * - attribute 0 of patch 1 vertex 1
 *
 * - attribute 1 of patch 0 vertex 0
 * - attribute 1 of patch 0 vertex 1
 *
 * - per patch attribute 0 of patch 0
 * - per patch attribute 0 of patch 1
 *
 * Note that every attribute has 4 components.
 */
/* Compute an address into the TCS->TES offchip buffer for
 * (rel_patch_id, vertex_index, param_index).
 *
 * Visible steps:
 * - vertices_per_patch comes from get_num_tcs_out_vertices(), num_patches
 *   from tcs_offchip_layout (si_unpack_param(..., 0, 6));
 * - one path sets base_addr = imad(rel_patch_id, vertices_per_patch,
 *   vertex_index) with param_stride = total_vertices; another sets
 *   base_addr = rel_patch_id with param_stride = num_patches;
 * - base_addr = imad(param_index, param_stride, base_addr) * 16;
 * - a patch_data_offset unpacked from tcs_offchip_layout
 *   (si_unpack_param(..., 12, 20)) is added to base_addr.
 *
 * NOTE(review): the conditions selecting between these paths, the second
 * operand of the total_vertices multiply and the return statement are
 * missing from this listing. Callers in this file pass NULL as
 * vertex_index for per-patch data, so the branches presumably test
 * vertex_index - confirm against the full file.
 */
230 static LLVMValueRef
get_tcs_tes_buffer_address(struct si_shader_context
*ctx
,
231 LLVMValueRef rel_patch_id
,
232 LLVMValueRef vertex_index
,
233 LLVMValueRef param_index
)
235 LLVMValueRef base_addr
, vertices_per_patch
, num_patches
, total_vertices
;
236 LLVMValueRef param_stride
, constant16
;
238 vertices_per_patch
= get_num_tcs_out_vertices(ctx
);
239 num_patches
= si_unpack_param(ctx
, ctx
->tcs_offchip_layout
, 0, 6);
240 total_vertices
= LLVMBuildMul(ctx
->ac
.builder
, vertices_per_patch
,
243 constant16
= LLVMConstInt(ctx
->i32
, 16, 0);
245 base_addr
= ac_build_imad(&ctx
->ac
, rel_patch_id
,
246 vertices_per_patch
, vertex_index
);
247 param_stride
= total_vertices
;
249 base_addr
= rel_patch_id
;
250 param_stride
= num_patches
;
253 base_addr
= ac_build_imad(&ctx
->ac
, param_index
, param_stride
, base_addr
);
254 base_addr
= LLVMBuildMul(ctx
->ac
.builder
, base_addr
, constant16
, "");
257 LLVMValueRef patch_data_offset
=
258 si_unpack_param(ctx
, ctx
->tcs_offchip_layout
, 12, 20);
260 base_addr
= LLVMBuildAdd(ctx
->ac
.builder
, base_addr
,
261 patch_data_offset
, "");
/* Compute the TCS->TES offchip buffer address of the attribute identified
 * by (name, index) for the current relative patch.
 *
 * param_index_base is the unique I/O index of (name, index), looked up with
 * si_shader_io_get_unique_index_patch() for PATCH/TESSINNER/TESSOUTER and
 * si_shader_io_get_unique_index() otherwise. param_index then becomes either
 * param_index + param_index_base or the constant param_index_base, and the
 * result is passed to get_tcs_tes_buffer_address().
 *
 * NOTE(review): the condition choosing between the two param_index
 * assignments is missing from this listing (presumably whether a dynamic
 * param_index was supplied) - confirm against the full file.
 */
266 static LLVMValueRef
get_tcs_tes_buffer_address_from_generic_indices(
267 struct si_shader_context
*ctx
,
268 LLVMValueRef vertex_index
,
269 LLVMValueRef param_index
,
270 ubyte name
, ubyte index
)
272 unsigned param_index_base
;
274 param_index_base
= name
== TGSI_SEMANTIC_PATCH
||
275 name
== TGSI_SEMANTIC_TESSINNER
||
276 name
== TGSI_SEMANTIC_TESSOUTER
?
277 si_shader_io_get_unique_index_patch(name
, index
) :
278 si_shader_io_get_unique_index(name
, index
, false);
281 param_index
= LLVMBuildAdd(ctx
->ac
.builder
, param_index
,
282 LLVMConstInt(ctx
->i32
, param_index_base
, 0),
285 param_index
= LLVMConstInt(ctx
->i32
, param_index_base
, 0);
288 return get_tcs_tes_buffer_address(ctx
, get_rel_patch_id(ctx
),
289 vertex_index
, param_index
);
/* Load a value of the given type from a buffer resource.
 *
 * Visible paths (every load passes ac_glc and forwards can_speculate):
 * 1. ac_build_buffer_load(..., 4, ...) bitcast to a 4-element vector of
 *    "type" and returned as a whole.
 * 2. When ac_get_type_size(type) != 8: the same 4-wide load, bitcast, and
 *    element "swizzle" is extracted.
 * 3. Otherwise two ac_build_buffer_load(..., 1, ...) calls at immediate
 *    offsets swizzle * 4 and swizzle * 4 + 4 are combined with
 *    si_build_gather_64bit().
 *
 * NOTE(review): the condition guarding path 1 is missing from this listing.
 * load_tess_level() passes ~0 as swizzle, so the guard is presumably
 * "swizzle == ~0" - confirm against the full file.
 */
292 static LLVMValueRef
buffer_load(struct si_shader_context
*ctx
,
293 LLVMTypeRef type
, unsigned swizzle
,
294 LLVMValueRef buffer
, LLVMValueRef offset
,
295 LLVMValueRef base
, bool can_speculate
)
297 LLVMValueRef value
, value2
;
298 LLVMTypeRef vec_type
= LLVMVectorType(type
, 4);
301 value
= ac_build_buffer_load(&ctx
->ac
, buffer
, 4, NULL
, base
, offset
,
302 0, ac_glc
, can_speculate
, false);
304 return LLVMBuildBitCast(ctx
->ac
.builder
, value
, vec_type
, "");
307 if (ac_get_type_size(type
) != 8) {
308 value
= ac_build_buffer_load(&ctx
->ac
, buffer
, 4, NULL
, base
, offset
,
309 0, ac_glc
, can_speculate
, false);
311 value
= LLVMBuildBitCast(ctx
->ac
.builder
, value
, vec_type
, "");
312 return LLVMBuildExtractElement(ctx
->ac
.builder
, value
,
313 LLVMConstInt(ctx
->i32
, swizzle
, 0), "");
316 value
= ac_build_buffer_load(&ctx
->ac
, buffer
, 1, NULL
, base
, offset
,
317 swizzle
* 4, ac_glc
, can_speculate
, false);
319 value2
= ac_build_buffer_load(&ctx
->ac
, buffer
, 1, NULL
, base
, offset
,
320 swizzle
* 4 + 4, ac_glc
, can_speculate
, false);
322 return si_build_gather_64bit(ctx
, type
, value
, value2
);
326 * Load from LSHS LDS storage.
328 * \param type output value type
329 * \param swizzle offset (typically 0..3); it can be ~0, which loads a vec4
330 * \param dw_addr address in dwords
332 static LLVMValueRef
lshs_lds_load(struct si_shader_context
*ctx
,
333 LLVMTypeRef type
, unsigned swizzle
,
334 LLVMValueRef dw_addr
)
339 LLVMValueRef values
[4];
341 for (unsigned chan
= 0; chan
< 4; chan
++)
342 values
[chan
] = lshs_lds_load(ctx
, type
, chan
, dw_addr
);
344 return ac_build_gather_values(&ctx
->ac
, values
, 4);
347 /* Split 64-bit loads. */
348 if (ac_get_type_size(type
) == 8) {
351 lo
= lshs_lds_load(ctx
, ctx
->i32
, swizzle
, dw_addr
);
352 hi
= lshs_lds_load(ctx
, ctx
->i32
, swizzle
+ 1, dw_addr
);
353 return si_build_gather_64bit(ctx
, type
, lo
, hi
);
356 dw_addr
= LLVMBuildAdd(ctx
->ac
.builder
, dw_addr
,
357 LLVMConstInt(ctx
->i32
, swizzle
, 0), "");
359 value
= ac_lds_load(&ctx
->ac
, dw_addr
);
361 return LLVMBuildBitCast(ctx
->ac
.builder
, value
, type
, "");
365 * Store to LSHS LDS storage.
367 * \param swizzle offset (typically 0..3)
368 * \param dw_addr address in dwords
369 * \param value value to store
371 static void lshs_lds_store(struct si_shader_context
*ctx
,
372 unsigned dw_offset_imm
, LLVMValueRef dw_addr
,
375 dw_addr
= LLVMBuildAdd(ctx
->ac
.builder
, dw_addr
,
376 LLVMConstInt(ctx
->i32
, dw_offset_imm
, 0), "");
378 ac_lds_store(&ctx
->ac
, dw_addr
, value
);
/* Enumerators selecting which tessellation ring a descriptor is built for
 * (see get_tess_ring_descriptor()).
 *
 * NOTE(review): the "enum si_tess_ring" header and the TCS_FACTOR_RING
 * enumerator used elsewhere in this file are missing from this listing -
 * confirm against the full file.
 */
383 TESS_OFFCHIP_RING_TCS
,
384 TESS_OFFCHIP_RING_TES
,
/* Build a 4-dword buffer descriptor for one of the tessellation rings.
 *
 * Visible steps:
 * - addr is read from the tes_offchip_addr argument for
 *   TESS_OFFCHIP_RING_TES and from tcs_out_lds_layout for the other rings;
 * - for TESS_OFFCHIP_RING_TCS and TCS_FACTOR_RING addr is masked with
 *   0xfff80000 (only the high 13 bits are passed to the TCS);
 * - for TCS_FACTOR_RING screen->tess_offchip_ring_size is added to addr;
 * - rsrc3 selects the X/Y/Z/W swizzle plus format fields: FORMAT,
 *   OOB_SELECT and RESOURCE_LEVEL on GFX10+, and NUM_FORMAT/DATA_FORMAT;
 * - desc[1] holds BASE_ADDRESS_HI from info.address32_hi, desc[2] is
 *   0xffffffff, desc[3] is rsrc3; the four dwords are gathered and returned.
 *
 * NOTE(review): the assignment of desc[0] and the line preceding the
 * NUM_FORMAT/DATA_FORMAT assignment (likely an "else") are missing from
 * this listing - confirm against the full file.
 */
387 static LLVMValueRef
get_tess_ring_descriptor(struct si_shader_context
*ctx
,
388 enum si_tess_ring ring
)
390 LLVMBuilderRef builder
= ctx
->ac
.builder
;
391 LLVMValueRef addr
= ac_get_arg(&ctx
->ac
,
392 ring
== TESS_OFFCHIP_RING_TES
?
393 ctx
->tes_offchip_addr
:
394 ctx
->tcs_out_lds_layout
);
396 /* TCS only receives high 13 bits of the address. */
397 if (ring
== TESS_OFFCHIP_RING_TCS
|| ring
== TCS_FACTOR_RING
) {
398 addr
= LLVMBuildAnd(builder
, addr
,
399 LLVMConstInt(ctx
->i32
, 0xfff80000, 0), "");
402 if (ring
== TCS_FACTOR_RING
) {
403 unsigned tf_offset
= ctx
->screen
->tess_offchip_ring_size
;
404 addr
= LLVMBuildAdd(builder
, addr
,
405 LLVMConstInt(ctx
->i32
, tf_offset
, 0), "");
408 uint32_t rsrc3
= S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X
) |
409 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y
) |
410 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z
) |
411 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W
);
413 if (ctx
->screen
->info
.chip_class
>= GFX10
)
414 rsrc3
|= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT
) |
415 S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW
) |
416 S_008F0C_RESOURCE_LEVEL(1);
418 rsrc3
|= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT
) |
419 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32
);
421 LLVMValueRef desc
[4];
423 desc
[1] = LLVMConstInt(ctx
->i32
,
424 S_008F04_BASE_ADDRESS_HI(ctx
->screen
->info
.address32_hi
), 0);
425 desc
[2] = LLVMConstInt(ctx
->i32
, 0xffffffff, 0);
426 desc
[3] = LLVMConstInt(ctx
->i32
, rsrc3
, false);
428 return ac_build_gather_values(&ctx
->ac
, desc
, 4);
431 void si_llvm_preload_tes_rings(struct si_shader_context
*ctx
)
433 ctx
->tess_offchip_ring
= get_tess_ring_descriptor(ctx
, TESS_OFFCHIP_RING_TES
);
/* Shader-ABI callback that loads TCS varyings from LDS.
 *
 * Visible steps:
 * - driver_location is divided by 4 and used to look up (name, index) in
 *   either the input or the output semantic tables of the shader info;
 * - an assert checks that PATCH/TESSINNER/TESSOUTER semantics agree with
 *   is_patch;
 * - stride/dw_addr are set up from the TCS input helpers, the per-patch
 *   output data offset, or the per-vertex output helpers;
 * - param_index may be replaced by the constant const_index;
 * - the final dword address comes from
 *   get_dw_address_from_generic_indices();
 * - each of num_components values is read with lshs_lds_load() into
 *   value[i + component] and the result is gathered with
 *   ac_build_varying_gather_values().
 *
 * NOTE(review): several parameter declarations (type, component, is_patch
 * and others used in the body), the conditions selecting between the
 * alternatives above, and part of the loop body are missing from this
 * listing - confirm against the full file.
 */
436 static LLVMValueRef
si_nir_load_tcs_varyings(struct ac_shader_abi
*abi
,
438 LLVMValueRef vertex_index
,
439 LLVMValueRef param_index
,
440 unsigned const_index
,
442 unsigned driver_location
,
444 unsigned num_components
,
449 struct si_shader_context
*ctx
= si_shader_context_from_abi(abi
);
450 struct si_shader_info
*info
= &ctx
->shader
->selector
->info
;
451 LLVMValueRef dw_addr
, stride
;
454 driver_location
= driver_location
/ 4;
457 name
= info
->input_semantic_name
[driver_location
];
458 index
= info
->input_semantic_index
[driver_location
];
460 name
= info
->output_semantic_name
[driver_location
];
461 index
= info
->output_semantic_index
[driver_location
];
464 assert((name
== TGSI_SEMANTIC_PATCH
||
465 name
== TGSI_SEMANTIC_TESSINNER
||
466 name
== TGSI_SEMANTIC_TESSOUTER
) == is_patch
);
469 stride
= get_tcs_in_vertex_dw_stride(ctx
);
470 dw_addr
= get_tcs_in_current_patch_offset(ctx
);
474 dw_addr
= get_tcs_out_current_patch_data_offset(ctx
);
476 stride
= get_tcs_out_vertex_dw_stride(ctx
);
477 dw_addr
= get_tcs_out_current_patch_offset(ctx
);
482 param_index
= LLVMConstInt(ctx
->i32
, const_index
, 0);
485 dw_addr
= get_dw_address_from_generic_indices(ctx
, stride
, dw_addr
,
486 vertex_index
, param_index
,
489 LLVMValueRef value
[4];
490 for (unsigned i
= 0; i
< num_components
; i
++) {
492 if (ac_get_type_size(type
) == 8)
496 value
[i
+ component
] = lshs_lds_load(ctx
, type
, offset
, dw_addr
);
499 return ac_build_varying_gather_values(&ctx
->ac
, value
, num_components
, component
);
/* Shader-ABI callback that loads TES inputs from the offchip buffer.
 *
 * Visible steps:
 * - driver_location is divided by 4 and used to look up (name, index) in
 *   the input semantic tables of the shader info;
 * - an assert checks that PATCH/TESSINNER/TESSOUTER semantics agree with
 *   is_patch;
 * - base is the tcs_offchip_offset argument; param_index may be replaced by
 *   the constant const_index;
 * - addr comes from get_tcs_tes_buffer_address_from_generic_indices(); for
 *   8-byte types the semantic at driver_location + 1 is also looked up and
 *   addr is recomputed;
 * - each of num_components values is read with buffer_load() from
 *   ctx->tess_offchip_ring (can_speculate = true) into value[i + component]
 *   and the result is gathered with ac_build_varying_gather_values().
 *
 * NOTE(review): several parameter declarations (type, component, is_patch
 * and others used in the body) and parts of the loop body are missing from
 * this listing - confirm against the full file.
 */
502 LLVMValueRef
si_nir_load_input_tes(struct ac_shader_abi
*abi
,
504 LLVMValueRef vertex_index
,
505 LLVMValueRef param_index
,
506 unsigned const_index
,
508 unsigned driver_location
,
510 unsigned num_components
,
515 struct si_shader_context
*ctx
= si_shader_context_from_abi(abi
);
516 struct si_shader_info
*info
= &ctx
->shader
->selector
->info
;
517 LLVMValueRef base
, addr
;
519 driver_location
= driver_location
/ 4;
520 ubyte name
= info
->input_semantic_name
[driver_location
];
521 ubyte index
= info
->input_semantic_index
[driver_location
];
523 assert((name
== TGSI_SEMANTIC_PATCH
||
524 name
== TGSI_SEMANTIC_TESSINNER
||
525 name
== TGSI_SEMANTIC_TESSOUTER
) == is_patch
);
527 base
= ac_get_arg(&ctx
->ac
, ctx
->tcs_offchip_offset
);
530 param_index
= LLVMConstInt(ctx
->i32
, const_index
, 0);
533 addr
= get_tcs_tes_buffer_address_from_generic_indices(ctx
, vertex_index
,
537 /* TODO: This will generate rather ordinary llvm code, although it
538 * should be easy for the optimiser to fix up. In future we might want
539 * to refactor buffer_load().
541 LLVMValueRef value
[4];
542 for (unsigned i
= 0; i
< num_components
; i
++) {
544 if (ac_get_type_size(type
) == 8) {
547 ubyte name
= info
->input_semantic_name
[driver_location
+ 1];
548 ubyte index
= info
->input_semantic_index
[driver_location
+ 1];
549 addr
= get_tcs_tes_buffer_address_from_generic_indices(ctx
,
559 value
[i
+ component
] = buffer_load(ctx
, type
, offset
,
560 ctx
->tess_offchip_ring
, base
, addr
, true);
563 return ac_build_varying_gather_values(&ctx
->ac
, value
, num_components
, component
);
/* Shader-ABI callback that stores a TCS output variable.
 *
 * Visible steps:
 * - component and driver_location come from var->data; driver_location / 4
 *   indexes the output semantic tables to get (name, index);
 * - is_patch is derived from var->data.patch or the TESS_LEVEL_INNER/OUTER
 *   slots; a mismatch with the semantic name is checked ("Invalid SPIR-V
 *   can cause this");
 * - the LDS dword address is computed for either per-vertex outputs
 *   (stride from get_tcs_out_vertex_dw_stride()) or per-patch outputs
 *   (NULL stride), and skip_lds_store is derived from
 *   reads_pervertex_outputs / reads_perpatch_outputs;
 * - for constant index 0 of TESSINNER/TESSOUTER, skip_lds_store,
 *   is_tess_factor and is_tess_inner are updated;
 * - buffer, base and addr describe the offchip destination
 *   (TESS_OFFCHIP_RING_TCS);
 * - for each channel selected by writemask the element is extracted from
 *   src, stored to LDS with lshs_lds_store(), kept in values[chan], stored
 *   as a single dword when writemask != 0xF and it is not a tess factor,
 *   and stored into ctx->invoc0_tess_factors[] for tess factors when
 *   tessfactors_are_def_in_all_invocs is set;
 * - when writemask == 0xF and it is not a tess factor, one gathered
 *   4-dword store is emitted.
 *
 * NOTE(review): some parameter declarations (src, writemask), the
 * declaration of skip_lds_store, several conditions/branch bodies and the
 * trailing arguments of the store calls are missing from this listing -
 * confirm against the full file.
 */
566 static void si_nir_store_output_tcs(struct ac_shader_abi
*abi
,
567 const struct nir_variable
*var
,
568 LLVMValueRef vertex_index
,
569 LLVMValueRef param_index
,
570 unsigned const_index
,
574 struct si_shader_context
*ctx
= si_shader_context_from_abi(abi
);
575 struct si_shader_info
*info
= &ctx
->shader
->selector
->info
;
576 const unsigned component
= var
->data
.location_frac
;
577 unsigned driver_location
= var
->data
.driver_location
;
578 LLVMValueRef dw_addr
, stride
;
579 LLVMValueRef buffer
, base
, addr
;
580 LLVMValueRef values
[8];
582 bool is_tess_factor
= false, is_tess_inner
= false;
584 driver_location
= driver_location
/ 4;
585 ubyte name
= info
->output_semantic_name
[driver_location
];
586 ubyte index
= info
->output_semantic_index
[driver_location
];
588 bool is_const
= !param_index
;
590 param_index
= LLVMConstInt(ctx
->i32
, const_index
, 0);
592 const bool is_patch
= var
->data
.patch
||
593 var
->data
.location
== VARYING_SLOT_TESS_LEVEL_INNER
||
594 var
->data
.location
== VARYING_SLOT_TESS_LEVEL_OUTER
;
596 /* Invalid SPIR-V can cause this. */
597 if ((name
== TGSI_SEMANTIC_PATCH
||
598 name
== TGSI_SEMANTIC_TESSINNER
||
599 name
== TGSI_SEMANTIC_TESSOUTER
) != is_patch
)
603 stride
= get_tcs_out_vertex_dw_stride(ctx
);
604 dw_addr
= get_tcs_out_current_patch_offset(ctx
);
605 dw_addr
= get_dw_address_from_generic_indices(ctx
, stride
, dw_addr
,
606 vertex_index
, param_index
,
609 skip_lds_store
= !info
->reads_pervertex_outputs
;
611 dw_addr
= get_tcs_out_current_patch_data_offset(ctx
);
612 dw_addr
= get_dw_address_from_generic_indices(ctx
, NULL
, dw_addr
,
613 vertex_index
, param_index
,
616 skip_lds_store
= !info
->reads_perpatch_outputs
;
618 if (is_const
&& const_index
== 0) {
619 int name
= info
->output_semantic_name
[driver_location
];
621 /* Always write tess factors into LDS for the TCS epilog. */
622 if (name
== TGSI_SEMANTIC_TESSINNER
||
623 name
== TGSI_SEMANTIC_TESSOUTER
) {
624 /* The epilog doesn't read LDS if invocation 0 defines tess factors. */
625 skip_lds_store
= !info
->reads_tessfactor_outputs
&&
626 ctx
->shader
->selector
->info
.tessfactors_are_def_in_all_invocs
;
627 is_tess_factor
= true;
628 is_tess_inner
= name
== TGSI_SEMANTIC_TESSINNER
;
633 buffer
= get_tess_ring_descriptor(ctx
, TESS_OFFCHIP_RING_TCS
);
635 base
= ac_get_arg(&ctx
->ac
, ctx
->tcs_offchip_offset
);
637 addr
= get_tcs_tes_buffer_address_from_generic_indices(ctx
, vertex_index
,
638 param_index
, name
, index
);
640 for (unsigned chan
= component
; chan
< 8; chan
++) {
641 if (!(writemask
& (1 << chan
)))
643 LLVMValueRef value
= ac_llvm_extract_elem(&ctx
->ac
, src
, chan
- component
);
645 unsigned buffer_store_offset
= chan
% 4;
647 ubyte name
= info
->output_semantic_name
[driver_location
+ 1];
648 ubyte index
= info
->output_semantic_index
[driver_location
+ 1];
649 addr
= get_tcs_tes_buffer_address_from_generic_indices(ctx
,
655 /* Skip LDS stores if there is no LDS read of this output. */
657 lshs_lds_store(ctx
, chan
, dw_addr
, value
);
659 value
= ac_to_integer(&ctx
->ac
, value
);
660 values
[chan
] = value
;
662 if (writemask
!= 0xF && !is_tess_factor
) {
663 ac_build_buffer_store_dword(&ctx
->ac
, buffer
, value
, 1,
665 4 * buffer_store_offset
,
669 /* Write tess factors into VGPRs for the epilog. */
670 if (is_tess_factor
&&
671 ctx
->shader
->selector
->info
.tessfactors_are_def_in_all_invocs
) {
672 if (!is_tess_inner
) {
673 LLVMBuildStore(ctx
->ac
.builder
, value
, /* outer */
674 ctx
->invoc0_tess_factors
[chan
]);
675 } else if (chan
< 2) {
676 LLVMBuildStore(ctx
->ac
.builder
, value
, /* inner */
677 ctx
->invoc0_tess_factors
[4 + chan
]);
682 if (writemask
== 0xF && !is_tess_factor
) {
683 LLVMValueRef value
= ac_build_gather_values(&ctx
->ac
,
685 ac_build_buffer_store_dword(&ctx
->ac
, buffer
, value
, 4, addr
,
/* Shader-ABI callback returning the tessellation coordinate as a
 * 4-element vector.
 *
 * coord[0] and coord[1] are the tes_u and tes_v shader arguments. When the
 * TES primitive mode is PIPE_PRIM_TRIANGLES, coord[2] is computed as
 * 1 - (u + v).
 *
 * NOTE(review): the remaining initializers of coord[] are missing from this
 * listing - confirm against the full file.
 */
690 static LLVMValueRef
si_load_tess_coord(struct ac_shader_abi
*abi
)
692 struct si_shader_context
*ctx
= si_shader_context_from_abi(abi
);
693 LLVMValueRef coord
[4] = {
694 ac_get_arg(&ctx
->ac
, ctx
->tes_u
),
695 ac_get_arg(&ctx
->ac
, ctx
->tes_v
),
700 /* For triangles, the vector should be (u, v, 1-u-v). */
701 if (ctx
->shader
->selector
->info
.properties
[TGSI_PROPERTY_TES_PRIM_MODE
] ==
702 PIPE_PRIM_TRIANGLES
) {
703 coord
[2] = LLVMBuildFSub(ctx
->ac
.builder
, ctx
->ac
.f32_1
,
704 LLVMBuildFAdd(ctx
->ac
.builder
,
705 coord
[0], coord
[1], ""), "");
707 return ac_build_gather_values(&ctx
->ac
, coord
, 4);
710 static LLVMValueRef
load_tess_level(struct si_shader_context
*ctx
,
711 unsigned semantic_name
)
713 LLVMValueRef base
, addr
;
715 int param
= si_shader_io_get_unique_index_patch(semantic_name
, 0);
717 base
= ac_get_arg(&ctx
->ac
, ctx
->tcs_offchip_offset
);
718 addr
= get_tcs_tes_buffer_address(ctx
, get_rel_patch_id(ctx
), NULL
,
719 LLVMConstInt(ctx
->i32
, param
, 0));
721 return buffer_load(ctx
, ctx
->f32
,
722 ~0, ctx
->tess_offchip_ring
, base
, addr
, true);
726 static LLVMValueRef
load_tess_level_default(struct si_shader_context
*ctx
,
727 unsigned semantic_name
)
729 LLVMValueRef buf
, slot
, val
[4];
732 slot
= LLVMConstInt(ctx
->i32
, SI_HS_CONST_DEFAULT_TESS_LEVELS
, 0);
733 buf
= ac_get_arg(&ctx
->ac
, ctx
->rw_buffers
);
734 buf
= ac_build_load_to_sgpr(&ctx
->ac
, buf
, slot
);
735 offset
= semantic_name
== TGSI_SEMANTIC_TESS_DEFAULT_INNER_LEVEL
? 4 : 0;
737 for (i
= 0; i
< 4; i
++)
738 val
[i
] = si_buffer_load_const(ctx
, buf
,
739 LLVMConstInt(ctx
->i32
, (offset
+ i
) * 4, 0));
740 return ac_build_gather_values(&ctx
->ac
, val
, 4);
/* Shader-ABI callback that loads the inner or outer tess level.
 *
 * varying_id (VARYING_SLOT_TESS_LEVEL_INNER or _OUTER) is mapped to a
 * semantic name. With load_default_state set, the TESS_DEFAULT_* semantics
 * are used and the value comes from load_tess_level_default(); otherwise
 * TESSINNER/TESSOUTER are used and the value comes from load_tess_level().
 * Any other varying_id hits unreachable("unknown tess level").
 *
 * NOTE(review): the declaration of the varying_id parameter and the
 * break/default lines of both switches are missing from this listing -
 * confirm against the full file.
 */
743 static LLVMValueRef
si_load_tess_level(struct ac_shader_abi
*abi
,
745 bool load_default_state
)
747 struct si_shader_context
*ctx
= si_shader_context_from_abi(abi
);
748 unsigned semantic_name
;
750 if (load_default_state
) {
751 switch (varying_id
) {
752 case VARYING_SLOT_TESS_LEVEL_INNER
:
753 semantic_name
= TGSI_SEMANTIC_TESS_DEFAULT_INNER_LEVEL
;
755 case VARYING_SLOT_TESS_LEVEL_OUTER
:
756 semantic_name
= TGSI_SEMANTIC_TESS_DEFAULT_OUTER_LEVEL
;
759 unreachable("unknown tess level");
761 return load_tess_level_default(ctx
, semantic_name
);
764 switch (varying_id
) {
765 case VARYING_SLOT_TESS_LEVEL_INNER
:
766 semantic_name
= TGSI_SEMANTIC_TESSINNER
;
768 case VARYING_SLOT_TESS_LEVEL_OUTER
:
769 semantic_name
= TGSI_SEMANTIC_TESSOUTER
;
772 unreachable("unknown tess level");
775 return load_tess_level(ctx
, semantic_name
);
779 static LLVMValueRef
si_load_patch_vertices_in(struct ac_shader_abi
*abi
)
781 struct si_shader_context
*ctx
= si_shader_context_from_abi(abi
);
782 if (ctx
->type
== PIPE_SHADER_TESS_CTRL
)
783 return si_unpack_param(ctx
, ctx
->tcs_out_lds_layout
, 13, 6);
784 else if (ctx
->type
== PIPE_SHADER_TESS_EVAL
)
785 return get_num_tcs_out_vertices(ctx
);
787 unreachable("invalid shader stage for TGSI_SEMANTIC_VERTICESIN");
/* Copy TCS inputs from LDS to the offchip buffer (fixed-function TCS path).
 *
 * Visible steps:
 * - invocation_id is unpacked from args.tcs_rel_ids
 *   (si_unpack_param(..., 8, 5));
 * - buffer/buffer_offset describe TESS_OFFCHIP_RING_TCS;
 * - lds_base starts at the current input patch offset and is advanced with
 *   ac_build_imad(invocation_id, lds_vertex_stride, ...);
 * - inputs is the key's ff_tcs_inputs_to_copy mask; for each bit i taken
 *   with u_bit_scan64(), a value is loaded from LDS at lds_base + 4 * i
 *   with swizzle ~0 and stored as 4 dwords to the offchip address of
 *   attribute i (ac_glc).
 *
 * NOTE(review): the declaration of "inputs", the loop header and some call
 * arguments are missing from this listing - confirm against the full file.
 */
791 * Forward all outputs from the vertex shader to the TES. This is only used
792 * for the fixed function TCS.
794 static void si_copy_tcs_inputs(struct si_shader_context
*ctx
)
796 LLVMValueRef invocation_id
, buffer
, buffer_offset
;
797 LLVMValueRef lds_vertex_stride
, lds_base
;
800 invocation_id
= si_unpack_param(ctx
, ctx
->args
.tcs_rel_ids
, 8, 5);
801 buffer
= get_tess_ring_descriptor(ctx
, TESS_OFFCHIP_RING_TCS
);
802 buffer_offset
= ac_get_arg(&ctx
->ac
, ctx
->tcs_offchip_offset
);
804 lds_vertex_stride
= get_tcs_in_vertex_dw_stride(ctx
);
805 lds_base
= get_tcs_in_current_patch_offset(ctx
);
806 lds_base
= ac_build_imad(&ctx
->ac
, invocation_id
, lds_vertex_stride
,
809 inputs
= ctx
->shader
->key
.mono
.u
.ff_tcs_inputs_to_copy
;
811 unsigned i
= u_bit_scan64(&inputs
);
813 LLVMValueRef lds_ptr
= LLVMBuildAdd(ctx
->ac
.builder
, lds_base
,
814 LLVMConstInt(ctx
->i32
, 4 * i
, 0),
817 LLVMValueRef buffer_addr
= get_tcs_tes_buffer_address(ctx
,
818 get_rel_patch_id(ctx
),
820 LLVMConstInt(ctx
->i32
, i
, 0));
822 LLVMValueRef value
= lshs_lds_load(ctx
, ctx
->ac
.i32
, ~0, lds_ptr
);
824 ac_build_buffer_store_dword(&ctx
->ac
, buffer
, value
, 4, buffer_addr
,
825 buffer_offset
, 0, ac_glc
);
/* Write the tessellation factors of one patch to the tess factor ring and,
 * if the TES reads them, to the offchip buffer.
 *
 * Visible steps:
 * - a barrier is emitted unless the epilog key says invocation 0 defines
 *   the tess factors;
 * - everything else runs inside "if (invocation_id == 0)" (label 6503);
 * - stride is 2, 4 or 6 dwords for LINES, TRIANGLES and QUADS;
 * - inner[]/outer[] start as undef; the factors come either from the
 *   invoc0_tf_outer/invoc0_tf_inner arguments or from LDS at
 *   tcs_out_current_patch_data_offset plus 4 * the unique patch index of
 *   TESSOUTER/TESSINNER;
 * - for LINES the output order is adjusted (hardware expects the reverse
 *   order);
 * - out[] is gathered into vec0 (and vec1 from out + 4 with stride - 4
 *   elements);
 * - on GFX8 and older, inside "if (rel_patch_id == 0)" (label 6504), the
 *   dword 0x80000000 is stored as the dynamic HS control word;
 * - vec0 (and vec1) are stored to the TCS_FACTOR_RING at byte offset
 *   rel_patch_id * 4 * stride;
 * - if tes_reads_tess_factors is set, the outer and inner factors are also
 *   stored to TESS_OFFCHIP_RING_TCS at the addresses of the TESSOUTER and
 *   TESSINNER per-patch attributes.
 *
 * NOTE(review): the assignments of outer_comps/inner_comps, several
 * branch/else lines, the isoline swap body and the trailing arguments of
 * the store calls are missing from this listing - confirm against the full
 * file.
 */
829 static void si_write_tess_factors(struct si_shader_context
*ctx
,
830 LLVMValueRef rel_patch_id
,
831 LLVMValueRef invocation_id
,
832 LLVMValueRef tcs_out_current_patch_data_offset
,
833 LLVMValueRef invoc0_tf_outer
[4],
834 LLVMValueRef invoc0_tf_inner
[2])
836 struct si_shader
*shader
= ctx
->shader
;
837 unsigned tess_inner_index
, tess_outer_index
;
838 LLVMValueRef lds_base
, lds_inner
, lds_outer
, byteoffset
, buffer
;
839 LLVMValueRef out
[6], vec0
, vec1
, tf_base
, inner
[4], outer
[4];
840 unsigned stride
, outer_comps
, inner_comps
, i
, offset
;
842 /* Add a barrier before loading tess factors from LDS. */
843 if (!shader
->key
.part
.tcs
.epilog
.invoc0_tess_factors_are_def
)
844 si_llvm_emit_barrier(ctx
);
846 /* Do this only for invocation 0, because the tess levels are per-patch,
849 * This can't jump, because invocation 0 executes this. It should
850 * at least mask out the loads and stores for other invocations.
852 ac_build_ifcc(&ctx
->ac
,
853 LLVMBuildICmp(ctx
->ac
.builder
, LLVMIntEQ
,
854 invocation_id
, ctx
->i32_0
, ""), 6503);
856 /* Determine the layout of one tess factor element in the buffer. */
857 switch (shader
->key
.part
.tcs
.epilog
.prim_mode
) {
858 case PIPE_PRIM_LINES
:
859 stride
= 2; /* 2 dwords, 1 vec2 store */
863 case PIPE_PRIM_TRIANGLES
:
864 stride
= 4; /* 4 dwords, 1 vec4 store */
868 case PIPE_PRIM_QUADS
:
869 stride
= 6; /* 6 dwords, 2 stores (vec4 + vec2) */
878 for (i
= 0; i
< 4; i
++) {
879 inner
[i
] = LLVMGetUndef(ctx
->i32
);
880 outer
[i
] = LLVMGetUndef(ctx
->i32
);
883 if (shader
->key
.part
.tcs
.epilog
.invoc0_tess_factors_are_def
) {
884 /* Tess factors are in VGPRs. */
885 for (i
= 0; i
< outer_comps
; i
++)
886 outer
[i
] = out
[i
] = invoc0_tf_outer
[i
];
887 for (i
= 0; i
< inner_comps
; i
++)
888 inner
[i
] = out
[outer_comps
+i
] = invoc0_tf_inner
[i
];
890 /* Load tess_inner and tess_outer from LDS.
891 * Any invocation can write them, so we can't get them from a temporary.
893 tess_inner_index
= si_shader_io_get_unique_index_patch(TGSI_SEMANTIC_TESSINNER
, 0);
894 tess_outer_index
= si_shader_io_get_unique_index_patch(TGSI_SEMANTIC_TESSOUTER
, 0);
896 lds_base
= tcs_out_current_patch_data_offset
;
897 lds_inner
= LLVMBuildAdd(ctx
->ac
.builder
, lds_base
,
898 LLVMConstInt(ctx
->i32
,
899 tess_inner_index
* 4, 0), "");
900 lds_outer
= LLVMBuildAdd(ctx
->ac
.builder
, lds_base
,
901 LLVMConstInt(ctx
->i32
,
902 tess_outer_index
* 4, 0), "");
904 for (i
= 0; i
< outer_comps
; i
++) {
906 lshs_lds_load(ctx
, ctx
->ac
.i32
, i
, lds_outer
);
908 for (i
= 0; i
< inner_comps
; i
++) {
909 inner
[i
] = out
[outer_comps
+i
] =
910 lshs_lds_load(ctx
, ctx
->ac
.i32
, i
, lds_inner
);
914 if (shader
->key
.part
.tcs
.epilog
.prim_mode
== PIPE_PRIM_LINES
) {
915 /* For isolines, the hardware expects tess factors in the
916 * reverse order from what NIR specifies.
918 LLVMValueRef tmp
= out
[0];
923 /* Convert the outputs to vectors for stores. */
924 vec0
= ac_build_gather_values(&ctx
->ac
, out
, MIN2(stride
, 4));
928 vec1
= ac_build_gather_values(&ctx
->ac
, out
+4, stride
- 4);
930 /* Get the buffer. */
931 buffer
= get_tess_ring_descriptor(ctx
, TCS_FACTOR_RING
);
933 /* Get the offset. */
934 tf_base
= ac_get_arg(&ctx
->ac
,
935 ctx
->tcs_factor_offset
);
936 byteoffset
= LLVMBuildMul(ctx
->ac
.builder
, rel_patch_id
,
937 LLVMConstInt(ctx
->i32
, 4 * stride
, 0), "");
939 ac_build_ifcc(&ctx
->ac
,
940 LLVMBuildICmp(ctx
->ac
.builder
, LLVMIntEQ
,
941 rel_patch_id
, ctx
->i32_0
, ""), 6504);
943 /* Store the dynamic HS control word. */
945 if (ctx
->screen
->info
.chip_class
<= GFX8
) {
946 ac_build_buffer_store_dword(&ctx
->ac
, buffer
,
947 LLVMConstInt(ctx
->i32
, 0x80000000, 0),
948 1, ctx
->i32_0
, tf_base
,
953 ac_build_endif(&ctx
->ac
, 6504);
955 /* Store the tessellation factors. */
956 ac_build_buffer_store_dword(&ctx
->ac
, buffer
, vec0
,
957 MIN2(stride
, 4), byteoffset
, tf_base
,
961 ac_build_buffer_store_dword(&ctx
->ac
, buffer
, vec1
,
962 stride
- 4, byteoffset
, tf_base
,
965 /* Store the tess factors into the offchip buffer if TES reads them. */
966 if (shader
->key
.part
.tcs
.epilog
.tes_reads_tess_factors
) {
967 LLVMValueRef buf
, base
, inner_vec
, outer_vec
, tf_outer_offset
;
968 LLVMValueRef tf_inner_offset
;
969 unsigned param_outer
, param_inner
;
971 buf
= get_tess_ring_descriptor(ctx
, TESS_OFFCHIP_RING_TCS
);
972 base
= ac_get_arg(&ctx
->ac
, ctx
->tcs_offchip_offset
);
974 param_outer
= si_shader_io_get_unique_index_patch(
975 TGSI_SEMANTIC_TESSOUTER
, 0);
976 tf_outer_offset
= get_tcs_tes_buffer_address(ctx
, rel_patch_id
, NULL
,
977 LLVMConstInt(ctx
->i32
, param_outer
, 0));
979 unsigned outer_vec_size
=
980 ac_has_vec3_support(ctx
->screen
->info
.chip_class
, false) ?
981 outer_comps
: util_next_power_of_two(outer_comps
);
982 outer_vec
= ac_build_gather_values(&ctx
->ac
, outer
, outer_vec_size
);
984 ac_build_buffer_store_dword(&ctx
->ac
, buf
, outer_vec
,
985 outer_comps
, tf_outer_offset
,
988 param_inner
= si_shader_io_get_unique_index_patch(
989 TGSI_SEMANTIC_TESSINNER
, 0);
990 tf_inner_offset
= get_tcs_tes_buffer_address(ctx
, rel_patch_id
, NULL
,
991 LLVMConstInt(ctx
->i32
, param_inner
, 0));
993 inner_vec
= inner_comps
== 1 ? inner
[0] :
994 ac_build_gather_values(&ctx
->ac
, inner
, inner_comps
);
995 ac_build_buffer_store_dword(&ctx
->ac
, buf
, inner_vec
,
996 inner_comps
, tf_inner_offset
,
1001 ac_build_endif(&ctx
->ac
, 6503);
/* Shader-ABI callback run at the end of the TCS main part.
 *
 * Visible steps:
 * - si_copy_tcs_inputs() is called;
 * - rel_patch_id, invocation_id (si_unpack_param(args.tcs_rel_ids, 8, 5))
 *   and the tess factor LDS offset are computed;
 * - on GFX9+ the merged-shader wrapper "if" is closed and the three values
 *   are turned into phis over the current block and
 *   merged_wrap_if_entry_block; the second incoming value is undef for
 *   rel_patch_id and tf_lds_offset and i32 1 for invocation_id, so the
 *   epilog skips those threads;
 * - the epilog parameters are written into ctx->return_value: the offchip
 *   layout, out-LDS layout, offchip offset and factor offset SGPRs (at
 *   GFX9 or GFX6 positions), then rel_patch_id and invocation_id as
 *   floats, followed by either the six invoc0_tess_factors values (after
 *   skipping the LDS-offset slot) or tf_lds_offset.
 *
 * NOTE(review): the declaration of "vgpr", the else lines of the branches
 * and whatever follows the "Leave a hole" comment are missing from this
 * listing - confirm against the full file.
 */
1004 /* This only writes the tessellation factor levels. */
1005 static void si_llvm_emit_tcs_epilogue(struct ac_shader_abi
*abi
,
1006 unsigned max_outputs
,
1007 LLVMValueRef
*addrs
)
1009 struct si_shader_context
*ctx
= si_shader_context_from_abi(abi
);
1010 LLVMBuilderRef builder
= ctx
->ac
.builder
;
1011 LLVMValueRef rel_patch_id
, invocation_id
, tf_lds_offset
;
1013 si_copy_tcs_inputs(ctx
);
1015 rel_patch_id
= get_rel_patch_id(ctx
);
1016 invocation_id
= si_unpack_param(ctx
, ctx
->args
.tcs_rel_ids
, 8, 5);
1017 tf_lds_offset
= get_tcs_out_current_patch_data_offset(ctx
);
1019 if (ctx
->screen
->info
.chip_class
>= GFX9
) {
1020 LLVMBasicBlockRef blocks
[2] = {
1021 LLVMGetInsertBlock(builder
),
1022 ctx
->merged_wrap_if_entry_block
1024 LLVMValueRef values
[2];
1026 ac_build_endif(&ctx
->ac
, ctx
->merged_wrap_if_label
);
1028 values
[0] = rel_patch_id
;
1029 values
[1] = LLVMGetUndef(ctx
->i32
);
1030 rel_patch_id
= ac_build_phi(&ctx
->ac
, ctx
->i32
, 2, values
, blocks
);
1032 values
[0] = tf_lds_offset
;
1033 values
[1] = LLVMGetUndef(ctx
->i32
);
1034 tf_lds_offset
= ac_build_phi(&ctx
->ac
, ctx
->i32
, 2, values
, blocks
);
1036 values
[0] = invocation_id
;
1037 values
[1] = ctx
->i32_1
; /* cause the epilog to skip threads */
1038 invocation_id
= ac_build_phi(&ctx
->ac
, ctx
->i32
, 2, values
, blocks
);
1041 /* Return epilog parameters from this function. */
1042 LLVMValueRef ret
= ctx
->return_value
;
1045 if (ctx
->screen
->info
.chip_class
>= GFX9
) {
1046 ret
= si_insert_input_ret(ctx
, ret
, ctx
->tcs_offchip_layout
,
1047 8 + GFX9_SGPR_TCS_OFFCHIP_LAYOUT
);
1048 ret
= si_insert_input_ret(ctx
, ret
, ctx
->tcs_out_lds_layout
,
1049 8 + GFX9_SGPR_TCS_OUT_LAYOUT
);
1050 /* Tess offchip and tess factor offsets are at the beginning. */
1051 ret
= si_insert_input_ret(ctx
, ret
, ctx
->tcs_offchip_offset
, 2);
1052 ret
= si_insert_input_ret(ctx
, ret
, ctx
->tcs_factor_offset
, 4);
1053 vgpr
= 8 + GFX9_SGPR_TCS_OUT_LAYOUT
+ 1;
1055 ret
= si_insert_input_ret(ctx
, ret
, ctx
->tcs_offchip_layout
,
1056 GFX6_SGPR_TCS_OFFCHIP_LAYOUT
);
1057 ret
= si_insert_input_ret(ctx
, ret
, ctx
->tcs_out_lds_layout
,
1058 GFX6_SGPR_TCS_OUT_LAYOUT
);
1059 /* Tess offchip and tess factor offsets are after user SGPRs. */
1060 ret
= si_insert_input_ret(ctx
, ret
, ctx
->tcs_offchip_offset
,
1061 GFX6_TCS_NUM_USER_SGPR
);
1062 ret
= si_insert_input_ret(ctx
, ret
, ctx
->tcs_factor_offset
,
1063 GFX6_TCS_NUM_USER_SGPR
+ 1);
1064 vgpr
= GFX6_TCS_NUM_USER_SGPR
+ 2;
1068 rel_patch_id
= ac_to_float(&ctx
->ac
, rel_patch_id
);
1069 invocation_id
= ac_to_float(&ctx
->ac
, invocation_id
);
1070 tf_lds_offset
= ac_to_float(&ctx
->ac
, tf_lds_offset
);
1072 /* Leave a hole corresponding to the two input VGPRs. This ensures that
1073 * the invocation_id output does not alias the tcs_rel_ids input,
1074 * which saves a V_MOV on gfx9.
1078 ret
= LLVMBuildInsertValue(builder
, ret
, rel_patch_id
, vgpr
++, "");
1079 ret
= LLVMBuildInsertValue(builder
, ret
, invocation_id
, vgpr
++, "");
1081 if (ctx
->shader
->selector
->info
.tessfactors_are_def_in_all_invocs
) {
1082 vgpr
++; /* skip the tess factor LDS offset */
1083 for (unsigned i
= 0; i
< 6; i
++) {
1084 LLVMValueRef value
=
1085 LLVMBuildLoad(builder
, ctx
->invoc0_tess_factors
[i
], "");
1086 value
= ac_to_float(&ctx
->ac
, value
);
1087 ret
= LLVMBuildInsertValue(builder
, ret
, value
, vgpr
++, "");
1090 ret
= LLVMBuildInsertValue(builder
, ret
, tf_lds_offset
, vgpr
++, "");
1092 ctx
->return_value
= ret
;
1095 /* Pass TCS inputs from LS to TCS on GFX9. */
1096 static void si_set_ls_return_value_for_tcs(struct si_shader_context
*ctx
)
1098 LLVMValueRef ret
= ctx
->return_value
;
1100 ret
= si_insert_input_ptr(ctx
, ret
, ctx
->other_const_and_shader_buffers
, 0);
1101 ret
= si_insert_input_ptr(ctx
, ret
, ctx
->other_samplers_and_images
, 1);
1102 ret
= si_insert_input_ret(ctx
, ret
, ctx
->tcs_offchip_offset
, 2);
1103 ret
= si_insert_input_ret(ctx
, ret
, ctx
->merged_wave_info
, 3);
1104 ret
= si_insert_input_ret(ctx
, ret
, ctx
->tcs_factor_offset
, 4);
1105 ret
= si_insert_input_ret(ctx
, ret
, ctx
->merged_scratch_offset
, 5);
1107 ret
= si_insert_input_ptr(ctx
, ret
, ctx
->rw_buffers
,
1108 8 + SI_SGPR_RW_BUFFERS
);
1109 ret
= si_insert_input_ptr(ctx
, ret
,
1110 ctx
->bindless_samplers_and_images
,
1111 8 + SI_SGPR_BINDLESS_SAMPLERS_AND_IMAGES
);
1113 ret
= si_insert_input_ret(ctx
, ret
, ctx
->vs_state_bits
,
1114 8 + SI_SGPR_VS_STATE_BITS
);
1116 ret
= si_insert_input_ret(ctx
, ret
, ctx
->tcs_offchip_layout
,
1117 8 + GFX9_SGPR_TCS_OFFCHIP_LAYOUT
);
1118 ret
= si_insert_input_ret(ctx
, ret
, ctx
->tcs_out_lds_offsets
,
1119 8 + GFX9_SGPR_TCS_OUT_OFFSETS
);
1120 ret
= si_insert_input_ret(ctx
, ret
, ctx
->tcs_out_lds_layout
,
1121 8 + GFX9_SGPR_TCS_OUT_LAYOUT
);
1123 unsigned vgpr
= 8 + GFX9_TCS_NUM_USER_SGPR
;
1124 ret
= LLVMBuildInsertValue(ctx
->ac
.builder
, ret
,
1125 ac_to_float(&ctx
->ac
,
1126 ac_get_arg(&ctx
->ac
, ctx
->args
.tcs_patch_id
)),
1128 ret
= LLVMBuildInsertValue(ctx
->ac
.builder
, ret
,
1129 ac_to_float(&ctx
->ac
,
1130 ac_get_arg(&ctx
->ac
, ctx
->args
.tcs_rel_ids
)),
1132 ctx
->return_value
= ret
;
1135 void si_llvm_emit_ls_epilogue(struct ac_shader_abi
*abi
, unsigned max_outputs
,
1136 LLVMValueRef
*addrs
)
1138 struct si_shader_context
*ctx
= si_shader_context_from_abi(abi
);
1139 struct si_shader
*shader
= ctx
->shader
;
1140 struct si_shader_info
*info
= &shader
->selector
->info
;
1142 LLVMValueRef vertex_id
= ac_get_arg(&ctx
->ac
, ctx
->rel_auto_id
);
1143 LLVMValueRef vertex_dw_stride
= get_tcs_in_vertex_dw_stride(ctx
);
1144 LLVMValueRef base_dw_addr
= LLVMBuildMul(ctx
->ac
.builder
, vertex_id
,
1145 vertex_dw_stride
, "");
1147 /* Write outputs to LDS. The next shader (TCS aka HS) will read
1148 * its inputs from it. */
1149 for (i
= 0; i
< info
->num_outputs
; i
++) {
1150 unsigned name
= info
->output_semantic_name
[i
];
1151 unsigned index
= info
->output_semantic_index
[i
];
1153 /* The ARB_shader_viewport_layer_array spec contains the
1156 * 2) What happens if gl_ViewportIndex or gl_Layer is
1157 * written in the vertex shader and a geometry shader is
1160 * RESOLVED: The value written by the last vertex processing
1161 * stage is used. If the last vertex processing stage
1162 * (vertex, tessellation evaluation or geometry) does not
1163 * statically assign to gl_ViewportIndex or gl_Layer, index
1164 * or layer zero is assumed.
1166 * So writes to those outputs in VS-as-LS are simply ignored.
1168 if (name
== TGSI_SEMANTIC_LAYER
||
1169 name
== TGSI_SEMANTIC_VIEWPORT_INDEX
)
1172 int param
= si_shader_io_get_unique_index(name
, index
, false);
1173 LLVMValueRef dw_addr
= LLVMBuildAdd(ctx
->ac
.builder
, base_dw_addr
,
1174 LLVMConstInt(ctx
->i32
, param
* 4, 0), "");
1176 for (chan
= 0; chan
< 4; chan
++) {
1177 if (!(info
->output_usagemask
[i
] & (1 << chan
)))
1180 lshs_lds_store(ctx
, chan
, dw_addr
,
1181 LLVMBuildLoad(ctx
->ac
.builder
, addrs
[4 * i
+ chan
], ""));
1185 if (ctx
->screen
->info
.chip_class
>= GFX9
)
1186 si_set_ls_return_value_for_tcs(ctx
);
1190 * Compile the TCS epilog function. This writes tesselation factors to memory
1191 * based on the output primitive type of the tesselator (determined by TES).
1193 void si_llvm_build_tcs_epilog(struct si_shader_context
*ctx
,
1194 union si_shader_part_key
*key
)
1196 memset(&ctx
->args
, 0, sizeof(ctx
->args
));
1198 if (ctx
->screen
->info
.chip_class
>= GFX9
) {
1199 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
, NULL
);
1200 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
, NULL
);
1201 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
,
1202 &ctx
->tcs_offchip_offset
);
1203 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
, NULL
); /* wave info */
1204 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
,
1205 &ctx
->tcs_factor_offset
);
1206 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
, NULL
);
1207 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
, NULL
);
1208 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
, NULL
);
1209 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
, NULL
);
1210 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
, NULL
);
1211 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
, NULL
);
1212 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
, NULL
);
1213 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
, NULL
);
1214 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
, NULL
);
1215 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
, NULL
);
1216 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
, NULL
);
1217 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
,
1218 &ctx
->tcs_offchip_layout
);
1219 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
, NULL
);
1220 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
,
1221 &ctx
->tcs_out_lds_layout
);
1223 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
, NULL
);
1224 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
, NULL
);
1225 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
, NULL
);
1226 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
, NULL
);
1227 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
,
1228 &ctx
->tcs_offchip_layout
);
1229 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
, NULL
);
1230 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
,
1231 &ctx
->tcs_out_lds_layout
);
1232 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
, NULL
);
1233 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
,
1234 &ctx
->tcs_offchip_offset
);
1235 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
,
1236 &ctx
->tcs_factor_offset
);
1239 ac_add_arg(&ctx
->args
, AC_ARG_VGPR
, 1, AC_ARG_INT
, NULL
); /* VGPR gap */
1240 ac_add_arg(&ctx
->args
, AC_ARG_VGPR
, 1, AC_ARG_INT
, NULL
); /* VGPR gap */
1241 struct ac_arg rel_patch_id
; /* patch index within the wave (REL_PATCH_ID) */
1242 ac_add_arg(&ctx
->args
, AC_ARG_VGPR
, 1, AC_ARG_INT
, &rel_patch_id
);
1243 struct ac_arg invocation_id
; /* invocation ID within the patch */
1244 ac_add_arg(&ctx
->args
, AC_ARG_VGPR
, 1, AC_ARG_INT
, &invocation_id
);
1245 struct ac_arg tcs_out_current_patch_data_offset
; /* LDS offset where tess factors should be loaded from */
1246 ac_add_arg(&ctx
->args
, AC_ARG_VGPR
, 1, AC_ARG_INT
,
1247 &tcs_out_current_patch_data_offset
);
1249 struct ac_arg tess_factors
[6];
1250 for (unsigned i
= 0; i
< 6; i
++)
1251 ac_add_arg(&ctx
->args
, AC_ARG_VGPR
, 1, AC_ARG_INT
, &tess_factors
[i
]);
1253 /* Create the function. */
1254 si_llvm_create_func(ctx
, "tcs_epilog", NULL
, 0,
1255 ctx
->screen
->info
.chip_class
>= GFX7
? 128 : 0);
1256 ac_declare_lds_as_pointer(&ctx
->ac
);
1258 LLVMValueRef invoc0_tess_factors
[6];
1259 for (unsigned i
= 0; i
< 6; i
++)
1260 invoc0_tess_factors
[i
] = ac_get_arg(&ctx
->ac
, tess_factors
[i
]);
1262 si_write_tess_factors(ctx
,
1263 ac_get_arg(&ctx
->ac
, rel_patch_id
),
1264 ac_get_arg(&ctx
->ac
, invocation_id
),
1265 ac_get_arg(&ctx
->ac
, tcs_out_current_patch_data_offset
),
1266 invoc0_tess_factors
, invoc0_tess_factors
+ 4);
1268 LLVMBuildRetVoid(ctx
->ac
.builder
);
1271 void si_llvm_init_tcs_callbacks(struct si_shader_context
*ctx
)
1273 ctx
->abi
.load_tess_varyings
= si_nir_load_tcs_varyings
;
1274 ctx
->abi
.load_tess_level
= si_load_tess_level
;
1275 ctx
->abi
.store_tcs_outputs
= si_nir_store_output_tcs
;
1276 ctx
->abi
.emit_outputs
= si_llvm_emit_tcs_epilogue
;
1277 ctx
->abi
.load_patch_vertices_in
= si_load_patch_vertices_in
;
1280 void si_llvm_init_tes_callbacks(struct si_shader_context
*ctx
)
1282 ctx
->abi
.load_tess_varyings
= si_nir_load_input_tes
;
1283 ctx
->abi
.load_tess_coord
= si_load_tess_coord
;
1284 ctx
->abi
.load_tess_level
= si_load_tess_level
;
1285 ctx
->abi
.load_patch_vertices_in
= si_load_patch_vertices_in
;