/*
 * Copyright 2020 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
26 #include "si_shader_internal.h"
29 static LLVMValueRef
get_rel_patch_id(struct si_shader_context
*ctx
)
32 case PIPE_SHADER_TESS_CTRL
:
33 return si_unpack_param(ctx
, ctx
->args
.tcs_rel_ids
, 0, 8);
35 case PIPE_SHADER_TESS_EVAL
:
36 return ac_get_arg(&ctx
->ac
, ctx
->tes_rel_patch_id
);
/* Tessellation shaders pass outputs to the next shader using LDS.
 *
 * LS outputs = TCS inputs
 * TCS outputs = TES inputs
 *
 * The LDS layout is:
 * - TCS inputs for patch 0
 * - TCS inputs for patch 1
 * - TCS inputs for patch 2             = get_tcs_in_current_patch_offset (if RelPatchID==2)
 * - ...
 * - TCS outputs for patch 0            = get_tcs_out_patch0_offset
 * - Per-patch TCS outputs for patch 0  = get_tcs_out_patch0_patch_data_offset
 * - TCS outputs for patch 1
 * - Per-patch TCS outputs for patch 1
 * - TCS outputs for patch 2            = get_tcs_out_current_patch_offset (if RelPatchID==2)
 * - Per-patch TCS outputs for patch 2  = get_tcs_out_current_patch_data_offset (if RelPatchID==2)
 * - ...
 *
 * All three shaders VS(LS), TCS, TES share the same LDS space.
 */
65 static LLVMValueRef
get_tcs_in_patch_stride(struct si_shader_context
*ctx
)
67 return si_unpack_param(ctx
, ctx
->vs_state_bits
, 11, 13);
70 static unsigned get_tcs_out_vertex_dw_stride_constant(struct si_shader_context
*ctx
)
72 assert(ctx
->type
== PIPE_SHADER_TESS_CTRL
);
74 if (ctx
->shader
->key
.mono
.u
.ff_tcs_inputs_to_copy
)
75 return util_last_bit64(ctx
->shader
->key
.mono
.u
.ff_tcs_inputs_to_copy
) * 4;
77 return util_last_bit64(ctx
->shader
->selector
->outputs_written
) * 4;
80 static LLVMValueRef
get_tcs_out_vertex_dw_stride(struct si_shader_context
*ctx
)
82 unsigned stride
= get_tcs_out_vertex_dw_stride_constant(ctx
);
84 return LLVMConstInt(ctx
->ac
.i32
, stride
, 0);
87 static LLVMValueRef
get_tcs_out_patch_stride(struct si_shader_context
*ctx
)
89 if (ctx
->shader
->key
.mono
.u
.ff_tcs_inputs_to_copy
)
90 return si_unpack_param(ctx
, ctx
->tcs_out_lds_layout
, 0, 13);
92 const struct si_shader_info
*info
= &ctx
->shader
->selector
->info
;
93 unsigned tcs_out_vertices
= info
->properties
[TGSI_PROPERTY_TCS_VERTICES_OUT
];
94 unsigned vertex_dw_stride
= get_tcs_out_vertex_dw_stride_constant(ctx
);
95 unsigned num_patch_outputs
= util_last_bit64(ctx
->shader
->selector
->patch_outputs_written
);
96 unsigned patch_dw_stride
= tcs_out_vertices
* vertex_dw_stride
+ num_patch_outputs
* 4;
97 return LLVMConstInt(ctx
->ac
.i32
, patch_dw_stride
, 0);
100 static LLVMValueRef
get_tcs_out_patch0_offset(struct si_shader_context
*ctx
)
102 return LLVMBuildMul(ctx
->ac
.builder
, si_unpack_param(ctx
, ctx
->tcs_out_lds_offsets
, 0, 16),
103 LLVMConstInt(ctx
->ac
.i32
, 4, 0), "");
106 static LLVMValueRef
get_tcs_out_patch0_patch_data_offset(struct si_shader_context
*ctx
)
108 return LLVMBuildMul(ctx
->ac
.builder
, si_unpack_param(ctx
, ctx
->tcs_out_lds_offsets
, 16, 16),
109 LLVMConstInt(ctx
->ac
.i32
, 4, 0), "");
112 static LLVMValueRef
get_tcs_in_current_patch_offset(struct si_shader_context
*ctx
)
114 LLVMValueRef patch_stride
= get_tcs_in_patch_stride(ctx
);
115 LLVMValueRef rel_patch_id
= get_rel_patch_id(ctx
);
117 return LLVMBuildMul(ctx
->ac
.builder
, patch_stride
, rel_patch_id
, "");
120 static LLVMValueRef
get_tcs_out_current_patch_offset(struct si_shader_context
*ctx
)
122 LLVMValueRef patch0_offset
= get_tcs_out_patch0_offset(ctx
);
123 LLVMValueRef patch_stride
= get_tcs_out_patch_stride(ctx
);
124 LLVMValueRef rel_patch_id
= get_rel_patch_id(ctx
);
126 return ac_build_imad(&ctx
->ac
, patch_stride
, rel_patch_id
, patch0_offset
);
129 static LLVMValueRef
get_tcs_out_current_patch_data_offset(struct si_shader_context
*ctx
)
131 LLVMValueRef patch0_patch_data_offset
= get_tcs_out_patch0_patch_data_offset(ctx
);
132 LLVMValueRef patch_stride
= get_tcs_out_patch_stride(ctx
);
133 LLVMValueRef rel_patch_id
= get_rel_patch_id(ctx
);
135 return ac_build_imad(&ctx
->ac
, patch_stride
, rel_patch_id
, patch0_patch_data_offset
);
138 static LLVMValueRef
get_num_tcs_out_vertices(struct si_shader_context
*ctx
)
140 unsigned tcs_out_vertices
=
141 ctx
->shader
->selector
? ctx
->shader
->selector
->info
.properties
[TGSI_PROPERTY_TCS_VERTICES_OUT
]
144 /* If !tcs_out_vertices, it's either the fixed-func TCS or the TCS epilog. */
145 if (ctx
->type
== PIPE_SHADER_TESS_CTRL
&& tcs_out_vertices
)
146 return LLVMConstInt(ctx
->ac
.i32
, tcs_out_vertices
, 0);
148 return si_unpack_param(ctx
, ctx
->tcs_offchip_layout
, 6, 6);
151 static LLVMValueRef
get_tcs_in_vertex_dw_stride(struct si_shader_context
*ctx
)
156 case PIPE_SHADER_VERTEX
:
157 stride
= ctx
->shader
->selector
->lshs_vertex_stride
/ 4;
158 return LLVMConstInt(ctx
->ac
.i32
, stride
, 0);
160 case PIPE_SHADER_TESS_CTRL
:
161 if (ctx
->screen
->info
.chip_class
>= GFX9
&& ctx
->shader
->is_monolithic
) {
162 stride
= ctx
->shader
->key
.part
.tcs
.ls
->lshs_vertex_stride
/ 4;
163 return LLVMConstInt(ctx
->ac
.i32
, stride
, 0);
165 return si_unpack_param(ctx
, ctx
->vs_state_bits
, 24, 8);
174 get_dw_address_from_generic_indices(struct si_shader_context
*ctx
, LLVMValueRef vertex_dw_stride
,
175 LLVMValueRef base_addr
, LLVMValueRef vertex_index
,
176 LLVMValueRef param_index
, ubyte name
, ubyte index
)
178 if (vertex_dw_stride
) {
179 base_addr
= ac_build_imad(&ctx
->ac
, vertex_index
, vertex_dw_stride
, base_addr
);
183 base_addr
= ac_build_imad(&ctx
->ac
, param_index
, LLVMConstInt(ctx
->ac
.i32
, 4, 0), base_addr
);
186 int param
= name
== TGSI_SEMANTIC_PATCH
|| name
== TGSI_SEMANTIC_TESSINNER
||
187 name
== TGSI_SEMANTIC_TESSOUTER
188 ? si_shader_io_get_unique_index_patch(name
, index
)
189 : si_shader_io_get_unique_index(name
, index
, false);
191 /* Add the base address of the element. */
192 return LLVMBuildAdd(ctx
->ac
.builder
, base_addr
, LLVMConstInt(ctx
->ac
.i32
, param
* 4, 0), "");
195 /* The offchip buffer layout for TCS->TES is
197 * - attribute 0 of patch 0 vertex 0
198 * - attribute 0 of patch 0 vertex 1
199 * - attribute 0 of patch 0 vertex 2
201 * - attribute 0 of patch 1 vertex 0
202 * - attribute 0 of patch 1 vertex 1
204 * - attribute 1 of patch 0 vertex 0
205 * - attribute 1 of patch 0 vertex 1
207 * - per patch attribute 0 of patch 0
208 * - per patch attribute 0 of patch 1
211 * Note that every attribute has 4 components.
213 static LLVMValueRef
get_tcs_tes_buffer_address(struct si_shader_context
*ctx
,
214 LLVMValueRef rel_patch_id
, LLVMValueRef vertex_index
,
215 LLVMValueRef param_index
)
217 LLVMValueRef base_addr
, vertices_per_patch
, num_patches
, total_vertices
;
218 LLVMValueRef param_stride
, constant16
;
220 vertices_per_patch
= get_num_tcs_out_vertices(ctx
);
221 num_patches
= si_unpack_param(ctx
, ctx
->tcs_offchip_layout
, 0, 6);
222 total_vertices
= LLVMBuildMul(ctx
->ac
.builder
, vertices_per_patch
, num_patches
, "");
224 constant16
= LLVMConstInt(ctx
->ac
.i32
, 16, 0);
226 base_addr
= ac_build_imad(&ctx
->ac
, rel_patch_id
, vertices_per_patch
, vertex_index
);
227 param_stride
= total_vertices
;
229 base_addr
= rel_patch_id
;
230 param_stride
= num_patches
;
233 base_addr
= ac_build_imad(&ctx
->ac
, param_index
, param_stride
, base_addr
);
234 base_addr
= LLVMBuildMul(ctx
->ac
.builder
, base_addr
, constant16
, "");
237 LLVMValueRef patch_data_offset
= si_unpack_param(ctx
, ctx
->tcs_offchip_layout
, 12, 20);
239 base_addr
= LLVMBuildAdd(ctx
->ac
.builder
, base_addr
, patch_data_offset
, "");
244 static LLVMValueRef
get_tcs_tes_buffer_address_from_generic_indices(struct si_shader_context
*ctx
,
245 LLVMValueRef vertex_index
,
246 LLVMValueRef param_index
,
247 ubyte name
, ubyte index
)
249 unsigned param_index_base
;
251 param_index_base
= name
== TGSI_SEMANTIC_PATCH
|| name
== TGSI_SEMANTIC_TESSINNER
||
252 name
== TGSI_SEMANTIC_TESSOUTER
253 ? si_shader_io_get_unique_index_patch(name
, index
)
254 : si_shader_io_get_unique_index(name
, index
, false);
257 param_index
= LLVMBuildAdd(ctx
->ac
.builder
, param_index
,
258 LLVMConstInt(ctx
->ac
.i32
, param_index_base
, 0), "");
260 param_index
= LLVMConstInt(ctx
->ac
.i32
, param_index_base
, 0);
263 return get_tcs_tes_buffer_address(ctx
, get_rel_patch_id(ctx
), vertex_index
, param_index
);
266 static LLVMValueRef
buffer_load(struct si_shader_context
*ctx
, LLVMTypeRef type
, unsigned swizzle
,
267 LLVMValueRef buffer
, LLVMValueRef offset
, LLVMValueRef base
,
270 LLVMValueRef value
, value2
;
271 LLVMTypeRef vec_type
= LLVMVectorType(type
, 4);
274 value
= ac_build_buffer_load(&ctx
->ac
, buffer
, 4, NULL
, base
, offset
, 0, ac_glc
,
275 can_speculate
, false);
277 return LLVMBuildBitCast(ctx
->ac
.builder
, value
, vec_type
, "");
280 if (ac_get_type_size(type
) != 8) {
281 value
= ac_build_buffer_load(&ctx
->ac
, buffer
, 4, NULL
, base
, offset
, 0, ac_glc
,
282 can_speculate
, false);
284 value
= LLVMBuildBitCast(ctx
->ac
.builder
, value
, vec_type
, "");
285 return LLVMBuildExtractElement(ctx
->ac
.builder
, value
, LLVMConstInt(ctx
->ac
.i32
, swizzle
, 0),
289 value
= ac_build_buffer_load(&ctx
->ac
, buffer
, 1, NULL
, base
, offset
, swizzle
* 4, ac_glc
,
290 can_speculate
, false);
292 value2
= ac_build_buffer_load(&ctx
->ac
, buffer
, 1, NULL
, base
, offset
, swizzle
* 4 + 4, ac_glc
,
293 can_speculate
, false);
295 return si_build_gather_64bit(ctx
, type
, value
, value2
);
299 * Load from LSHS LDS storage.
301 * \param type output value type
302 * \param swizzle offset (typically 0..3); it can be ~0, which loads a vec4
303 * \param dw_addr address in dwords
305 static LLVMValueRef
lshs_lds_load(struct si_shader_context
*ctx
, LLVMTypeRef type
, unsigned swizzle
,
306 LLVMValueRef dw_addr
)
311 LLVMValueRef values
[4];
313 for (unsigned chan
= 0; chan
< 4; chan
++)
314 values
[chan
] = lshs_lds_load(ctx
, type
, chan
, dw_addr
);
316 return ac_build_gather_values(&ctx
->ac
, values
, 4);
319 /* Split 64-bit loads. */
320 if (ac_get_type_size(type
) == 8) {
323 lo
= lshs_lds_load(ctx
, ctx
->ac
.i32
, swizzle
, dw_addr
);
324 hi
= lshs_lds_load(ctx
, ctx
->ac
.i32
, swizzle
+ 1, dw_addr
);
325 return si_build_gather_64bit(ctx
, type
, lo
, hi
);
328 dw_addr
= LLVMBuildAdd(ctx
->ac
.builder
, dw_addr
, LLVMConstInt(ctx
->ac
.i32
, swizzle
, 0), "");
330 value
= ac_lds_load(&ctx
->ac
, dw_addr
);
332 return LLVMBuildBitCast(ctx
->ac
.builder
, value
, type
, "");
336 * Store to LSHS LDS storage.
338 * \param swizzle offset (typically 0..3)
339 * \param dw_addr address in dwords
340 * \param value value to store
342 static void lshs_lds_store(struct si_shader_context
*ctx
, unsigned dw_offset_imm
,
343 LLVMValueRef dw_addr
, LLVMValueRef value
)
346 LLVMBuildAdd(ctx
->ac
.builder
, dw_addr
, LLVMConstInt(ctx
->ac
.i32
, dw_offset_imm
, 0), "");
348 ac_lds_store(&ctx
->ac
, dw_addr
, value
);
/* Selects which tessellation ring get_tess_ring_descriptor() describes. */
enum si_tess_ring
{
   TCS_FACTOR_RING,       /* tess factor ring, addressed from the TCS */
   TESS_OFFCHIP_RING_TCS, /* offchip ring, addressed from the TCS */
   TESS_OFFCHIP_RING_TES, /* offchip ring, addressed from the TES */
};
358 static LLVMValueRef
get_tess_ring_descriptor(struct si_shader_context
*ctx
, enum si_tess_ring ring
)
360 LLVMBuilderRef builder
= ctx
->ac
.builder
;
361 LLVMValueRef addr
= ac_get_arg(
362 &ctx
->ac
, ring
== TESS_OFFCHIP_RING_TES
? ctx
->tes_offchip_addr
: ctx
->tcs_out_lds_layout
);
364 /* TCS only receives high 13 bits of the address. */
365 if (ring
== TESS_OFFCHIP_RING_TCS
|| ring
== TCS_FACTOR_RING
) {
366 addr
= LLVMBuildAnd(builder
, addr
, LLVMConstInt(ctx
->ac
.i32
, 0xfff80000, 0), "");
369 if (ring
== TCS_FACTOR_RING
) {
370 unsigned tf_offset
= ctx
->screen
->tess_offchip_ring_size
;
371 addr
= LLVMBuildAdd(builder
, addr
, LLVMConstInt(ctx
->ac
.i32
, tf_offset
, 0), "");
374 uint32_t rsrc3
= S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X
) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y
) |
375 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z
) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W
);
377 if (ctx
->screen
->info
.chip_class
>= GFX10
)
378 rsrc3
|= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT
) |
379 S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW
) | S_008F0C_RESOURCE_LEVEL(1);
381 rsrc3
|= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT
) |
382 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32
);
384 LLVMValueRef desc
[4];
386 desc
[1] = LLVMConstInt(ctx
->ac
.i32
, S_008F04_BASE_ADDRESS_HI(ctx
->screen
->info
.address32_hi
), 0);
387 desc
[2] = LLVMConstInt(ctx
->ac
.i32
, 0xffffffff, 0);
388 desc
[3] = LLVMConstInt(ctx
->ac
.i32
, rsrc3
, false);
390 return ac_build_gather_values(&ctx
->ac
, desc
, 4);
393 void si_llvm_preload_tes_rings(struct si_shader_context
*ctx
)
395 ctx
->tess_offchip_ring
= get_tess_ring_descriptor(ctx
, TESS_OFFCHIP_RING_TES
);
398 static LLVMValueRef
si_nir_load_tcs_varyings(struct ac_shader_abi
*abi
, LLVMTypeRef type
,
399 LLVMValueRef vertex_index
, LLVMValueRef param_index
,
400 unsigned const_index
, unsigned location
,
401 unsigned driver_location
, unsigned component
,
402 unsigned num_components
, bool unused
,
403 bool is_compact
, bool load_input
)
405 struct si_shader_context
*ctx
= si_shader_context_from_abi(abi
);
406 struct si_shader_info
*info
= &ctx
->shader
->selector
->info
;
407 LLVMValueRef dw_addr
, stride
;
410 driver_location
= driver_location
/ 4;
413 name
= info
->input_semantic_name
[driver_location
];
414 index
= info
->input_semantic_index
[driver_location
];
416 name
= info
->output_semantic_name
[driver_location
];
417 index
= info
->output_semantic_index
[driver_location
];
420 bool is_patch
= vertex_index
== NULL
;
421 assert((name
== TGSI_SEMANTIC_PATCH
|| name
== TGSI_SEMANTIC_TESSINNER
||
422 name
== TGSI_SEMANTIC_TESSOUTER
) == is_patch
);
425 stride
= get_tcs_in_vertex_dw_stride(ctx
);
426 dw_addr
= get_tcs_in_current_patch_offset(ctx
);
430 dw_addr
= get_tcs_out_current_patch_data_offset(ctx
);
432 stride
= get_tcs_out_vertex_dw_stride(ctx
);
433 dw_addr
= get_tcs_out_current_patch_offset(ctx
);
438 param_index
= LLVMConstInt(ctx
->ac
.i32
, const_index
, 0);
441 dw_addr
= get_dw_address_from_generic_indices(ctx
, stride
, dw_addr
, vertex_index
, param_index
,
444 LLVMValueRef value
[4];
445 for (unsigned i
= 0; i
< num_components
; i
++) {
447 if (ac_get_type_size(type
) == 8)
451 value
[i
+ component
] = lshs_lds_load(ctx
, type
, offset
, dw_addr
);
454 return ac_build_varying_gather_values(&ctx
->ac
, value
, num_components
, component
);
457 static LLVMValueRef
si_nir_load_input_tes(struct ac_shader_abi
*abi
, LLVMTypeRef type
,
458 LLVMValueRef vertex_index
, LLVMValueRef param_index
,
459 unsigned const_index
, unsigned location
,
460 unsigned driver_location
, unsigned component
,
461 unsigned num_components
, bool unused
, bool is_compact
,
464 struct si_shader_context
*ctx
= si_shader_context_from_abi(abi
);
465 struct si_shader_info
*info
= &ctx
->shader
->selector
->info
;
466 LLVMValueRef base
, addr
;
468 driver_location
= driver_location
/ 4;
469 ubyte name
= info
->input_semantic_name
[driver_location
];
470 ubyte index
= info
->input_semantic_index
[driver_location
];
472 assert((name
== TGSI_SEMANTIC_PATCH
|| name
== TGSI_SEMANTIC_TESSINNER
||
473 name
== TGSI_SEMANTIC_TESSOUTER
) == (vertex_index
== NULL
));
475 base
= ac_get_arg(&ctx
->ac
, ctx
->tcs_offchip_offset
);
478 param_index
= LLVMConstInt(ctx
->ac
.i32
, const_index
, 0);
482 get_tcs_tes_buffer_address_from_generic_indices(ctx
, vertex_index
, param_index
, name
, index
);
484 /* TODO: This will generate rather ordinary llvm code, although it
485 * should be easy for the optimiser to fix up. In future we might want
486 * to refactor buffer_load().
488 LLVMValueRef value
[4];
489 for (unsigned i
= 0; i
< num_components
; i
++) {
491 if (ac_get_type_size(type
) == 8) {
494 ubyte name
= info
->input_semantic_name
[driver_location
+ 1];
495 ubyte index
= info
->input_semantic_index
[driver_location
+ 1];
496 addr
= get_tcs_tes_buffer_address_from_generic_indices(ctx
, vertex_index
, param_index
,
504 value
[i
+ component
] =
505 buffer_load(ctx
, type
, offset
, ctx
->tess_offchip_ring
, base
, addr
, true);
508 return ac_build_varying_gather_values(&ctx
->ac
, value
, num_components
, component
);
511 static void si_nir_store_output_tcs(struct ac_shader_abi
*abi
, const struct nir_variable
*var
,
512 LLVMValueRef vertex_index
, LLVMValueRef param_index
,
513 unsigned const_index
, LLVMValueRef src
, unsigned writemask
,
514 unsigned component
, unsigned driver_location
)
516 struct si_shader_context
*ctx
= si_shader_context_from_abi(abi
);
517 struct si_shader_info
*info
= &ctx
->shader
->selector
->info
;
518 LLVMValueRef dw_addr
, stride
;
519 LLVMValueRef buffer
, base
, addr
;
520 LLVMValueRef values
[8];
521 bool is_tess_factor
= false, is_tess_inner
= false;
523 driver_location
= driver_location
/ 4;
524 ubyte name
= info
->output_semantic_name
[driver_location
];
525 ubyte index
= info
->output_semantic_index
[driver_location
];
527 bool is_const
= !param_index
;
529 param_index
= LLVMConstInt(ctx
->ac
.i32
, const_index
, 0);
531 const bool is_patch
= vertex_index
== NULL
;
533 /* Invalid SPIR-V can cause this. */
534 if ((name
== TGSI_SEMANTIC_PATCH
|| name
== TGSI_SEMANTIC_TESSINNER
||
535 name
== TGSI_SEMANTIC_TESSOUTER
) != is_patch
)
539 stride
= get_tcs_out_vertex_dw_stride(ctx
);
540 dw_addr
= get_tcs_out_current_patch_offset(ctx
);
541 dw_addr
= get_dw_address_from_generic_indices(ctx
, stride
, dw_addr
, vertex_index
, param_index
,
544 dw_addr
= get_tcs_out_current_patch_data_offset(ctx
);
545 dw_addr
= get_dw_address_from_generic_indices(ctx
, NULL
, dw_addr
, vertex_index
, param_index
,
548 if (is_const
&& const_index
== 0) {
549 int name
= info
->output_semantic_name
[driver_location
];
551 /* Always write tess factors into LDS for the TCS epilog. */
552 if (name
== TGSI_SEMANTIC_TESSINNER
|| name
== TGSI_SEMANTIC_TESSOUTER
) {
553 is_tess_factor
= true;
554 is_tess_inner
= name
== TGSI_SEMANTIC_TESSINNER
;
559 buffer
= get_tess_ring_descriptor(ctx
, TESS_OFFCHIP_RING_TCS
);
561 base
= ac_get_arg(&ctx
->ac
, ctx
->tcs_offchip_offset
);
564 get_tcs_tes_buffer_address_from_generic_indices(ctx
, vertex_index
, param_index
, name
, index
);
566 for (unsigned chan
= component
; chan
< 8; chan
++) {
567 if (!(writemask
& (1 << chan
)))
569 LLVMValueRef value
= ac_llvm_extract_elem(&ctx
->ac
, src
, chan
- component
);
571 unsigned buffer_store_offset
= chan
% 4;
573 ubyte name
= info
->output_semantic_name
[driver_location
+ 1];
574 ubyte index
= info
->output_semantic_index
[driver_location
+ 1];
575 addr
= get_tcs_tes_buffer_address_from_generic_indices(ctx
, vertex_index
, param_index
,
579 /* Skip LDS stores if there is no LDS read of this output. */
580 if (info
->output_readmask
[driver_location
+ chan
/ 4] & (1 << (chan
% 4)) ||
581 /* The epilog reads LDS if invocation 0 doesn't define tess factors. */
583 !ctx
->shader
->selector
->info
.tessfactors_are_def_in_all_invocs
))
584 lshs_lds_store(ctx
, chan
, dw_addr
, value
);
586 value
= ac_to_integer(&ctx
->ac
, value
);
587 values
[chan
] = value
;
589 if (writemask
!= 0xF && !is_tess_factor
) {
590 ac_build_buffer_store_dword(&ctx
->ac
, buffer
, value
, 1, addr
, base
,
591 4 * buffer_store_offset
, ac_glc
);
594 /* Write tess factors into VGPRs for the epilog. */
595 if (is_tess_factor
&& ctx
->shader
->selector
->info
.tessfactors_are_def_in_all_invocs
) {
596 if (!is_tess_inner
) {
597 LLVMBuildStore(ctx
->ac
.builder
, value
, /* outer */
598 ctx
->invoc0_tess_factors
[chan
]);
599 } else if (chan
< 2) {
600 LLVMBuildStore(ctx
->ac
.builder
, value
, /* inner */
601 ctx
->invoc0_tess_factors
[4 + chan
]);
606 if (writemask
== 0xF && !is_tess_factor
) {
607 LLVMValueRef value
= ac_build_gather_values(&ctx
->ac
, values
, 4);
608 ac_build_buffer_store_dword(&ctx
->ac
, buffer
, value
, 4, addr
, base
, 0, ac_glc
);
612 static LLVMValueRef
si_load_tess_coord(struct ac_shader_abi
*abi
)
614 struct si_shader_context
*ctx
= si_shader_context_from_abi(abi
);
615 LLVMValueRef coord
[4] = {ac_get_arg(&ctx
->ac
, ctx
->tes_u
), ac_get_arg(&ctx
->ac
, ctx
->tes_v
),
616 ctx
->ac
.f32_0
, ctx
->ac
.f32_0
};
618 /* For triangles, the vector should be (u, v, 1-u-v). */
619 if (ctx
->shader
->selector
->info
.properties
[TGSI_PROPERTY_TES_PRIM_MODE
] == PIPE_PRIM_TRIANGLES
) {
620 coord
[2] = LLVMBuildFSub(ctx
->ac
.builder
, ctx
->ac
.f32_1
,
621 LLVMBuildFAdd(ctx
->ac
.builder
, coord
[0], coord
[1], ""), "");
623 return ac_build_gather_values(&ctx
->ac
, coord
, 4);
626 static LLVMValueRef
load_tess_level(struct si_shader_context
*ctx
, unsigned semantic_name
)
628 LLVMValueRef base
, addr
;
630 int param
= si_shader_io_get_unique_index_patch(semantic_name
, 0);
632 base
= ac_get_arg(&ctx
->ac
, ctx
->tcs_offchip_offset
);
633 addr
= get_tcs_tes_buffer_address(ctx
, get_rel_patch_id(ctx
), NULL
,
634 LLVMConstInt(ctx
->ac
.i32
, param
, 0));
636 return buffer_load(ctx
, ctx
->ac
.f32
, ~0, ctx
->tess_offchip_ring
, base
, addr
, true);
639 static LLVMValueRef
load_tess_level_default(struct si_shader_context
*ctx
, unsigned semantic_name
)
641 LLVMValueRef buf
, slot
, val
[4];
644 slot
= LLVMConstInt(ctx
->ac
.i32
, SI_HS_CONST_DEFAULT_TESS_LEVELS
, 0);
645 buf
= ac_get_arg(&ctx
->ac
, ctx
->rw_buffers
);
646 buf
= ac_build_load_to_sgpr(&ctx
->ac
, buf
, slot
);
647 offset
= semantic_name
== TGSI_SEMANTIC_TESS_DEFAULT_INNER_LEVEL
? 4 : 0;
649 for (i
= 0; i
< 4; i
++)
650 val
[i
] = si_buffer_load_const(ctx
, buf
, LLVMConstInt(ctx
->ac
.i32
, (offset
+ i
) * 4, 0));
651 return ac_build_gather_values(&ctx
->ac
, val
, 4);
654 static LLVMValueRef
si_load_tess_level(struct ac_shader_abi
*abi
, unsigned varying_id
,
655 bool load_default_state
)
657 struct si_shader_context
*ctx
= si_shader_context_from_abi(abi
);
658 unsigned semantic_name
;
660 if (load_default_state
) {
661 switch (varying_id
) {
662 case VARYING_SLOT_TESS_LEVEL_INNER
:
663 semantic_name
= TGSI_SEMANTIC_TESS_DEFAULT_INNER_LEVEL
;
665 case VARYING_SLOT_TESS_LEVEL_OUTER
:
666 semantic_name
= TGSI_SEMANTIC_TESS_DEFAULT_OUTER_LEVEL
;
669 unreachable("unknown tess level");
671 return load_tess_level_default(ctx
, semantic_name
);
674 switch (varying_id
) {
675 case VARYING_SLOT_TESS_LEVEL_INNER
:
676 semantic_name
= TGSI_SEMANTIC_TESSINNER
;
678 case VARYING_SLOT_TESS_LEVEL_OUTER
:
679 semantic_name
= TGSI_SEMANTIC_TESSOUTER
;
682 unreachable("unknown tess level");
685 return load_tess_level(ctx
, semantic_name
);
688 static LLVMValueRef
si_load_patch_vertices_in(struct ac_shader_abi
*abi
)
690 struct si_shader_context
*ctx
= si_shader_context_from_abi(abi
);
691 if (ctx
->type
== PIPE_SHADER_TESS_CTRL
)
692 return si_unpack_param(ctx
, ctx
->tcs_out_lds_layout
, 13, 6);
693 else if (ctx
->type
== PIPE_SHADER_TESS_EVAL
)
694 return get_num_tcs_out_vertices(ctx
);
696 unreachable("invalid shader stage for TGSI_SEMANTIC_VERTICESIN");
700 * Forward all outputs from the vertex shader to the TES. This is only used
701 * for the fixed function TCS.
703 static void si_copy_tcs_inputs(struct si_shader_context
*ctx
)
705 LLVMValueRef invocation_id
, buffer
, buffer_offset
;
706 LLVMValueRef lds_vertex_stride
, lds_base
;
709 invocation_id
= si_unpack_param(ctx
, ctx
->args
.tcs_rel_ids
, 8, 5);
710 buffer
= get_tess_ring_descriptor(ctx
, TESS_OFFCHIP_RING_TCS
);
711 buffer_offset
= ac_get_arg(&ctx
->ac
, ctx
->tcs_offchip_offset
);
713 lds_vertex_stride
= get_tcs_in_vertex_dw_stride(ctx
);
714 lds_base
= get_tcs_in_current_patch_offset(ctx
);
715 lds_base
= ac_build_imad(&ctx
->ac
, invocation_id
, lds_vertex_stride
, lds_base
);
717 inputs
= ctx
->shader
->key
.mono
.u
.ff_tcs_inputs_to_copy
;
719 unsigned i
= u_bit_scan64(&inputs
);
721 LLVMValueRef lds_ptr
=
722 LLVMBuildAdd(ctx
->ac
.builder
, lds_base
, LLVMConstInt(ctx
->ac
.i32
, 4 * i
, 0), "");
724 LLVMValueRef buffer_addr
= get_tcs_tes_buffer_address(
725 ctx
, get_rel_patch_id(ctx
), invocation_id
, LLVMConstInt(ctx
->ac
.i32
, i
, 0));
727 LLVMValueRef value
= lshs_lds_load(ctx
, ctx
->ac
.i32
, ~0, lds_ptr
);
729 ac_build_buffer_store_dword(&ctx
->ac
, buffer
, value
, 4, buffer_addr
, buffer_offset
, 0,
734 static void si_write_tess_factors(struct si_shader_context
*ctx
, LLVMValueRef rel_patch_id
,
735 LLVMValueRef invocation_id
,
736 LLVMValueRef tcs_out_current_patch_data_offset
,
737 LLVMValueRef invoc0_tf_outer
[4], LLVMValueRef invoc0_tf_inner
[2])
739 struct si_shader
*shader
= ctx
->shader
;
740 unsigned tess_inner_index
, tess_outer_index
;
741 LLVMValueRef lds_base
, lds_inner
, lds_outer
, byteoffset
, buffer
;
742 LLVMValueRef out
[6], vec0
, vec1
, tf_base
, inner
[4], outer
[4];
743 unsigned stride
, outer_comps
, inner_comps
, i
, offset
;
745 /* Add a barrier before loading tess factors from LDS. */
746 if (!shader
->key
.part
.tcs
.epilog
.invoc0_tess_factors_are_def
)
747 si_llvm_emit_barrier(ctx
);
749 /* Do this only for invocation 0, because the tess levels are per-patch,
752 * This can't jump, because invocation 0 executes this. It should
753 * at least mask out the loads and stores for other invocations.
755 ac_build_ifcc(&ctx
->ac
,
756 LLVMBuildICmp(ctx
->ac
.builder
, LLVMIntEQ
, invocation_id
, ctx
->ac
.i32_0
, ""), 6503);
758 /* Determine the layout of one tess factor element in the buffer. */
759 switch (shader
->key
.part
.tcs
.epilog
.prim_mode
) {
760 case PIPE_PRIM_LINES
:
761 stride
= 2; /* 2 dwords, 1 vec2 store */
765 case PIPE_PRIM_TRIANGLES
:
766 stride
= 4; /* 4 dwords, 1 vec4 store */
770 case PIPE_PRIM_QUADS
:
771 stride
= 6; /* 6 dwords, 2 stores (vec4 + vec2) */
780 for (i
= 0; i
< 4; i
++) {
781 inner
[i
] = LLVMGetUndef(ctx
->ac
.i32
);
782 outer
[i
] = LLVMGetUndef(ctx
->ac
.i32
);
785 if (shader
->key
.part
.tcs
.epilog
.invoc0_tess_factors_are_def
) {
786 /* Tess factors are in VGPRs. */
787 for (i
= 0; i
< outer_comps
; i
++)
788 outer
[i
] = out
[i
] = invoc0_tf_outer
[i
];
789 for (i
= 0; i
< inner_comps
; i
++)
790 inner
[i
] = out
[outer_comps
+ i
] = invoc0_tf_inner
[i
];
792 /* Load tess_inner and tess_outer from LDS.
793 * Any invocation can write them, so we can't get them from a temporary.
795 tess_inner_index
= si_shader_io_get_unique_index_patch(TGSI_SEMANTIC_TESSINNER
, 0);
796 tess_outer_index
= si_shader_io_get_unique_index_patch(TGSI_SEMANTIC_TESSOUTER
, 0);
798 lds_base
= tcs_out_current_patch_data_offset
;
799 lds_inner
= LLVMBuildAdd(ctx
->ac
.builder
, lds_base
,
800 LLVMConstInt(ctx
->ac
.i32
, tess_inner_index
* 4, 0), "");
801 lds_outer
= LLVMBuildAdd(ctx
->ac
.builder
, lds_base
,
802 LLVMConstInt(ctx
->ac
.i32
, tess_outer_index
* 4, 0), "");
804 for (i
= 0; i
< outer_comps
; i
++) {
805 outer
[i
] = out
[i
] = lshs_lds_load(ctx
, ctx
->ac
.i32
, i
, lds_outer
);
807 for (i
= 0; i
< inner_comps
; i
++) {
808 inner
[i
] = out
[outer_comps
+ i
] = lshs_lds_load(ctx
, ctx
->ac
.i32
, i
, lds_inner
);
812 if (shader
->key
.part
.tcs
.epilog
.prim_mode
== PIPE_PRIM_LINES
) {
813 /* For isolines, the hardware expects tess factors in the
814 * reverse order from what NIR specifies.
816 LLVMValueRef tmp
= out
[0];
821 /* Convert the outputs to vectors for stores. */
822 vec0
= ac_build_gather_values(&ctx
->ac
, out
, MIN2(stride
, 4));
826 vec1
= ac_build_gather_values(&ctx
->ac
, out
+ 4, stride
- 4);
828 /* Get the buffer. */
829 buffer
= get_tess_ring_descriptor(ctx
, TCS_FACTOR_RING
);
831 /* Get the offset. */
832 tf_base
= ac_get_arg(&ctx
->ac
, ctx
->tcs_factor_offset
);
834 LLVMBuildMul(ctx
->ac
.builder
, rel_patch_id
, LLVMConstInt(ctx
->ac
.i32
, 4 * stride
, 0), "");
836 ac_build_ifcc(&ctx
->ac
,
837 LLVMBuildICmp(ctx
->ac
.builder
, LLVMIntEQ
, rel_patch_id
, ctx
->ac
.i32_0
, ""), 6504);
839 /* Store the dynamic HS control word. */
841 if (ctx
->screen
->info
.chip_class
<= GFX8
) {
842 ac_build_buffer_store_dword(&ctx
->ac
, buffer
, LLVMConstInt(ctx
->ac
.i32
, 0x80000000, 0), 1,
843 ctx
->ac
.i32_0
, tf_base
, offset
, ac_glc
);
847 ac_build_endif(&ctx
->ac
, 6504);
849 /* Store the tessellation factors. */
850 ac_build_buffer_store_dword(&ctx
->ac
, buffer
, vec0
, MIN2(stride
, 4), byteoffset
, tf_base
, offset
,
854 ac_build_buffer_store_dword(&ctx
->ac
, buffer
, vec1
, stride
- 4, byteoffset
, tf_base
, offset
,
857 /* Store the tess factors into the offchip buffer if TES reads them. */
858 if (shader
->key
.part
.tcs
.epilog
.tes_reads_tess_factors
) {
859 LLVMValueRef buf
, base
, inner_vec
, outer_vec
, tf_outer_offset
;
860 LLVMValueRef tf_inner_offset
;
861 unsigned param_outer
, param_inner
;
863 buf
= get_tess_ring_descriptor(ctx
, TESS_OFFCHIP_RING_TCS
);
864 base
= ac_get_arg(&ctx
->ac
, ctx
->tcs_offchip_offset
);
866 param_outer
= si_shader_io_get_unique_index_patch(TGSI_SEMANTIC_TESSOUTER
, 0);
867 tf_outer_offset
= get_tcs_tes_buffer_address(ctx
, rel_patch_id
, NULL
,
868 LLVMConstInt(ctx
->ac
.i32
, param_outer
, 0));
870 unsigned outer_vec_size
= ac_has_vec3_support(ctx
->screen
->info
.chip_class
, false)
872 : util_next_power_of_two(outer_comps
);
873 outer_vec
= ac_build_gather_values(&ctx
->ac
, outer
, outer_vec_size
);
875 ac_build_buffer_store_dword(&ctx
->ac
, buf
, outer_vec
, outer_comps
, tf_outer_offset
, base
, 0,
878 param_inner
= si_shader_io_get_unique_index_patch(TGSI_SEMANTIC_TESSINNER
, 0);
879 tf_inner_offset
= get_tcs_tes_buffer_address(ctx
, rel_patch_id
, NULL
,
880 LLVMConstInt(ctx
->ac
.i32
, param_inner
, 0));
883 inner_comps
== 1 ? inner
[0] : ac_build_gather_values(&ctx
->ac
, inner
, inner_comps
);
884 ac_build_buffer_store_dword(&ctx
->ac
, buf
, inner_vec
, inner_comps
, tf_inner_offset
, base
,
889 ac_build_endif(&ctx
->ac
, 6503);
892 /* This only writes the tessellation factor levels. */
893 static void si_llvm_emit_tcs_epilogue(struct ac_shader_abi
*abi
, unsigned max_outputs
,
896 struct si_shader_context
*ctx
= si_shader_context_from_abi(abi
);
897 LLVMBuilderRef builder
= ctx
->ac
.builder
;
898 LLVMValueRef rel_patch_id
, invocation_id
, tf_lds_offset
;
900 si_copy_tcs_inputs(ctx
);
902 rel_patch_id
= get_rel_patch_id(ctx
);
903 invocation_id
= si_unpack_param(ctx
, ctx
->args
.tcs_rel_ids
, 8, 5);
904 tf_lds_offset
= get_tcs_out_current_patch_data_offset(ctx
);
906 if (ctx
->screen
->info
.chip_class
>= GFX9
) {
907 LLVMBasicBlockRef blocks
[2] = {LLVMGetInsertBlock(builder
), ctx
->merged_wrap_if_entry_block
};
908 LLVMValueRef values
[2];
910 ac_build_endif(&ctx
->ac
, ctx
->merged_wrap_if_label
);
912 values
[0] = rel_patch_id
;
913 values
[1] = LLVMGetUndef(ctx
->ac
.i32
);
914 rel_patch_id
= ac_build_phi(&ctx
->ac
, ctx
->ac
.i32
, 2, values
, blocks
);
916 values
[0] = tf_lds_offset
;
917 values
[1] = LLVMGetUndef(ctx
->ac
.i32
);
918 tf_lds_offset
= ac_build_phi(&ctx
->ac
, ctx
->ac
.i32
, 2, values
, blocks
);
920 values
[0] = invocation_id
;
921 values
[1] = ctx
->ac
.i32_1
; /* cause the epilog to skip threads */
922 invocation_id
= ac_build_phi(&ctx
->ac
, ctx
->ac
.i32
, 2, values
, blocks
);
925 /* Return epilog parameters from this function. */
926 LLVMValueRef ret
= ctx
->return_value
;
929 if (ctx
->screen
->info
.chip_class
>= GFX9
) {
931 si_insert_input_ret(ctx
, ret
, ctx
->tcs_offchip_layout
, 8 + GFX9_SGPR_TCS_OFFCHIP_LAYOUT
);
932 ret
= si_insert_input_ret(ctx
, ret
, ctx
->tcs_out_lds_layout
, 8 + GFX9_SGPR_TCS_OUT_LAYOUT
);
933 /* Tess offchip and tess factor offsets are at the beginning. */
934 ret
= si_insert_input_ret(ctx
, ret
, ctx
->tcs_offchip_offset
, 2);
935 ret
= si_insert_input_ret(ctx
, ret
, ctx
->tcs_factor_offset
, 4);
936 vgpr
= 8 + GFX9_SGPR_TCS_OUT_LAYOUT
+ 1;
938 ret
= si_insert_input_ret(ctx
, ret
, ctx
->tcs_offchip_layout
, GFX6_SGPR_TCS_OFFCHIP_LAYOUT
);
939 ret
= si_insert_input_ret(ctx
, ret
, ctx
->tcs_out_lds_layout
, GFX6_SGPR_TCS_OUT_LAYOUT
);
940 /* Tess offchip and tess factor offsets are after user SGPRs. */
941 ret
= si_insert_input_ret(ctx
, ret
, ctx
->tcs_offchip_offset
, GFX6_TCS_NUM_USER_SGPR
);
942 ret
= si_insert_input_ret(ctx
, ret
, ctx
->tcs_factor_offset
, GFX6_TCS_NUM_USER_SGPR
+ 1);
943 vgpr
= GFX6_TCS_NUM_USER_SGPR
+ 2;
947 rel_patch_id
= ac_to_float(&ctx
->ac
, rel_patch_id
);
948 invocation_id
= ac_to_float(&ctx
->ac
, invocation_id
);
949 tf_lds_offset
= ac_to_float(&ctx
->ac
, tf_lds_offset
);
951 /* Leave a hole corresponding to the two input VGPRs. This ensures that
952 * the invocation_id output does not alias the tcs_rel_ids input,
953 * which saves a V_MOV on gfx9.
957 ret
= LLVMBuildInsertValue(builder
, ret
, rel_patch_id
, vgpr
++, "");
958 ret
= LLVMBuildInsertValue(builder
, ret
, invocation_id
, vgpr
++, "");
960 if (ctx
->shader
->selector
->info
.tessfactors_are_def_in_all_invocs
) {
961 vgpr
++; /* skip the tess factor LDS offset */
962 for (unsigned i
= 0; i
< 6; i
++) {
963 LLVMValueRef value
= LLVMBuildLoad(builder
, ctx
->invoc0_tess_factors
[i
], "");
964 value
= ac_to_float(&ctx
->ac
, value
);
965 ret
= LLVMBuildInsertValue(builder
, ret
, value
, vgpr
++, "");
968 ret
= LLVMBuildInsertValue(builder
, ret
, tf_lds_offset
, vgpr
++, "");
970 ctx
->return_value
= ret
;
973 /* Pass TCS inputs from LS to TCS on GFX9. */
974 static void si_set_ls_return_value_for_tcs(struct si_shader_context
*ctx
)
976 LLVMValueRef ret
= ctx
->return_value
;
978 ret
= si_insert_input_ptr(ctx
, ret
, ctx
->other_const_and_shader_buffers
, 0);
979 ret
= si_insert_input_ptr(ctx
, ret
, ctx
->other_samplers_and_images
, 1);
980 ret
= si_insert_input_ret(ctx
, ret
, ctx
->tcs_offchip_offset
, 2);
981 ret
= si_insert_input_ret(ctx
, ret
, ctx
->merged_wave_info
, 3);
982 ret
= si_insert_input_ret(ctx
, ret
, ctx
->tcs_factor_offset
, 4);
983 ret
= si_insert_input_ret(ctx
, ret
, ctx
->merged_scratch_offset
, 5);
985 ret
= si_insert_input_ptr(ctx
, ret
, ctx
->rw_buffers
, 8 + SI_SGPR_RW_BUFFERS
);
986 ret
= si_insert_input_ptr(ctx
, ret
, ctx
->bindless_samplers_and_images
,
987 8 + SI_SGPR_BINDLESS_SAMPLERS_AND_IMAGES
);
989 ret
= si_insert_input_ret(ctx
, ret
, ctx
->vs_state_bits
, 8 + SI_SGPR_VS_STATE_BITS
);
991 ret
= si_insert_input_ret(ctx
, ret
, ctx
->tcs_offchip_layout
, 8 + GFX9_SGPR_TCS_OFFCHIP_LAYOUT
);
992 ret
= si_insert_input_ret(ctx
, ret
, ctx
->tcs_out_lds_offsets
, 8 + GFX9_SGPR_TCS_OUT_OFFSETS
);
993 ret
= si_insert_input_ret(ctx
, ret
, ctx
->tcs_out_lds_layout
, 8 + GFX9_SGPR_TCS_OUT_LAYOUT
);
995 unsigned vgpr
= 8 + GFX9_TCS_NUM_USER_SGPR
;
996 ret
= LLVMBuildInsertValue(ctx
->ac
.builder
, ret
,
997 ac_to_float(&ctx
->ac
, ac_get_arg(&ctx
->ac
, ctx
->args
.tcs_patch_id
)),
999 ret
= LLVMBuildInsertValue(ctx
->ac
.builder
, ret
,
1000 ac_to_float(&ctx
->ac
, ac_get_arg(&ctx
->ac
, ctx
->args
.tcs_rel_ids
)),
1002 ctx
->return_value
= ret
;
1005 void si_llvm_emit_ls_epilogue(struct ac_shader_abi
*abi
, unsigned max_outputs
, LLVMValueRef
*addrs
)
1007 struct si_shader_context
*ctx
= si_shader_context_from_abi(abi
);
1008 struct si_shader
*shader
= ctx
->shader
;
1009 struct si_shader_info
*info
= &shader
->selector
->info
;
1011 LLVMValueRef vertex_id
= ac_get_arg(&ctx
->ac
, ctx
->rel_auto_id
);
1012 LLVMValueRef vertex_dw_stride
= get_tcs_in_vertex_dw_stride(ctx
);
1013 LLVMValueRef base_dw_addr
= LLVMBuildMul(ctx
->ac
.builder
, vertex_id
, vertex_dw_stride
, "");
1015 /* Write outputs to LDS. The next shader (TCS aka HS) will read
1016 * its inputs from it. */
1017 for (i
= 0; i
< info
->num_outputs
; i
++) {
1018 unsigned name
= info
->output_semantic_name
[i
];
1019 unsigned index
= info
->output_semantic_index
[i
];
1021 /* The ARB_shader_viewport_layer_array spec contains the
1024 * 2) What happens if gl_ViewportIndex or gl_Layer is
1025 * written in the vertex shader and a geometry shader is
1028 * RESOLVED: The value written by the last vertex processing
1029 * stage is used. If the last vertex processing stage
1030 * (vertex, tessellation evaluation or geometry) does not
1031 * statically assign to gl_ViewportIndex or gl_Layer, index
1032 * or layer zero is assumed.
1034 * So writes to those outputs in VS-as-LS are simply ignored.
1036 if (name
== TGSI_SEMANTIC_LAYER
|| name
== TGSI_SEMANTIC_VIEWPORT_INDEX
)
1039 int param
= si_shader_io_get_unique_index(name
, index
, false);
1040 LLVMValueRef dw_addr
=
1041 LLVMBuildAdd(ctx
->ac
.builder
, base_dw_addr
, LLVMConstInt(ctx
->ac
.i32
, param
* 4, 0), "");
1043 for (chan
= 0; chan
< 4; chan
++) {
1044 if (!(info
->output_usagemask
[i
] & (1 << chan
)))
1047 lshs_lds_store(ctx
, chan
, dw_addr
,
1048 LLVMBuildLoad(ctx
->ac
.builder
, addrs
[4 * i
+ chan
], ""));
1052 if (ctx
->screen
->info
.chip_class
>= GFX9
)
1053 si_set_ls_return_value_for_tcs(ctx
);
1057 * Compile the TCS epilog function. This writes tesselation factors to memory
1058 * based on the output primitive type of the tesselator (determined by TES).
1060 void si_llvm_build_tcs_epilog(struct si_shader_context
*ctx
, union si_shader_part_key
*key
)
1062 memset(&ctx
->args
, 0, sizeof(ctx
->args
));
1064 if (ctx
->screen
->info
.chip_class
>= GFX9
) {
1065 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
, NULL
);
1066 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
, NULL
);
1067 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
, &ctx
->tcs_offchip_offset
);
1068 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
, NULL
); /* wave info */
1069 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
, &ctx
->tcs_factor_offset
);
1070 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
, NULL
);
1071 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
, NULL
);
1072 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
, NULL
);
1073 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
, NULL
);
1074 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
, NULL
);
1075 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
, NULL
);
1076 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
, NULL
);
1077 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
, NULL
);
1078 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
, NULL
);
1079 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
, NULL
);
1080 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
, NULL
);
1081 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
, &ctx
->tcs_offchip_layout
);
1082 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
, NULL
);
1083 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
, &ctx
->tcs_out_lds_layout
);
1085 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
, NULL
);
1086 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
, NULL
);
1087 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
, NULL
);
1088 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
, NULL
);
1089 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
, &ctx
->tcs_offchip_layout
);
1090 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
, NULL
);
1091 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
, &ctx
->tcs_out_lds_layout
);
1092 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
, NULL
);
1093 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
, &ctx
->tcs_offchip_offset
);
1094 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
, &ctx
->tcs_factor_offset
);
1097 ac_add_arg(&ctx
->args
, AC_ARG_VGPR
, 1, AC_ARG_INT
, NULL
); /* VGPR gap */
1098 ac_add_arg(&ctx
->args
, AC_ARG_VGPR
, 1, AC_ARG_INT
, NULL
); /* VGPR gap */
1099 struct ac_arg rel_patch_id
; /* patch index within the wave (REL_PATCH_ID) */
1100 ac_add_arg(&ctx
->args
, AC_ARG_VGPR
, 1, AC_ARG_INT
, &rel_patch_id
);
1101 struct ac_arg invocation_id
; /* invocation ID within the patch */
1102 ac_add_arg(&ctx
->args
, AC_ARG_VGPR
, 1, AC_ARG_INT
, &invocation_id
);
1104 tcs_out_current_patch_data_offset
; /* LDS offset where tess factors should be loaded from */
1105 ac_add_arg(&ctx
->args
, AC_ARG_VGPR
, 1, AC_ARG_INT
, &tcs_out_current_patch_data_offset
);
1107 struct ac_arg tess_factors
[6];
1108 for (unsigned i
= 0; i
< 6; i
++)
1109 ac_add_arg(&ctx
->args
, AC_ARG_VGPR
, 1, AC_ARG_INT
, &tess_factors
[i
]);
1111 /* Create the function. */
1112 si_llvm_create_func(ctx
, "tcs_epilog", NULL
, 0, ctx
->screen
->info
.chip_class
>= GFX7
? 128 : 0);
1113 ac_declare_lds_as_pointer(&ctx
->ac
);
1115 LLVMValueRef invoc0_tess_factors
[6];
1116 for (unsigned i
= 0; i
< 6; i
++)
1117 invoc0_tess_factors
[i
] = ac_get_arg(&ctx
->ac
, tess_factors
[i
]);
1119 si_write_tess_factors(ctx
, ac_get_arg(&ctx
->ac
, rel_patch_id
),
1120 ac_get_arg(&ctx
->ac
, invocation_id
),
1121 ac_get_arg(&ctx
->ac
, tcs_out_current_patch_data_offset
),
1122 invoc0_tess_factors
, invoc0_tess_factors
+ 4);
1124 LLVMBuildRetVoid(ctx
->ac
.builder
);
1127 void si_llvm_init_tcs_callbacks(struct si_shader_context
*ctx
)
1129 ctx
->abi
.load_tess_varyings
= si_nir_load_tcs_varyings
;
1130 ctx
->abi
.load_tess_level
= si_load_tess_level
;
1131 ctx
->abi
.store_tcs_outputs
= si_nir_store_output_tcs
;
1132 ctx
->abi
.emit_outputs
= si_llvm_emit_tcs_epilogue
;
1133 ctx
->abi
.load_patch_vertices_in
= si_load_patch_vertices_in
;
1136 void si_llvm_init_tes_callbacks(struct si_shader_context
*ctx
, bool ngg_cull_shader
)
1138 ctx
->abi
.load_tess_varyings
= si_nir_load_input_tes
;
1139 ctx
->abi
.load_tess_coord
= si_load_tess_coord
;
1140 ctx
->abi
.load_tess_level
= si_load_tess_level
;
1141 ctx
->abi
.load_patch_vertices_in
= si_load_patch_vertices_in
;
1143 if (ctx
->shader
->key
.as_es
)
1144 ctx
->abi
.emit_outputs
= si_llvm_emit_es_epilogue
;
1145 else if (ngg_cull_shader
)
1146 ctx
->abi
.emit_outputs
= gfx10_emit_ngg_culling_epilogue
;
1147 else if (ctx
->shader
->key
.as_ngg
)
1148 ctx
->abi
.emit_outputs
= gfx10_emit_ngg_epilogue
;
1150 ctx
->abi
.emit_outputs
= si_llvm_emit_vs_epilogue
;