2 * Copyright 2020 Advanced Micro Devices, Inc.
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * on the rights to use, copy, modify, merge, publish, distribute, sub
9 * license, and/or sell copies of the Software, and to permit persons to whom
10 * the Software is furnished to do so, subject to the following conditions:
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22 * USE OR OTHER DEALINGS IN THE SOFTWARE.
26 #include "si_shader_internal.h"
29 static LLVMValueRef
get_rel_patch_id(struct si_shader_context
*ctx
)
32 case PIPE_SHADER_TESS_CTRL
:
33 return si_unpack_param(ctx
, ctx
->args
.tcs_rel_ids
, 0, 8);
35 case PIPE_SHADER_TESS_EVAL
:
36 return ac_get_arg(&ctx
->ac
, ctx
->tes_rel_patch_id
);
/* Tessellation shaders pass outputs to the next shader using LDS.
 *
 * LS outputs = TCS inputs
 * TCS outputs = TES inputs
 *
 * The LDS layout is:
 * - TCS inputs for patch 0
 * - TCS inputs for patch 1
 * - TCS inputs for patch 2             = get_tcs_in_current_patch_offset (if RelPatchID==2)
 * - ...
 * - TCS outputs for patch 0            = get_tcs_out_patch0_offset
 * - Per-patch TCS outputs for patch 0  = get_tcs_out_patch0_patch_data_offset
 * - TCS outputs for patch 1
 * - Per-patch TCS outputs for patch 1
 * - TCS outputs for patch 2            = get_tcs_out_current_patch_offset (if RelPatchID==2)
 * - Per-patch TCS outputs for patch 2  = get_tcs_out_current_patch_data_offset (if RelPatchID==2)
 * - ...
 *
 * All three shaders VS(LS), TCS, TES share the same LDS space.
 */
65 static LLVMValueRef
get_tcs_in_patch_stride(struct si_shader_context
*ctx
)
67 return si_unpack_param(ctx
, ctx
->vs_state_bits
, 11, 13);
70 static unsigned get_tcs_out_vertex_dw_stride_constant(struct si_shader_context
*ctx
)
72 assert(ctx
->type
== PIPE_SHADER_TESS_CTRL
);
74 if (ctx
->shader
->key
.mono
.u
.ff_tcs_inputs_to_copy
)
75 return util_last_bit64(ctx
->shader
->key
.mono
.u
.ff_tcs_inputs_to_copy
) * 4;
77 return util_last_bit64(ctx
->shader
->selector
->outputs_written
) * 4;
80 static LLVMValueRef
get_tcs_out_vertex_dw_stride(struct si_shader_context
*ctx
)
82 unsigned stride
= get_tcs_out_vertex_dw_stride_constant(ctx
);
84 return LLVMConstInt(ctx
->ac
.i32
, stride
, 0);
87 static LLVMValueRef
get_tcs_out_patch_stride(struct si_shader_context
*ctx
)
89 if (ctx
->shader
->key
.mono
.u
.ff_tcs_inputs_to_copy
)
90 return si_unpack_param(ctx
, ctx
->tcs_out_lds_layout
, 0, 13);
92 const struct si_shader_info
*info
= &ctx
->shader
->selector
->info
;
93 unsigned tcs_out_vertices
= info
->properties
[TGSI_PROPERTY_TCS_VERTICES_OUT
];
94 unsigned vertex_dw_stride
= get_tcs_out_vertex_dw_stride_constant(ctx
);
95 unsigned num_patch_outputs
= util_last_bit64(ctx
->shader
->selector
->patch_outputs_written
);
96 unsigned patch_dw_stride
= tcs_out_vertices
* vertex_dw_stride
+ num_patch_outputs
* 4;
97 return LLVMConstInt(ctx
->ac
.i32
, patch_dw_stride
, 0);
100 static LLVMValueRef
get_tcs_out_patch0_offset(struct si_shader_context
*ctx
)
102 return LLVMBuildMul(ctx
->ac
.builder
, si_unpack_param(ctx
, ctx
->tcs_out_lds_offsets
, 0, 16),
103 LLVMConstInt(ctx
->ac
.i32
, 4, 0), "");
106 static LLVMValueRef
get_tcs_out_patch0_patch_data_offset(struct si_shader_context
*ctx
)
108 return LLVMBuildMul(ctx
->ac
.builder
, si_unpack_param(ctx
, ctx
->tcs_out_lds_offsets
, 16, 16),
109 LLVMConstInt(ctx
->ac
.i32
, 4, 0), "");
112 static LLVMValueRef
get_tcs_in_current_patch_offset(struct si_shader_context
*ctx
)
114 LLVMValueRef patch_stride
= get_tcs_in_patch_stride(ctx
);
115 LLVMValueRef rel_patch_id
= get_rel_patch_id(ctx
);
117 return LLVMBuildMul(ctx
->ac
.builder
, patch_stride
, rel_patch_id
, "");
120 static LLVMValueRef
get_tcs_out_current_patch_offset(struct si_shader_context
*ctx
)
122 LLVMValueRef patch0_offset
= get_tcs_out_patch0_offset(ctx
);
123 LLVMValueRef patch_stride
= get_tcs_out_patch_stride(ctx
);
124 LLVMValueRef rel_patch_id
= get_rel_patch_id(ctx
);
126 return ac_build_imad(&ctx
->ac
, patch_stride
, rel_patch_id
, patch0_offset
);
129 static LLVMValueRef
get_tcs_out_current_patch_data_offset(struct si_shader_context
*ctx
)
131 LLVMValueRef patch0_patch_data_offset
= get_tcs_out_patch0_patch_data_offset(ctx
);
132 LLVMValueRef patch_stride
= get_tcs_out_patch_stride(ctx
);
133 LLVMValueRef rel_patch_id
= get_rel_patch_id(ctx
);
135 return ac_build_imad(&ctx
->ac
, patch_stride
, rel_patch_id
, patch0_patch_data_offset
);
138 static LLVMValueRef
get_num_tcs_out_vertices(struct si_shader_context
*ctx
)
140 unsigned tcs_out_vertices
=
141 ctx
->shader
->selector
? ctx
->shader
->selector
->info
.properties
[TGSI_PROPERTY_TCS_VERTICES_OUT
]
144 /* If !tcs_out_vertices, it's either the fixed-func TCS or the TCS epilog. */
145 if (ctx
->type
== PIPE_SHADER_TESS_CTRL
&& tcs_out_vertices
)
146 return LLVMConstInt(ctx
->ac
.i32
, tcs_out_vertices
, 0);
148 return si_unpack_param(ctx
, ctx
->tcs_offchip_layout
, 6, 6);
151 static LLVMValueRef
get_tcs_in_vertex_dw_stride(struct si_shader_context
*ctx
)
156 case PIPE_SHADER_VERTEX
:
157 stride
= ctx
->shader
->selector
->lshs_vertex_stride
/ 4;
158 return LLVMConstInt(ctx
->ac
.i32
, stride
, 0);
160 case PIPE_SHADER_TESS_CTRL
:
161 if (ctx
->screen
->info
.chip_class
>= GFX9
&& ctx
->shader
->is_monolithic
) {
162 stride
= ctx
->shader
->key
.part
.tcs
.ls
->lshs_vertex_stride
/ 4;
163 return LLVMConstInt(ctx
->ac
.i32
, stride
, 0);
165 return si_unpack_param(ctx
, ctx
->vs_state_bits
, 24, 8);
174 get_dw_address_from_generic_indices(struct si_shader_context
*ctx
, LLVMValueRef vertex_dw_stride
,
175 LLVMValueRef base_addr
, LLVMValueRef vertex_index
,
176 LLVMValueRef param_index
, ubyte name
, ubyte index
)
178 if (vertex_dw_stride
) {
179 base_addr
= ac_build_imad(&ctx
->ac
, vertex_index
, vertex_dw_stride
, base_addr
);
183 base_addr
= ac_build_imad(&ctx
->ac
, param_index
, LLVMConstInt(ctx
->ac
.i32
, 4, 0), base_addr
);
186 int param
= name
== TGSI_SEMANTIC_PATCH
|| name
== TGSI_SEMANTIC_TESSINNER
||
187 name
== TGSI_SEMANTIC_TESSOUTER
188 ? si_shader_io_get_unique_index_patch(name
, index
)
189 : si_shader_io_get_unique_index(name
, index
, false);
191 /* Add the base address of the element. */
192 return LLVMBuildAdd(ctx
->ac
.builder
, base_addr
, LLVMConstInt(ctx
->ac
.i32
, param
* 4, 0), "");
195 /* The offchip buffer layout for TCS->TES is
197 * - attribute 0 of patch 0 vertex 0
198 * - attribute 0 of patch 0 vertex 1
199 * - attribute 0 of patch 0 vertex 2
201 * - attribute 0 of patch 1 vertex 0
202 * - attribute 0 of patch 1 vertex 1
204 * - attribute 1 of patch 0 vertex 0
205 * - attribute 1 of patch 0 vertex 1
207 * - per patch attribute 0 of patch 0
208 * - per patch attribute 0 of patch 1
211 * Note that every attribute has 4 components.
213 static LLVMValueRef
get_tcs_tes_buffer_address(struct si_shader_context
*ctx
,
214 LLVMValueRef rel_patch_id
, LLVMValueRef vertex_index
,
215 LLVMValueRef param_index
)
217 LLVMValueRef base_addr
, vertices_per_patch
, num_patches
, total_vertices
;
218 LLVMValueRef param_stride
, constant16
;
220 vertices_per_patch
= get_num_tcs_out_vertices(ctx
);
221 num_patches
= si_unpack_param(ctx
, ctx
->tcs_offchip_layout
, 0, 6);
222 total_vertices
= LLVMBuildMul(ctx
->ac
.builder
, vertices_per_patch
, num_patches
, "");
224 constant16
= LLVMConstInt(ctx
->ac
.i32
, 16, 0);
226 base_addr
= ac_build_imad(&ctx
->ac
, rel_patch_id
, vertices_per_patch
, vertex_index
);
227 param_stride
= total_vertices
;
229 base_addr
= rel_patch_id
;
230 param_stride
= num_patches
;
233 base_addr
= ac_build_imad(&ctx
->ac
, param_index
, param_stride
, base_addr
);
234 base_addr
= LLVMBuildMul(ctx
->ac
.builder
, base_addr
, constant16
, "");
237 LLVMValueRef patch_data_offset
= si_unpack_param(ctx
, ctx
->tcs_offchip_layout
, 12, 20);
239 base_addr
= LLVMBuildAdd(ctx
->ac
.builder
, base_addr
, patch_data_offset
, "");
244 static LLVMValueRef
get_tcs_tes_buffer_address_from_generic_indices(struct si_shader_context
*ctx
,
245 LLVMValueRef vertex_index
,
246 LLVMValueRef param_index
,
247 ubyte name
, ubyte index
)
249 unsigned param_index_base
;
251 param_index_base
= name
== TGSI_SEMANTIC_PATCH
|| name
== TGSI_SEMANTIC_TESSINNER
||
252 name
== TGSI_SEMANTIC_TESSOUTER
253 ? si_shader_io_get_unique_index_patch(name
, index
)
254 : si_shader_io_get_unique_index(name
, index
, false);
257 param_index
= LLVMBuildAdd(ctx
->ac
.builder
, param_index
,
258 LLVMConstInt(ctx
->ac
.i32
, param_index_base
, 0), "");
260 param_index
= LLVMConstInt(ctx
->ac
.i32
, param_index_base
, 0);
263 return get_tcs_tes_buffer_address(ctx
, get_rel_patch_id(ctx
), vertex_index
, param_index
);
266 static LLVMValueRef
buffer_load(struct si_shader_context
*ctx
, LLVMTypeRef type
, unsigned swizzle
,
267 LLVMValueRef buffer
, LLVMValueRef offset
, LLVMValueRef base
,
270 LLVMValueRef value
, value2
;
271 LLVMTypeRef vec_type
= LLVMVectorType(type
, 4);
274 value
= ac_build_buffer_load(&ctx
->ac
, buffer
, 4, NULL
, base
, offset
, 0, ac_glc
,
275 can_speculate
, false);
277 return LLVMBuildBitCast(ctx
->ac
.builder
, value
, vec_type
, "");
280 if (ac_get_type_size(type
) != 8) {
281 value
= ac_build_buffer_load(&ctx
->ac
, buffer
, 4, NULL
, base
, offset
, 0, ac_glc
,
282 can_speculate
, false);
284 value
= LLVMBuildBitCast(ctx
->ac
.builder
, value
, vec_type
, "");
285 return LLVMBuildExtractElement(ctx
->ac
.builder
, value
, LLVMConstInt(ctx
->ac
.i32
, swizzle
, 0),
289 value
= ac_build_buffer_load(&ctx
->ac
, buffer
, 1, NULL
, base
, offset
, swizzle
* 4, ac_glc
,
290 can_speculate
, false);
292 value2
= ac_build_buffer_load(&ctx
->ac
, buffer
, 1, NULL
, base
, offset
, swizzle
* 4 + 4, ac_glc
,
293 can_speculate
, false);
295 return si_build_gather_64bit(ctx
, type
, value
, value2
);
299 * Load from LSHS LDS storage.
301 * \param type output value type
302 * \param swizzle offset (typically 0..3); it can be ~0, which loads a vec4
303 * \param dw_addr address in dwords
305 static LLVMValueRef
lshs_lds_load(struct si_shader_context
*ctx
, LLVMTypeRef type
, unsigned swizzle
,
306 LLVMValueRef dw_addr
)
311 LLVMValueRef values
[4];
313 for (unsigned chan
= 0; chan
< 4; chan
++)
314 values
[chan
] = lshs_lds_load(ctx
, type
, chan
, dw_addr
);
316 return ac_build_gather_values(&ctx
->ac
, values
, 4);
319 /* Split 64-bit loads. */
320 if (ac_get_type_size(type
) == 8) {
323 lo
= lshs_lds_load(ctx
, ctx
->ac
.i32
, swizzle
, dw_addr
);
324 hi
= lshs_lds_load(ctx
, ctx
->ac
.i32
, swizzle
+ 1, dw_addr
);
325 return si_build_gather_64bit(ctx
, type
, lo
, hi
);
328 dw_addr
= LLVMBuildAdd(ctx
->ac
.builder
, dw_addr
, LLVMConstInt(ctx
->ac
.i32
, swizzle
, 0), "");
330 value
= ac_lds_load(&ctx
->ac
, dw_addr
);
332 return LLVMBuildBitCast(ctx
->ac
.builder
, value
, type
, "");
336 * Store to LSHS LDS storage.
338 * \param swizzle offset (typically 0..3)
339 * \param dw_addr address in dwords
340 * \param value value to store
342 static void lshs_lds_store(struct si_shader_context
*ctx
, unsigned dw_offset_imm
,
343 LLVMValueRef dw_addr
, LLVMValueRef value
)
346 LLVMBuildAdd(ctx
->ac
.builder
, dw_addr
, LLVMConstInt(ctx
->ac
.i32
, dw_offset_imm
, 0), "");
348 ac_lds_store(&ctx
->ac
, dw_addr
, value
);
/* Identifies which tessellation ring a buffer descriptor is built for. */
enum si_tess_ring
{
   TCS_FACTOR_RING,
   TESS_OFFCHIP_RING_TCS,
   TESS_OFFCHIP_RING_TES,
};
358 static LLVMValueRef
get_tess_ring_descriptor(struct si_shader_context
*ctx
, enum si_tess_ring ring
)
360 LLVMBuilderRef builder
= ctx
->ac
.builder
;
361 LLVMValueRef addr
= ac_get_arg(
362 &ctx
->ac
, ring
== TESS_OFFCHIP_RING_TES
? ctx
->tes_offchip_addr
: ctx
->tcs_out_lds_layout
);
364 /* TCS only receives high 13 bits of the address. */
365 if (ring
== TESS_OFFCHIP_RING_TCS
|| ring
== TCS_FACTOR_RING
) {
366 addr
= LLVMBuildAnd(builder
, addr
, LLVMConstInt(ctx
->ac
.i32
, 0xfff80000, 0), "");
369 if (ring
== TCS_FACTOR_RING
) {
370 unsigned tf_offset
= ctx
->screen
->tess_offchip_ring_size
;
371 addr
= LLVMBuildAdd(builder
, addr
, LLVMConstInt(ctx
->ac
.i32
, tf_offset
, 0), "");
374 uint32_t rsrc3
= S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X
) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y
) |
375 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z
) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W
);
377 if (ctx
->screen
->info
.chip_class
>= GFX10
)
378 rsrc3
|= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT
) |
379 S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW
) | S_008F0C_RESOURCE_LEVEL(1);
381 rsrc3
|= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT
) |
382 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32
);
384 LLVMValueRef desc
[4];
386 desc
[1] = LLVMConstInt(ctx
->ac
.i32
, S_008F04_BASE_ADDRESS_HI(ctx
->screen
->info
.address32_hi
), 0);
387 desc
[2] = LLVMConstInt(ctx
->ac
.i32
, 0xffffffff, 0);
388 desc
[3] = LLVMConstInt(ctx
->ac
.i32
, rsrc3
, false);
390 return ac_build_gather_values(&ctx
->ac
, desc
, 4);
393 void si_llvm_preload_tes_rings(struct si_shader_context
*ctx
)
395 ctx
->tess_offchip_ring
= get_tess_ring_descriptor(ctx
, TESS_OFFCHIP_RING_TES
);
398 static LLVMValueRef
si_nir_load_tcs_varyings(struct ac_shader_abi
*abi
, LLVMTypeRef type
,
399 LLVMValueRef vertex_index
, LLVMValueRef param_index
,
400 unsigned const_index
, unsigned location
,
401 unsigned driver_location
, unsigned component
,
402 unsigned num_components
, bool is_patch
,
403 bool is_compact
, bool load_input
)
405 struct si_shader_context
*ctx
= si_shader_context_from_abi(abi
);
406 struct si_shader_info
*info
= &ctx
->shader
->selector
->info
;
407 LLVMValueRef dw_addr
, stride
;
410 driver_location
= driver_location
/ 4;
413 name
= info
->input_semantic_name
[driver_location
];
414 index
= info
->input_semantic_index
[driver_location
];
416 name
= info
->output_semantic_name
[driver_location
];
417 index
= info
->output_semantic_index
[driver_location
];
420 assert((name
== TGSI_SEMANTIC_PATCH
|| name
== TGSI_SEMANTIC_TESSINNER
||
421 name
== TGSI_SEMANTIC_TESSOUTER
) == is_patch
);
424 stride
= get_tcs_in_vertex_dw_stride(ctx
);
425 dw_addr
= get_tcs_in_current_patch_offset(ctx
);
429 dw_addr
= get_tcs_out_current_patch_data_offset(ctx
);
431 stride
= get_tcs_out_vertex_dw_stride(ctx
);
432 dw_addr
= get_tcs_out_current_patch_offset(ctx
);
437 param_index
= LLVMConstInt(ctx
->ac
.i32
, const_index
, 0);
440 dw_addr
= get_dw_address_from_generic_indices(ctx
, stride
, dw_addr
, vertex_index
, param_index
,
443 LLVMValueRef value
[4];
444 for (unsigned i
= 0; i
< num_components
; i
++) {
446 if (ac_get_type_size(type
) == 8)
450 value
[i
+ component
] = lshs_lds_load(ctx
, type
, offset
, dw_addr
);
453 return ac_build_varying_gather_values(&ctx
->ac
, value
, num_components
, component
);
456 static LLVMValueRef
si_nir_load_input_tes(struct ac_shader_abi
*abi
, LLVMTypeRef type
,
457 LLVMValueRef vertex_index
, LLVMValueRef param_index
,
458 unsigned const_index
, unsigned location
,
459 unsigned driver_location
, unsigned component
,
460 unsigned num_components
, bool is_patch
, bool is_compact
,
463 struct si_shader_context
*ctx
= si_shader_context_from_abi(abi
);
464 struct si_shader_info
*info
= &ctx
->shader
->selector
->info
;
465 LLVMValueRef base
, addr
;
467 driver_location
= driver_location
/ 4;
468 ubyte name
= info
->input_semantic_name
[driver_location
];
469 ubyte index
= info
->input_semantic_index
[driver_location
];
471 assert((name
== TGSI_SEMANTIC_PATCH
|| name
== TGSI_SEMANTIC_TESSINNER
||
472 name
== TGSI_SEMANTIC_TESSOUTER
) == is_patch
);
474 base
= ac_get_arg(&ctx
->ac
, ctx
->tcs_offchip_offset
);
477 param_index
= LLVMConstInt(ctx
->ac
.i32
, const_index
, 0);
481 get_tcs_tes_buffer_address_from_generic_indices(ctx
, vertex_index
, param_index
, name
, index
);
483 /* TODO: This will generate rather ordinary llvm code, although it
484 * should be easy for the optimiser to fix up. In future we might want
485 * to refactor buffer_load().
487 LLVMValueRef value
[4];
488 for (unsigned i
= 0; i
< num_components
; i
++) {
490 if (ac_get_type_size(type
) == 8) {
493 ubyte name
= info
->input_semantic_name
[driver_location
+ 1];
494 ubyte index
= info
->input_semantic_index
[driver_location
+ 1];
495 addr
= get_tcs_tes_buffer_address_from_generic_indices(ctx
, vertex_index
, param_index
,
503 value
[i
+ component
] =
504 buffer_load(ctx
, type
, offset
, ctx
->tess_offchip_ring
, base
, addr
, true);
507 return ac_build_varying_gather_values(&ctx
->ac
, value
, num_components
, component
);
510 static void si_nir_store_output_tcs(struct ac_shader_abi
*abi
, const struct nir_variable
*var
,
511 LLVMValueRef vertex_index
, LLVMValueRef param_index
,
512 unsigned const_index
, LLVMValueRef src
, unsigned writemask
)
514 struct si_shader_context
*ctx
= si_shader_context_from_abi(abi
);
515 struct si_shader_info
*info
= &ctx
->shader
->selector
->info
;
516 const unsigned component
= var
->data
.location_frac
;
517 unsigned driver_location
= var
->data
.driver_location
;
518 LLVMValueRef dw_addr
, stride
;
519 LLVMValueRef buffer
, base
, addr
;
520 LLVMValueRef values
[8];
522 bool is_tess_factor
= false, is_tess_inner
= false;
524 driver_location
= driver_location
/ 4;
525 ubyte name
= info
->output_semantic_name
[driver_location
];
526 ubyte index
= info
->output_semantic_index
[driver_location
];
528 bool is_const
= !param_index
;
530 param_index
= LLVMConstInt(ctx
->ac
.i32
, const_index
, 0);
532 const bool is_patch
= var
->data
.patch
|| var
->data
.location
== VARYING_SLOT_TESS_LEVEL_INNER
||
533 var
->data
.location
== VARYING_SLOT_TESS_LEVEL_OUTER
;
535 /* Invalid SPIR-V can cause this. */
536 if ((name
== TGSI_SEMANTIC_PATCH
|| name
== TGSI_SEMANTIC_TESSINNER
||
537 name
== TGSI_SEMANTIC_TESSOUTER
) != is_patch
)
541 stride
= get_tcs_out_vertex_dw_stride(ctx
);
542 dw_addr
= get_tcs_out_current_patch_offset(ctx
);
543 dw_addr
= get_dw_address_from_generic_indices(ctx
, stride
, dw_addr
, vertex_index
, param_index
,
546 skip_lds_store
= !info
->reads_pervertex_outputs
;
548 dw_addr
= get_tcs_out_current_patch_data_offset(ctx
);
549 dw_addr
= get_dw_address_from_generic_indices(ctx
, NULL
, dw_addr
, vertex_index
, param_index
,
552 skip_lds_store
= !info
->reads_perpatch_outputs
;
554 if (is_const
&& const_index
== 0) {
555 int name
= info
->output_semantic_name
[driver_location
];
557 /* Always write tess factors into LDS for the TCS epilog. */
558 if (name
== TGSI_SEMANTIC_TESSINNER
|| name
== TGSI_SEMANTIC_TESSOUTER
) {
559 /* The epilog doesn't read LDS if invocation 0 defines tess factors. */
560 skip_lds_store
= !info
->reads_tessfactor_outputs
&&
561 ctx
->shader
->selector
->info
.tessfactors_are_def_in_all_invocs
;
562 is_tess_factor
= true;
563 is_tess_inner
= name
== TGSI_SEMANTIC_TESSINNER
;
568 buffer
= get_tess_ring_descriptor(ctx
, TESS_OFFCHIP_RING_TCS
);
570 base
= ac_get_arg(&ctx
->ac
, ctx
->tcs_offchip_offset
);
573 get_tcs_tes_buffer_address_from_generic_indices(ctx
, vertex_index
, param_index
, name
, index
);
575 for (unsigned chan
= component
; chan
< 8; chan
++) {
576 if (!(writemask
& (1 << chan
)))
578 LLVMValueRef value
= ac_llvm_extract_elem(&ctx
->ac
, src
, chan
- component
);
580 unsigned buffer_store_offset
= chan
% 4;
582 ubyte name
= info
->output_semantic_name
[driver_location
+ 1];
583 ubyte index
= info
->output_semantic_index
[driver_location
+ 1];
584 addr
= get_tcs_tes_buffer_address_from_generic_indices(ctx
, vertex_index
, param_index
,
588 /* Skip LDS stores if there is no LDS read of this output. */
590 lshs_lds_store(ctx
, chan
, dw_addr
, value
);
592 value
= ac_to_integer(&ctx
->ac
, value
);
593 values
[chan
] = value
;
595 if (writemask
!= 0xF && !is_tess_factor
) {
596 ac_build_buffer_store_dword(&ctx
->ac
, buffer
, value
, 1, addr
, base
,
597 4 * buffer_store_offset
, ac_glc
);
600 /* Write tess factors into VGPRs for the epilog. */
601 if (is_tess_factor
&& ctx
->shader
->selector
->info
.tessfactors_are_def_in_all_invocs
) {
602 if (!is_tess_inner
) {
603 LLVMBuildStore(ctx
->ac
.builder
, value
, /* outer */
604 ctx
->invoc0_tess_factors
[chan
]);
605 } else if (chan
< 2) {
606 LLVMBuildStore(ctx
->ac
.builder
, value
, /* inner */
607 ctx
->invoc0_tess_factors
[4 + chan
]);
612 if (writemask
== 0xF && !is_tess_factor
) {
613 LLVMValueRef value
= ac_build_gather_values(&ctx
->ac
, values
, 4);
614 ac_build_buffer_store_dword(&ctx
->ac
, buffer
, value
, 4, addr
, base
, 0, ac_glc
);
618 static LLVMValueRef
si_load_tess_coord(struct ac_shader_abi
*abi
)
620 struct si_shader_context
*ctx
= si_shader_context_from_abi(abi
);
621 LLVMValueRef coord
[4] = {ac_get_arg(&ctx
->ac
, ctx
->tes_u
), ac_get_arg(&ctx
->ac
, ctx
->tes_v
),
622 ctx
->ac
.f32_0
, ctx
->ac
.f32_0
};
624 /* For triangles, the vector should be (u, v, 1-u-v). */
625 if (ctx
->shader
->selector
->info
.properties
[TGSI_PROPERTY_TES_PRIM_MODE
] == PIPE_PRIM_TRIANGLES
) {
626 coord
[2] = LLVMBuildFSub(ctx
->ac
.builder
, ctx
->ac
.f32_1
,
627 LLVMBuildFAdd(ctx
->ac
.builder
, coord
[0], coord
[1], ""), "");
629 return ac_build_gather_values(&ctx
->ac
, coord
, 4);
632 static LLVMValueRef
load_tess_level(struct si_shader_context
*ctx
, unsigned semantic_name
)
634 LLVMValueRef base
, addr
;
636 int param
= si_shader_io_get_unique_index_patch(semantic_name
, 0);
638 base
= ac_get_arg(&ctx
->ac
, ctx
->tcs_offchip_offset
);
639 addr
= get_tcs_tes_buffer_address(ctx
, get_rel_patch_id(ctx
), NULL
,
640 LLVMConstInt(ctx
->ac
.i32
, param
, 0));
642 return buffer_load(ctx
, ctx
->ac
.f32
, ~0, ctx
->tess_offchip_ring
, base
, addr
, true);
645 static LLVMValueRef
load_tess_level_default(struct si_shader_context
*ctx
, unsigned semantic_name
)
647 LLVMValueRef buf
, slot
, val
[4];
650 slot
= LLVMConstInt(ctx
->ac
.i32
, SI_HS_CONST_DEFAULT_TESS_LEVELS
, 0);
651 buf
= ac_get_arg(&ctx
->ac
, ctx
->rw_buffers
);
652 buf
= ac_build_load_to_sgpr(&ctx
->ac
, buf
, slot
);
653 offset
= semantic_name
== TGSI_SEMANTIC_TESS_DEFAULT_INNER_LEVEL
? 4 : 0;
655 for (i
= 0; i
< 4; i
++)
656 val
[i
] = si_buffer_load_const(ctx
, buf
, LLVMConstInt(ctx
->ac
.i32
, (offset
+ i
) * 4, 0));
657 return ac_build_gather_values(&ctx
->ac
, val
, 4);
660 static LLVMValueRef
si_load_tess_level(struct ac_shader_abi
*abi
, unsigned varying_id
,
661 bool load_default_state
)
663 struct si_shader_context
*ctx
= si_shader_context_from_abi(abi
);
664 unsigned semantic_name
;
666 if (load_default_state
) {
667 switch (varying_id
) {
668 case VARYING_SLOT_TESS_LEVEL_INNER
:
669 semantic_name
= TGSI_SEMANTIC_TESS_DEFAULT_INNER_LEVEL
;
671 case VARYING_SLOT_TESS_LEVEL_OUTER
:
672 semantic_name
= TGSI_SEMANTIC_TESS_DEFAULT_OUTER_LEVEL
;
675 unreachable("unknown tess level");
677 return load_tess_level_default(ctx
, semantic_name
);
680 switch (varying_id
) {
681 case VARYING_SLOT_TESS_LEVEL_INNER
:
682 semantic_name
= TGSI_SEMANTIC_TESSINNER
;
684 case VARYING_SLOT_TESS_LEVEL_OUTER
:
685 semantic_name
= TGSI_SEMANTIC_TESSOUTER
;
688 unreachable("unknown tess level");
691 return load_tess_level(ctx
, semantic_name
);
694 static LLVMValueRef
si_load_patch_vertices_in(struct ac_shader_abi
*abi
)
696 struct si_shader_context
*ctx
= si_shader_context_from_abi(abi
);
697 if (ctx
->type
== PIPE_SHADER_TESS_CTRL
)
698 return si_unpack_param(ctx
, ctx
->tcs_out_lds_layout
, 13, 6);
699 else if (ctx
->type
== PIPE_SHADER_TESS_EVAL
)
700 return get_num_tcs_out_vertices(ctx
);
702 unreachable("invalid shader stage for TGSI_SEMANTIC_VERTICESIN");
706 * Forward all outputs from the vertex shader to the TES. This is only used
707 * for the fixed function TCS.
709 static void si_copy_tcs_inputs(struct si_shader_context
*ctx
)
711 LLVMValueRef invocation_id
, buffer
, buffer_offset
;
712 LLVMValueRef lds_vertex_stride
, lds_base
;
715 invocation_id
= si_unpack_param(ctx
, ctx
->args
.tcs_rel_ids
, 8, 5);
716 buffer
= get_tess_ring_descriptor(ctx
, TESS_OFFCHIP_RING_TCS
);
717 buffer_offset
= ac_get_arg(&ctx
->ac
, ctx
->tcs_offchip_offset
);
719 lds_vertex_stride
= get_tcs_in_vertex_dw_stride(ctx
);
720 lds_base
= get_tcs_in_current_patch_offset(ctx
);
721 lds_base
= ac_build_imad(&ctx
->ac
, invocation_id
, lds_vertex_stride
, lds_base
);
723 inputs
= ctx
->shader
->key
.mono
.u
.ff_tcs_inputs_to_copy
;
725 unsigned i
= u_bit_scan64(&inputs
);
727 LLVMValueRef lds_ptr
=
728 LLVMBuildAdd(ctx
->ac
.builder
, lds_base
, LLVMConstInt(ctx
->ac
.i32
, 4 * i
, 0), "");
730 LLVMValueRef buffer_addr
= get_tcs_tes_buffer_address(
731 ctx
, get_rel_patch_id(ctx
), invocation_id
, LLVMConstInt(ctx
->ac
.i32
, i
, 0));
733 LLVMValueRef value
= lshs_lds_load(ctx
, ctx
->ac
.i32
, ~0, lds_ptr
);
735 ac_build_buffer_store_dword(&ctx
->ac
, buffer
, value
, 4, buffer_addr
, buffer_offset
, 0,
740 static void si_write_tess_factors(struct si_shader_context
*ctx
, LLVMValueRef rel_patch_id
,
741 LLVMValueRef invocation_id
,
742 LLVMValueRef tcs_out_current_patch_data_offset
,
743 LLVMValueRef invoc0_tf_outer
[4], LLVMValueRef invoc0_tf_inner
[2])
745 struct si_shader
*shader
= ctx
->shader
;
746 unsigned tess_inner_index
, tess_outer_index
;
747 LLVMValueRef lds_base
, lds_inner
, lds_outer
, byteoffset
, buffer
;
748 LLVMValueRef out
[6], vec0
, vec1
, tf_base
, inner
[4], outer
[4];
749 unsigned stride
, outer_comps
, inner_comps
, i
, offset
;
751 /* Add a barrier before loading tess factors from LDS. */
752 if (!shader
->key
.part
.tcs
.epilog
.invoc0_tess_factors_are_def
)
753 si_llvm_emit_barrier(ctx
);
755 /* Do this only for invocation 0, because the tess levels are per-patch,
758 * This can't jump, because invocation 0 executes this. It should
759 * at least mask out the loads and stores for other invocations.
761 ac_build_ifcc(&ctx
->ac
,
762 LLVMBuildICmp(ctx
->ac
.builder
, LLVMIntEQ
, invocation_id
, ctx
->ac
.i32_0
, ""), 6503);
764 /* Determine the layout of one tess factor element in the buffer. */
765 switch (shader
->key
.part
.tcs
.epilog
.prim_mode
) {
766 case PIPE_PRIM_LINES
:
767 stride
= 2; /* 2 dwords, 1 vec2 store */
771 case PIPE_PRIM_TRIANGLES
:
772 stride
= 4; /* 4 dwords, 1 vec4 store */
776 case PIPE_PRIM_QUADS
:
777 stride
= 6; /* 6 dwords, 2 stores (vec4 + vec2) */
786 for (i
= 0; i
< 4; i
++) {
787 inner
[i
] = LLVMGetUndef(ctx
->ac
.i32
);
788 outer
[i
] = LLVMGetUndef(ctx
->ac
.i32
);
791 if (shader
->key
.part
.tcs
.epilog
.invoc0_tess_factors_are_def
) {
792 /* Tess factors are in VGPRs. */
793 for (i
= 0; i
< outer_comps
; i
++)
794 outer
[i
] = out
[i
] = invoc0_tf_outer
[i
];
795 for (i
= 0; i
< inner_comps
; i
++)
796 inner
[i
] = out
[outer_comps
+ i
] = invoc0_tf_inner
[i
];
798 /* Load tess_inner and tess_outer from LDS.
799 * Any invocation can write them, so we can't get them from a temporary.
801 tess_inner_index
= si_shader_io_get_unique_index_patch(TGSI_SEMANTIC_TESSINNER
, 0);
802 tess_outer_index
= si_shader_io_get_unique_index_patch(TGSI_SEMANTIC_TESSOUTER
, 0);
804 lds_base
= tcs_out_current_patch_data_offset
;
805 lds_inner
= LLVMBuildAdd(ctx
->ac
.builder
, lds_base
,
806 LLVMConstInt(ctx
->ac
.i32
, tess_inner_index
* 4, 0), "");
807 lds_outer
= LLVMBuildAdd(ctx
->ac
.builder
, lds_base
,
808 LLVMConstInt(ctx
->ac
.i32
, tess_outer_index
* 4, 0), "");
810 for (i
= 0; i
< outer_comps
; i
++) {
811 outer
[i
] = out
[i
] = lshs_lds_load(ctx
, ctx
->ac
.i32
, i
, lds_outer
);
813 for (i
= 0; i
< inner_comps
; i
++) {
814 inner
[i
] = out
[outer_comps
+ i
] = lshs_lds_load(ctx
, ctx
->ac
.i32
, i
, lds_inner
);
818 if (shader
->key
.part
.tcs
.epilog
.prim_mode
== PIPE_PRIM_LINES
) {
819 /* For isolines, the hardware expects tess factors in the
820 * reverse order from what NIR specifies.
822 LLVMValueRef tmp
= out
[0];
827 /* Convert the outputs to vectors for stores. */
828 vec0
= ac_build_gather_values(&ctx
->ac
, out
, MIN2(stride
, 4));
832 vec1
= ac_build_gather_values(&ctx
->ac
, out
+ 4, stride
- 4);
834 /* Get the buffer. */
835 buffer
= get_tess_ring_descriptor(ctx
, TCS_FACTOR_RING
);
837 /* Get the offset. */
838 tf_base
= ac_get_arg(&ctx
->ac
, ctx
->tcs_factor_offset
);
840 LLVMBuildMul(ctx
->ac
.builder
, rel_patch_id
, LLVMConstInt(ctx
->ac
.i32
, 4 * stride
, 0), "");
842 ac_build_ifcc(&ctx
->ac
,
843 LLVMBuildICmp(ctx
->ac
.builder
, LLVMIntEQ
, rel_patch_id
, ctx
->ac
.i32_0
, ""), 6504);
845 /* Store the dynamic HS control word. */
847 if (ctx
->screen
->info
.chip_class
<= GFX8
) {
848 ac_build_buffer_store_dword(&ctx
->ac
, buffer
, LLVMConstInt(ctx
->ac
.i32
, 0x80000000, 0), 1,
849 ctx
->ac
.i32_0
, tf_base
, offset
, ac_glc
);
853 ac_build_endif(&ctx
->ac
, 6504);
855 /* Store the tessellation factors. */
856 ac_build_buffer_store_dword(&ctx
->ac
, buffer
, vec0
, MIN2(stride
, 4), byteoffset
, tf_base
, offset
,
860 ac_build_buffer_store_dword(&ctx
->ac
, buffer
, vec1
, stride
- 4, byteoffset
, tf_base
, offset
,
863 /* Store the tess factors into the offchip buffer if TES reads them. */
864 if (shader
->key
.part
.tcs
.epilog
.tes_reads_tess_factors
) {
865 LLVMValueRef buf
, base
, inner_vec
, outer_vec
, tf_outer_offset
;
866 LLVMValueRef tf_inner_offset
;
867 unsigned param_outer
, param_inner
;
869 buf
= get_tess_ring_descriptor(ctx
, TESS_OFFCHIP_RING_TCS
);
870 base
= ac_get_arg(&ctx
->ac
, ctx
->tcs_offchip_offset
);
872 param_outer
= si_shader_io_get_unique_index_patch(TGSI_SEMANTIC_TESSOUTER
, 0);
873 tf_outer_offset
= get_tcs_tes_buffer_address(ctx
, rel_patch_id
, NULL
,
874 LLVMConstInt(ctx
->ac
.i32
, param_outer
, 0));
876 unsigned outer_vec_size
= ac_has_vec3_support(ctx
->screen
->info
.chip_class
, false)
878 : util_next_power_of_two(outer_comps
);
879 outer_vec
= ac_build_gather_values(&ctx
->ac
, outer
, outer_vec_size
);
881 ac_build_buffer_store_dword(&ctx
->ac
, buf
, outer_vec
, outer_comps
, tf_outer_offset
, base
, 0,
884 param_inner
= si_shader_io_get_unique_index_patch(TGSI_SEMANTIC_TESSINNER
, 0);
885 tf_inner_offset
= get_tcs_tes_buffer_address(ctx
, rel_patch_id
, NULL
,
886 LLVMConstInt(ctx
->ac
.i32
, param_inner
, 0));
889 inner_comps
== 1 ? inner
[0] : ac_build_gather_values(&ctx
->ac
, inner
, inner_comps
);
890 ac_build_buffer_store_dword(&ctx
->ac
, buf
, inner_vec
, inner_comps
, tf_inner_offset
, base
,
895 ac_build_endif(&ctx
->ac
, 6503);
898 /* This only writes the tessellation factor levels. */
899 static void si_llvm_emit_tcs_epilogue(struct ac_shader_abi
*abi
, unsigned max_outputs
,
902 struct si_shader_context
*ctx
= si_shader_context_from_abi(abi
);
903 LLVMBuilderRef builder
= ctx
->ac
.builder
;
904 LLVMValueRef rel_patch_id
, invocation_id
, tf_lds_offset
;
906 si_copy_tcs_inputs(ctx
);
908 rel_patch_id
= get_rel_patch_id(ctx
);
909 invocation_id
= si_unpack_param(ctx
, ctx
->args
.tcs_rel_ids
, 8, 5);
910 tf_lds_offset
= get_tcs_out_current_patch_data_offset(ctx
);
912 if (ctx
->screen
->info
.chip_class
>= GFX9
) {
913 LLVMBasicBlockRef blocks
[2] = {LLVMGetInsertBlock(builder
), ctx
->merged_wrap_if_entry_block
};
914 LLVMValueRef values
[2];
916 ac_build_endif(&ctx
->ac
, ctx
->merged_wrap_if_label
);
918 values
[0] = rel_patch_id
;
919 values
[1] = LLVMGetUndef(ctx
->ac
.i32
);
920 rel_patch_id
= ac_build_phi(&ctx
->ac
, ctx
->ac
.i32
, 2, values
, blocks
);
922 values
[0] = tf_lds_offset
;
923 values
[1] = LLVMGetUndef(ctx
->ac
.i32
);
924 tf_lds_offset
= ac_build_phi(&ctx
->ac
, ctx
->ac
.i32
, 2, values
, blocks
);
926 values
[0] = invocation_id
;
927 values
[1] = ctx
->ac
.i32_1
; /* cause the epilog to skip threads */
928 invocation_id
= ac_build_phi(&ctx
->ac
, ctx
->ac
.i32
, 2, values
, blocks
);
931 /* Return epilog parameters from this function. */
932 LLVMValueRef ret
= ctx
->return_value
;
935 if (ctx
->screen
->info
.chip_class
>= GFX9
) {
937 si_insert_input_ret(ctx
, ret
, ctx
->tcs_offchip_layout
, 8 + GFX9_SGPR_TCS_OFFCHIP_LAYOUT
);
938 ret
= si_insert_input_ret(ctx
, ret
, ctx
->tcs_out_lds_layout
, 8 + GFX9_SGPR_TCS_OUT_LAYOUT
);
939 /* Tess offchip and tess factor offsets are at the beginning. */
940 ret
= si_insert_input_ret(ctx
, ret
, ctx
->tcs_offchip_offset
, 2);
941 ret
= si_insert_input_ret(ctx
, ret
, ctx
->tcs_factor_offset
, 4);
942 vgpr
= 8 + GFX9_SGPR_TCS_OUT_LAYOUT
+ 1;
944 ret
= si_insert_input_ret(ctx
, ret
, ctx
->tcs_offchip_layout
, GFX6_SGPR_TCS_OFFCHIP_LAYOUT
);
945 ret
= si_insert_input_ret(ctx
, ret
, ctx
->tcs_out_lds_layout
, GFX6_SGPR_TCS_OUT_LAYOUT
);
946 /* Tess offchip and tess factor offsets are after user SGPRs. */
947 ret
= si_insert_input_ret(ctx
, ret
, ctx
->tcs_offchip_offset
, GFX6_TCS_NUM_USER_SGPR
);
948 ret
= si_insert_input_ret(ctx
, ret
, ctx
->tcs_factor_offset
, GFX6_TCS_NUM_USER_SGPR
+ 1);
949 vgpr
= GFX6_TCS_NUM_USER_SGPR
+ 2;
953 rel_patch_id
= ac_to_float(&ctx
->ac
, rel_patch_id
);
954 invocation_id
= ac_to_float(&ctx
->ac
, invocation_id
);
955 tf_lds_offset
= ac_to_float(&ctx
->ac
, tf_lds_offset
);
957 /* Leave a hole corresponding to the two input VGPRs. This ensures that
958 * the invocation_id output does not alias the tcs_rel_ids input,
959 * which saves a V_MOV on gfx9.
963 ret
= LLVMBuildInsertValue(builder
, ret
, rel_patch_id
, vgpr
++, "");
964 ret
= LLVMBuildInsertValue(builder
, ret
, invocation_id
, vgpr
++, "");
966 if (ctx
->shader
->selector
->info
.tessfactors_are_def_in_all_invocs
) {
967 vgpr
++; /* skip the tess factor LDS offset */
968 for (unsigned i
= 0; i
< 6; i
++) {
969 LLVMValueRef value
= LLVMBuildLoad(builder
, ctx
->invoc0_tess_factors
[i
], "");
970 value
= ac_to_float(&ctx
->ac
, value
);
971 ret
= LLVMBuildInsertValue(builder
, ret
, value
, vgpr
++, "");
974 ret
= LLVMBuildInsertValue(builder
, ret
, tf_lds_offset
, vgpr
++, "");
976 ctx
->return_value
= ret
;
979 /* Pass TCS inputs from LS to TCS on GFX9. */
980 static void si_set_ls_return_value_for_tcs(struct si_shader_context
*ctx
)
982 LLVMValueRef ret
= ctx
->return_value
;
984 ret
= si_insert_input_ptr(ctx
, ret
, ctx
->other_const_and_shader_buffers
, 0);
985 ret
= si_insert_input_ptr(ctx
, ret
, ctx
->other_samplers_and_images
, 1);
986 ret
= si_insert_input_ret(ctx
, ret
, ctx
->tcs_offchip_offset
, 2);
987 ret
= si_insert_input_ret(ctx
, ret
, ctx
->merged_wave_info
, 3);
988 ret
= si_insert_input_ret(ctx
, ret
, ctx
->tcs_factor_offset
, 4);
989 ret
= si_insert_input_ret(ctx
, ret
, ctx
->merged_scratch_offset
, 5);
991 ret
= si_insert_input_ptr(ctx
, ret
, ctx
->rw_buffers
, 8 + SI_SGPR_RW_BUFFERS
);
992 ret
= si_insert_input_ptr(ctx
, ret
, ctx
->bindless_samplers_and_images
,
993 8 + SI_SGPR_BINDLESS_SAMPLERS_AND_IMAGES
);
995 ret
= si_insert_input_ret(ctx
, ret
, ctx
->vs_state_bits
, 8 + SI_SGPR_VS_STATE_BITS
);
997 ret
= si_insert_input_ret(ctx
, ret
, ctx
->tcs_offchip_layout
, 8 + GFX9_SGPR_TCS_OFFCHIP_LAYOUT
);
998 ret
= si_insert_input_ret(ctx
, ret
, ctx
->tcs_out_lds_offsets
, 8 + GFX9_SGPR_TCS_OUT_OFFSETS
);
999 ret
= si_insert_input_ret(ctx
, ret
, ctx
->tcs_out_lds_layout
, 8 + GFX9_SGPR_TCS_OUT_LAYOUT
);
1001 unsigned vgpr
= 8 + GFX9_TCS_NUM_USER_SGPR
;
1002 ret
= LLVMBuildInsertValue(ctx
->ac
.builder
, ret
,
1003 ac_to_float(&ctx
->ac
, ac_get_arg(&ctx
->ac
, ctx
->args
.tcs_patch_id
)),
1005 ret
= LLVMBuildInsertValue(ctx
->ac
.builder
, ret
,
1006 ac_to_float(&ctx
->ac
, ac_get_arg(&ctx
->ac
, ctx
->args
.tcs_rel_ids
)),
1008 ctx
->return_value
= ret
;
1011 void si_llvm_emit_ls_epilogue(struct ac_shader_abi
*abi
, unsigned max_outputs
, LLVMValueRef
*addrs
)
1013 struct si_shader_context
*ctx
= si_shader_context_from_abi(abi
);
1014 struct si_shader
*shader
= ctx
->shader
;
1015 struct si_shader_info
*info
= &shader
->selector
->info
;
1017 LLVMValueRef vertex_id
= ac_get_arg(&ctx
->ac
, ctx
->rel_auto_id
);
1018 LLVMValueRef vertex_dw_stride
= get_tcs_in_vertex_dw_stride(ctx
);
1019 LLVMValueRef base_dw_addr
= LLVMBuildMul(ctx
->ac
.builder
, vertex_id
, vertex_dw_stride
, "");
1021 /* Write outputs to LDS. The next shader (TCS aka HS) will read
1022 * its inputs from it. */
1023 for (i
= 0; i
< info
->num_outputs
; i
++) {
1024 unsigned name
= info
->output_semantic_name
[i
];
1025 unsigned index
= info
->output_semantic_index
[i
];
1027 /* The ARB_shader_viewport_layer_array spec contains the
1030 * 2) What happens if gl_ViewportIndex or gl_Layer is
1031 * written in the vertex shader and a geometry shader is
1034 * RESOLVED: The value written by the last vertex processing
1035 * stage is used. If the last vertex processing stage
1036 * (vertex, tessellation evaluation or geometry) does not
1037 * statically assign to gl_ViewportIndex or gl_Layer, index
1038 * or layer zero is assumed.
1040 * So writes to those outputs in VS-as-LS are simply ignored.
1042 if (name
== TGSI_SEMANTIC_LAYER
|| name
== TGSI_SEMANTIC_VIEWPORT_INDEX
)
1045 int param
= si_shader_io_get_unique_index(name
, index
, false);
1046 LLVMValueRef dw_addr
=
1047 LLVMBuildAdd(ctx
->ac
.builder
, base_dw_addr
, LLVMConstInt(ctx
->ac
.i32
, param
* 4, 0), "");
1049 for (chan
= 0; chan
< 4; chan
++) {
1050 if (!(info
->output_usagemask
[i
] & (1 << chan
)))
1053 lshs_lds_store(ctx
, chan
, dw_addr
,
1054 LLVMBuildLoad(ctx
->ac
.builder
, addrs
[4 * i
+ chan
], ""));
1058 if (ctx
->screen
->info
.chip_class
>= GFX9
)
1059 si_set_ls_return_value_for_tcs(ctx
);
1063 * Compile the TCS epilog function. This writes tesselation factors to memory
1064 * based on the output primitive type of the tesselator (determined by TES).
1066 void si_llvm_build_tcs_epilog(struct si_shader_context
*ctx
, union si_shader_part_key
*key
)
1068 memset(&ctx
->args
, 0, sizeof(ctx
->args
));
1070 if (ctx
->screen
->info
.chip_class
>= GFX9
) {
1071 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
, NULL
);
1072 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
, NULL
);
1073 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
, &ctx
->tcs_offchip_offset
);
1074 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
, NULL
); /* wave info */
1075 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
, &ctx
->tcs_factor_offset
);
1076 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
, NULL
);
1077 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
, NULL
);
1078 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
, NULL
);
1079 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
, NULL
);
1080 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
, NULL
);
1081 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
, NULL
);
1082 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
, NULL
);
1083 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
, NULL
);
1084 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
, NULL
);
1085 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
, NULL
);
1086 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
, NULL
);
1087 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
, &ctx
->tcs_offchip_layout
);
1088 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
, NULL
);
1089 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
, &ctx
->tcs_out_lds_layout
);
1091 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
, NULL
);
1092 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
, NULL
);
1093 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
, NULL
);
1094 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
, NULL
);
1095 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
, &ctx
->tcs_offchip_layout
);
1096 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
, NULL
);
1097 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
, &ctx
->tcs_out_lds_layout
);
1098 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
, NULL
);
1099 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
, &ctx
->tcs_offchip_offset
);
1100 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
, &ctx
->tcs_factor_offset
);
1103 ac_add_arg(&ctx
->args
, AC_ARG_VGPR
, 1, AC_ARG_INT
, NULL
); /* VGPR gap */
1104 ac_add_arg(&ctx
->args
, AC_ARG_VGPR
, 1, AC_ARG_INT
, NULL
); /* VGPR gap */
1105 struct ac_arg rel_patch_id
; /* patch index within the wave (REL_PATCH_ID) */
1106 ac_add_arg(&ctx
->args
, AC_ARG_VGPR
, 1, AC_ARG_INT
, &rel_patch_id
);
1107 struct ac_arg invocation_id
; /* invocation ID within the patch */
1108 ac_add_arg(&ctx
->args
, AC_ARG_VGPR
, 1, AC_ARG_INT
, &invocation_id
);
1110 tcs_out_current_patch_data_offset
; /* LDS offset where tess factors should be loaded from */
1111 ac_add_arg(&ctx
->args
, AC_ARG_VGPR
, 1, AC_ARG_INT
, &tcs_out_current_patch_data_offset
);
1113 struct ac_arg tess_factors
[6];
1114 for (unsigned i
= 0; i
< 6; i
++)
1115 ac_add_arg(&ctx
->args
, AC_ARG_VGPR
, 1, AC_ARG_INT
, &tess_factors
[i
]);
1117 /* Create the function. */
1118 si_llvm_create_func(ctx
, "tcs_epilog", NULL
, 0, ctx
->screen
->info
.chip_class
>= GFX7
? 128 : 0);
1119 ac_declare_lds_as_pointer(&ctx
->ac
);
1121 LLVMValueRef invoc0_tess_factors
[6];
1122 for (unsigned i
= 0; i
< 6; i
++)
1123 invoc0_tess_factors
[i
] = ac_get_arg(&ctx
->ac
, tess_factors
[i
]);
1125 si_write_tess_factors(ctx
, ac_get_arg(&ctx
->ac
, rel_patch_id
),
1126 ac_get_arg(&ctx
->ac
, invocation_id
),
1127 ac_get_arg(&ctx
->ac
, tcs_out_current_patch_data_offset
),
1128 invoc0_tess_factors
, invoc0_tess_factors
+ 4);
1130 LLVMBuildRetVoid(ctx
->ac
.builder
);
1133 void si_llvm_init_tcs_callbacks(struct si_shader_context
*ctx
)
1135 ctx
->abi
.load_tess_varyings
= si_nir_load_tcs_varyings
;
1136 ctx
->abi
.load_tess_level
= si_load_tess_level
;
1137 ctx
->abi
.store_tcs_outputs
= si_nir_store_output_tcs
;
1138 ctx
->abi
.emit_outputs
= si_llvm_emit_tcs_epilogue
;
1139 ctx
->abi
.load_patch_vertices_in
= si_load_patch_vertices_in
;
1142 void si_llvm_init_tes_callbacks(struct si_shader_context
*ctx
, bool ngg_cull_shader
)
1144 ctx
->abi
.load_tess_varyings
= si_nir_load_input_tes
;
1145 ctx
->abi
.load_tess_coord
= si_load_tess_coord
;
1146 ctx
->abi
.load_tess_level
= si_load_tess_level
;
1147 ctx
->abi
.load_patch_vertices_in
= si_load_patch_vertices_in
;
1149 if (ctx
->shader
->key
.as_es
)
1150 ctx
->abi
.emit_outputs
= si_llvm_emit_es_epilogue
;
1151 else if (ngg_cull_shader
)
1152 ctx
->abi
.emit_outputs
= gfx10_emit_ngg_culling_epilogue
;
1153 else if (ctx
->shader
->key
.as_ngg
)
1154 ctx
->abi
.emit_outputs
= gfx10_emit_ngg_epilogue
;
1156 ctx
->abi
.emit_outputs
= si_llvm_emit_vs_epilogue
;