2 * Copyright 2020 Advanced Micro Devices, Inc.
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * on the rights to use, copy, modify, merge, publish, distribute, sub
9 * license, and/or sell copies of the Software, and to permit persons to whom
10 * the Software is furnished to do so, subject to the following conditions:
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22 * USE OR OTHER DEALINGS IN THE SOFTWARE.
25 #include "si_shader_internal.h"
/* Return the relative patch ID as an LLVM value, selected per shader stage:
 *  - PIPE_SHADER_TESS_CTRL: unpacked from the tcs_rel_ids argument via
 *    si_unpack_param(..., 0, 8) (presumably bit offset 0, width 8 - confirm
 *    against si_unpack_param).
 *  - PIPE_SHADER_TESS_EVAL: the tes_rel_patch_id shader argument.
 * NOTE(review): the switch header and any default case are not visible in
 * this excerpt.
 */
29 static LLVMValueRef
get_rel_patch_id(struct si_shader_context
*ctx
)
32 case PIPE_SHADER_TESS_CTRL
:
33 return si_unpack_param(ctx
, ctx
->args
.tcs_rel_ids
, 0, 8);
35 case PIPE_SHADER_TESS_EVAL
:
36 return ac_get_arg(&ctx
->ac
, ctx
->tes_rel_patch_id
);
/* Tessellation shaders pass outputs to the next shader using LDS.
 *
 * LS outputs = TCS inputs
 * TCS outputs = TES inputs
 *
 * - TCS inputs for patch 0
 * - TCS inputs for patch 1
 * - TCS inputs for patch 2		= get_tcs_in_current_patch_offset (if RelPatchID==2)
 *
 * - TCS outputs for patch 0            = get_tcs_out_patch0_offset
 * - Per-patch TCS outputs for patch 0  = get_tcs_out_patch0_patch_data_offset
 * - TCS outputs for patch 1
 * - Per-patch TCS outputs for patch 1
 * - TCS outputs for patch 2            = get_tcs_out_current_patch_offset (if RelPatchID==2)
 * - Per-patch TCS outputs for patch 2  = get_tcs_out_current_patch_data_offset (if RelPatchID==2)
 *
 * All three shaders VS(LS), TCS, TES share the same LDS space.
 */
/* Return the TCS input patch stride, unpacked from vs_state_bits via
 * si_unpack_param(..., 11, 13) (presumably bit offset 11, width 13 - confirm).
 * get_tcs_in_current_patch_offset multiplies this by the relative patch ID.
 * NOTE(review): the return-type line is not visible in this excerpt.
 */
66 get_tcs_in_patch_stride(struct si_shader_context
*ctx
)
68 return si_unpack_param(ctx
, ctx
->vs_state_bits
, 11, 13);
71 static unsigned get_tcs_out_vertex_dw_stride_constant(struct si_shader_context
*ctx
)
73 assert(ctx
->type
== PIPE_SHADER_TESS_CTRL
);
75 if (ctx
->shader
->key
.mono
.u
.ff_tcs_inputs_to_copy
)
76 return util_last_bit64(ctx
->shader
->key
.mono
.u
.ff_tcs_inputs_to_copy
) * 4;
78 return util_last_bit64(ctx
->shader
->selector
->outputs_written
) * 4;
81 static LLVMValueRef
get_tcs_out_vertex_dw_stride(struct si_shader_context
*ctx
)
83 unsigned stride
= get_tcs_out_vertex_dw_stride_constant(ctx
);
85 return LLVMConstInt(ctx
->i32
, stride
, 0);
88 static LLVMValueRef
get_tcs_out_patch_stride(struct si_shader_context
*ctx
)
90 if (ctx
->shader
->key
.mono
.u
.ff_tcs_inputs_to_copy
)
91 return si_unpack_param(ctx
, ctx
->tcs_out_lds_layout
, 0, 13);
93 const struct si_shader_info
*info
= &ctx
->shader
->selector
->info
;
94 unsigned tcs_out_vertices
= info
->properties
[TGSI_PROPERTY_TCS_VERTICES_OUT
];
95 unsigned vertex_dw_stride
= get_tcs_out_vertex_dw_stride_constant(ctx
);
96 unsigned num_patch_outputs
= util_last_bit64(ctx
->shader
->selector
->patch_outputs_written
);
97 unsigned patch_dw_stride
= tcs_out_vertices
* vertex_dw_stride
+
98 num_patch_outputs
* 4;
99 return LLVMConstInt(ctx
->i32
, patch_dw_stride
, 0);
/* Return the start of the TCS outputs for patch 0: the field unpacked from
 * tcs_out_lds_offsets with si_unpack_param(..., 0, 16), multiplied by 4.
 * NOTE(review): the return-type line is not visible in this excerpt.
 */
103 get_tcs_out_patch0_offset(struct si_shader_context
*ctx
)
105 return LLVMBuildMul(ctx
->ac
.builder
,
106 si_unpack_param(ctx
, ctx
->tcs_out_lds_offsets
, 0, 16),
107 LLVMConstInt(ctx
->i32
, 4, 0), "");
/* Return the start of the per-patch TCS outputs for patch 0: the field
 * unpacked from tcs_out_lds_offsets with si_unpack_param(..., 16, 16),
 * multiplied by 4.
 * NOTE(review): the return-type line is not visible in this excerpt.
 */
111 get_tcs_out_patch0_patch_data_offset(struct si_shader_context
*ctx
)
113 return LLVMBuildMul(ctx
->ac
.builder
,
114 si_unpack_param(ctx
, ctx
->tcs_out_lds_offsets
, 16, 16),
115 LLVMConstInt(ctx
->i32
, 4, 0), "");
/* Return the offset of the current patch's TCS inputs:
 * get_tcs_in_patch_stride() * get_rel_patch_id().
 * NOTE(review): the return-type line is not visible in this excerpt.
 */
119 get_tcs_in_current_patch_offset(struct si_shader_context
*ctx
)
121 LLVMValueRef patch_stride
= get_tcs_in_patch_stride(ctx
);
122 LLVMValueRef rel_patch_id
= get_rel_patch_id(ctx
);
124 return LLVMBuildMul(ctx
->ac
.builder
, patch_stride
, rel_patch_id
, "");
/* Return the offset of the current patch's TCS outputs, built with
 * ac_build_imad(patch_stride, rel_patch_id, patch0_offset) - presumably
 * patch_stride * rel_patch_id + patch0_offset (confirm ac_build_imad).
 * NOTE(review): the return-type line is not visible in this excerpt.
 */
128 get_tcs_out_current_patch_offset(struct si_shader_context
*ctx
)
130 LLVMValueRef patch0_offset
= get_tcs_out_patch0_offset(ctx
);
131 LLVMValueRef patch_stride
= get_tcs_out_patch_stride(ctx
);
132 LLVMValueRef rel_patch_id
= get_rel_patch_id(ctx
);
134 return ac_build_imad(&ctx
->ac
, patch_stride
, rel_patch_id
, patch0_offset
);
/* Return the offset of the current patch's per-patch TCS outputs, built with
 * ac_build_imad(patch_stride, rel_patch_id, patch0_patch_data_offset) -
 * presumably patch_stride * rel_patch_id + patch0_patch_data_offset
 * (confirm ac_build_imad).
 * NOTE(review): the return-type line is not visible in this excerpt.
 */
138 get_tcs_out_current_patch_data_offset(struct si_shader_context
*ctx
)
140 LLVMValueRef patch0_patch_data_offset
=
141 get_tcs_out_patch0_patch_data_offset(ctx
);
142 LLVMValueRef patch_stride
= get_tcs_out_patch_stride(ctx
);
143 LLVMValueRef rel_patch_id
= get_rel_patch_id(ctx
);
145 return ac_build_imad(&ctx
->ac
, patch_stride
, rel_patch_id
, patch0_patch_data_offset
);
148 static LLVMValueRef
get_num_tcs_out_vertices(struct si_shader_context
*ctx
)
150 unsigned tcs_out_vertices
=
151 ctx
->shader
->selector
?
152 ctx
->shader
->selector
->info
.properties
[TGSI_PROPERTY_TCS_VERTICES_OUT
] : 0;
154 /* If !tcs_out_vertices, it's either the fixed-func TCS or the TCS epilog. */
155 if (ctx
->type
== PIPE_SHADER_TESS_CTRL
&& tcs_out_vertices
)
156 return LLVMConstInt(ctx
->i32
, tcs_out_vertices
, 0);
158 return si_unpack_param(ctx
, ctx
->tcs_offchip_layout
, 6, 6);
/* Return the per-vertex stride of the TCS inputs, chosen per shader stage:
 *  - PIPE_SHADER_VERTEX: constant selector->lshs_vertex_stride / 4.
 *  - PIPE_SHADER_TESS_CTRL: on GFX9+ with a monolithic shader, constant
 *    key.part.tcs.ls->lshs_vertex_stride / 4; otherwise unpacked from
 *    vs_state_bits via si_unpack_param(..., 24, 8).
 * NOTE(review): the switch header, the declaration of `stride` and any
 * default case are not visible in this excerpt.
 */
161 static LLVMValueRef
get_tcs_in_vertex_dw_stride(struct si_shader_context
*ctx
)
166 case PIPE_SHADER_VERTEX
:
167 stride
= ctx
->shader
->selector
->lshs_vertex_stride
/ 4;
168 return LLVMConstInt(ctx
->i32
, stride
, 0);
170 case PIPE_SHADER_TESS_CTRL
:
171 if (ctx
->screen
->info
.chip_class
>= GFX9
&&
172 ctx
->shader
->is_monolithic
) {
173 stride
= ctx
->shader
->key
.part
.tcs
.ls
->lshs_vertex_stride
/ 4;
174 return LLVMConstInt(ctx
->i32
, stride
, 0);
176 return si_unpack_param(ctx
, ctx
->vs_state_bits
, 24, 8);
184 static LLVMValueRef
get_dw_address_from_generic_indices(struct si_shader_context
*ctx
,
185 LLVMValueRef vertex_dw_stride
,
186 LLVMValueRef base_addr
,
187 LLVMValueRef vertex_index
,
188 LLVMValueRef param_index
,
189 ubyte name
, ubyte index
)
191 if (vertex_dw_stride
) {
192 base_addr
= ac_build_imad(&ctx
->ac
, vertex_index
,
193 vertex_dw_stride
, base_addr
);
197 base_addr
= ac_build_imad(&ctx
->ac
, param_index
,
198 LLVMConstInt(ctx
->i32
, 4, 0), base_addr
);
201 int param
= name
== TGSI_SEMANTIC_PATCH
||
202 name
== TGSI_SEMANTIC_TESSINNER
||
203 name
== TGSI_SEMANTIC_TESSOUTER
?
204 si_shader_io_get_unique_index_patch(name
, index
) :
205 si_shader_io_get_unique_index(name
, index
, false);
207 /* Add the base address of the element. */
208 return LLVMBuildAdd(ctx
->ac
.builder
, base_addr
,
209 LLVMConstInt(ctx
->i32
, param
* 4, 0), "");
/* The offchip buffer layout for TCS->TES is
 *
 * - attribute 0 of patch 0 vertex 0
 * - attribute 0 of patch 0 vertex 1
 * - attribute 0 of patch 0 vertex 2
 *   ...
 * - attribute 0 of patch 1 vertex 0
 * - attribute 0 of patch 1 vertex 1
 *   ...
 * - attribute 1 of patch 0 vertex 0
 * - attribute 1 of patch 0 vertex 1
 *   ...
 * - per patch attribute 0 of patch 0
 * - per patch attribute 0 of patch 1
 *   ...
 *
 * Note that every attribute has 4 components.
 */
230 static LLVMValueRef
get_tcs_tes_buffer_address(struct si_shader_context
*ctx
,
231 LLVMValueRef rel_patch_id
,
232 LLVMValueRef vertex_index
,
233 LLVMValueRef param_index
)
235 LLVMValueRef base_addr
, vertices_per_patch
, num_patches
, total_vertices
;
236 LLVMValueRef param_stride
, constant16
;
238 vertices_per_patch
= get_num_tcs_out_vertices(ctx
);
239 num_patches
= si_unpack_param(ctx
, ctx
->tcs_offchip_layout
, 0, 6);
240 total_vertices
= LLVMBuildMul(ctx
->ac
.builder
, vertices_per_patch
,
243 constant16
= LLVMConstInt(ctx
->i32
, 16, 0);
245 base_addr
= ac_build_imad(&ctx
->ac
, rel_patch_id
,
246 vertices_per_patch
, vertex_index
);
247 param_stride
= total_vertices
;
249 base_addr
= rel_patch_id
;
250 param_stride
= num_patches
;
253 base_addr
= ac_build_imad(&ctx
->ac
, param_index
, param_stride
, base_addr
);
254 base_addr
= LLVMBuildMul(ctx
->ac
.builder
, base_addr
, constant16
, "");
257 LLVMValueRef patch_data_offset
=
258 si_unpack_param(ctx
, ctx
->tcs_offchip_layout
, 12, 20);
260 base_addr
= LLVMBuildAdd(ctx
->ac
.builder
, base_addr
,
261 patch_data_offset
, "");
266 static LLVMValueRef
get_tcs_tes_buffer_address_from_generic_indices(
267 struct si_shader_context
*ctx
,
268 LLVMValueRef vertex_index
,
269 LLVMValueRef param_index
,
270 ubyte name
, ubyte index
)
272 unsigned param_index_base
;
274 param_index_base
= name
== TGSI_SEMANTIC_PATCH
||
275 name
== TGSI_SEMANTIC_TESSINNER
||
276 name
== TGSI_SEMANTIC_TESSOUTER
?
277 si_shader_io_get_unique_index_patch(name
, index
) :
278 si_shader_io_get_unique_index(name
, index
, false);
281 param_index
= LLVMBuildAdd(ctx
->ac
.builder
, param_index
,
282 LLVMConstInt(ctx
->i32
, param_index_base
, 0),
285 param_index
= LLVMConstInt(ctx
->i32
, param_index_base
, 0);
288 return get_tcs_tes_buffer_address(ctx
, get_rel_patch_id(ctx
),
289 vertex_index
, param_index
);
292 static LLVMValueRef
buffer_load(struct si_shader_context
*ctx
,
293 LLVMTypeRef type
, unsigned swizzle
,
294 LLVMValueRef buffer
, LLVMValueRef offset
,
295 LLVMValueRef base
, bool can_speculate
)
297 LLVMValueRef value
, value2
;
298 LLVMTypeRef vec_type
= LLVMVectorType(type
, 4);
301 value
= ac_build_buffer_load(&ctx
->ac
, buffer
, 4, NULL
, base
, offset
,
302 0, ac_glc
, can_speculate
, false);
304 return LLVMBuildBitCast(ctx
->ac
.builder
, value
, vec_type
, "");
307 if (ac_get_type_size(type
) != 8) {
308 value
= ac_build_buffer_load(&ctx
->ac
, buffer
, 4, NULL
, base
, offset
,
309 0, ac_glc
, can_speculate
, false);
311 value
= LLVMBuildBitCast(ctx
->ac
.builder
, value
, vec_type
, "");
312 return LLVMBuildExtractElement(ctx
->ac
.builder
, value
,
313 LLVMConstInt(ctx
->i32
, swizzle
, 0), "");
316 value
= ac_build_buffer_load(&ctx
->ac
, buffer
, 1, NULL
, base
, offset
,
317 swizzle
* 4, ac_glc
, can_speculate
, false);
319 value2
= ac_build_buffer_load(&ctx
->ac
, buffer
, 1, NULL
, base
, offset
,
320 swizzle
* 4 + 4, ac_glc
, can_speculate
, false);
322 return si_build_gather_64bit(ctx
, type
, value
, value2
);
326 * Load from LSHS LDS storage.
328 * \param type output value type
329 * \param swizzle offset (typically 0..3); it can be ~0, which loads a vec4
330 * \param dw_addr address in dwords
332 static LLVMValueRef
lshs_lds_load(struct si_shader_context
*ctx
,
333 LLVMTypeRef type
, unsigned swizzle
,
334 LLVMValueRef dw_addr
)
339 LLVMValueRef values
[4];
341 for (unsigned chan
= 0; chan
< 4; chan
++)
342 values
[chan
] = lshs_lds_load(ctx
, type
, chan
, dw_addr
);
344 return ac_build_gather_values(&ctx
->ac
, values
, 4);
347 /* Split 64-bit loads. */
348 if (ac_get_type_size(type
) == 8) {
351 lo
= lshs_lds_load(ctx
, ctx
->i32
, swizzle
, dw_addr
);
352 hi
= lshs_lds_load(ctx
, ctx
->i32
, swizzle
+ 1, dw_addr
);
353 return si_build_gather_64bit(ctx
, type
, lo
, hi
);
356 dw_addr
= LLVMBuildAdd(ctx
->ac
.builder
, dw_addr
,
357 LLVMConstInt(ctx
->i32
, swizzle
, 0), "");
359 value
= ac_lds_load(&ctx
->ac
, dw_addr
);
361 return LLVMBuildBitCast(ctx
->ac
.builder
, value
, type
, "");
365 * Store to LSHS LDS storage.
367 * \param swizzle offset (typically 0..3)
368 * \param dw_addr address in dwords
369 * \param value value to store
/* Add the immediate dw_offset_imm to dw_addr and store `value` at the
 * resulting address with ac_lds_store().
 * NOTE(review): the parameter line declaring `value` is not visible in this
 * excerpt, and the doc comment preceding this function names the first
 * parameter "swizzle" while the signature calls it dw_offset_imm.
 */
371 static void lshs_lds_store(struct si_shader_context
*ctx
,
372 unsigned dw_offset_imm
, LLVMValueRef dw_addr
,
375 dw_addr
= LLVMBuildAdd(ctx
->ac
.builder
, dw_addr
,
376 LLVMConstInt(ctx
->i32
, dw_offset_imm
, 0), "");
378 ac_lds_store(&ctx
->ac
, dw_addr
, value
);
383 TESS_OFFCHIP_RING_TCS
,
384 TESS_OFFCHIP_RING_TES
,
387 static LLVMValueRef
get_tess_ring_descriptor(struct si_shader_context
*ctx
,
388 enum si_tess_ring ring
)
390 LLVMBuilderRef builder
= ctx
->ac
.builder
;
391 LLVMValueRef addr
= ac_get_arg(&ctx
->ac
,
392 ring
== TESS_OFFCHIP_RING_TES
?
393 ctx
->tes_offchip_addr
:
394 ctx
->tcs_out_lds_layout
);
396 /* TCS only receives high 13 bits of the address. */
397 if (ring
== TESS_OFFCHIP_RING_TCS
|| ring
== TCS_FACTOR_RING
) {
398 addr
= LLVMBuildAnd(builder
, addr
,
399 LLVMConstInt(ctx
->i32
, 0xfff80000, 0), "");
402 if (ring
== TCS_FACTOR_RING
) {
403 unsigned tf_offset
= ctx
->screen
->tess_offchip_ring_size
;
404 addr
= LLVMBuildAdd(builder
, addr
,
405 LLVMConstInt(ctx
->i32
, tf_offset
, 0), "");
408 uint32_t rsrc3
= S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X
) |
409 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y
) |
410 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z
) |
411 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W
);
413 if (ctx
->screen
->info
.chip_class
>= GFX10
)
414 rsrc3
|= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT
) |
415 S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW
) |
416 S_008F0C_RESOURCE_LEVEL(1);
418 rsrc3
|= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT
) |
419 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32
);
421 LLVMValueRef desc
[4];
423 desc
[1] = LLVMConstInt(ctx
->i32
,
424 S_008F04_BASE_ADDRESS_HI(ctx
->screen
->info
.address32_hi
), 0);
425 desc
[2] = LLVMConstInt(ctx
->i32
, 0xffffffff, 0);
426 desc
[3] = LLVMConstInt(ctx
->i32
, rsrc3
, false);
428 return ac_build_gather_values(&ctx
->ac
, desc
, 4);
431 void si_llvm_preload_tes_rings(struct si_shader_context
*ctx
)
433 ctx
->tess_offchip_ring
= get_tess_ring_descriptor(ctx
, TESS_OFFCHIP_RING_TES
);
436 static LLVMValueRef
si_nir_load_tcs_varyings(struct ac_shader_abi
*abi
,
438 LLVMValueRef vertex_index
,
439 LLVMValueRef param_index
,
440 unsigned const_index
,
442 unsigned driver_location
,
444 unsigned num_components
,
449 struct si_shader_context
*ctx
= si_shader_context_from_abi(abi
);
450 struct si_shader_info
*info
= &ctx
->shader
->selector
->info
;
451 LLVMValueRef dw_addr
, stride
;
454 driver_location
= driver_location
/ 4;
457 name
= info
->input_semantic_name
[driver_location
];
458 index
= info
->input_semantic_index
[driver_location
];
460 name
= info
->output_semantic_name
[driver_location
];
461 index
= info
->output_semantic_index
[driver_location
];
464 assert((name
== TGSI_SEMANTIC_PATCH
||
465 name
== TGSI_SEMANTIC_TESSINNER
||
466 name
== TGSI_SEMANTIC_TESSOUTER
) == is_patch
);
469 stride
= get_tcs_in_vertex_dw_stride(ctx
);
470 dw_addr
= get_tcs_in_current_patch_offset(ctx
);
474 dw_addr
= get_tcs_out_current_patch_data_offset(ctx
);
476 stride
= get_tcs_out_vertex_dw_stride(ctx
);
477 dw_addr
= get_tcs_out_current_patch_offset(ctx
);
482 param_index
= LLVMConstInt(ctx
->i32
, const_index
, 0);
485 dw_addr
= get_dw_address_from_generic_indices(ctx
, stride
, dw_addr
,
486 vertex_index
, param_index
,
489 LLVMValueRef value
[4];
490 for (unsigned i
= 0; i
< num_components
; i
++) {
492 if (ac_get_type_size(type
) == 8)
496 value
[i
+ component
] = lshs_lds_load(ctx
, type
, offset
, dw_addr
);
499 return ac_build_varying_gather_values(&ctx
->ac
, value
, num_components
, component
);
502 LLVMValueRef
si_nir_load_input_tes(struct ac_shader_abi
*abi
,
504 LLVMValueRef vertex_index
,
505 LLVMValueRef param_index
,
506 unsigned const_index
,
508 unsigned driver_location
,
510 unsigned num_components
,
515 struct si_shader_context
*ctx
= si_shader_context_from_abi(abi
);
516 struct si_shader_info
*info
= &ctx
->shader
->selector
->info
;
517 LLVMValueRef base
, addr
;
519 driver_location
= driver_location
/ 4;
520 ubyte name
= info
->input_semantic_name
[driver_location
];
521 ubyte index
= info
->input_semantic_index
[driver_location
];
523 assert((name
== TGSI_SEMANTIC_PATCH
||
524 name
== TGSI_SEMANTIC_TESSINNER
||
525 name
== TGSI_SEMANTIC_TESSOUTER
) == is_patch
);
527 base
= ac_get_arg(&ctx
->ac
, ctx
->tcs_offchip_offset
);
530 param_index
= LLVMConstInt(ctx
->i32
, const_index
, 0);
533 addr
= get_tcs_tes_buffer_address_from_generic_indices(ctx
, vertex_index
,
537 /* TODO: This will generate rather ordinary llvm code, although it
538 * should be easy for the optimiser to fix up. In future we might want
539 * to refactor buffer_load().
541 LLVMValueRef value
[4];
542 for (unsigned i
= 0; i
< num_components
; i
++) {
544 if (ac_get_type_size(type
) == 8) {
547 ubyte name
= info
->input_semantic_name
[driver_location
+ 1];
548 ubyte index
= info
->input_semantic_index
[driver_location
+ 1];
549 addr
= get_tcs_tes_buffer_address_from_generic_indices(ctx
,
559 value
[i
+ component
] = buffer_load(ctx
, type
, offset
,
560 ctx
->tess_offchip_ring
, base
, addr
, true);
563 return ac_build_varying_gather_values(&ctx
->ac
, value
, num_components
, component
);
566 static void si_nir_store_output_tcs(struct ac_shader_abi
*abi
,
567 const struct nir_variable
*var
,
568 LLVMValueRef vertex_index
,
569 LLVMValueRef param_index
,
570 unsigned const_index
,
574 struct si_shader_context
*ctx
= si_shader_context_from_abi(abi
);
575 struct si_shader_info
*info
= &ctx
->shader
->selector
->info
;
576 const unsigned component
= var
->data
.location_frac
;
577 unsigned driver_location
= var
->data
.driver_location
;
578 LLVMValueRef dw_addr
, stride
;
579 LLVMValueRef buffer
, base
, addr
;
580 LLVMValueRef values
[8];
582 bool is_tess_factor
= false, is_tess_inner
= false;
584 driver_location
= driver_location
/ 4;
585 ubyte name
= info
->output_semantic_name
[driver_location
];
586 ubyte index
= info
->output_semantic_index
[driver_location
];
588 bool is_const
= !param_index
;
590 param_index
= LLVMConstInt(ctx
->i32
, const_index
, 0);
592 const bool is_patch
= var
->data
.patch
||
593 var
->data
.location
== VARYING_SLOT_TESS_LEVEL_INNER
||
594 var
->data
.location
== VARYING_SLOT_TESS_LEVEL_OUTER
;
596 assert((name
== TGSI_SEMANTIC_PATCH
||
597 name
== TGSI_SEMANTIC_TESSINNER
||
598 name
== TGSI_SEMANTIC_TESSOUTER
) == is_patch
);
601 stride
= get_tcs_out_vertex_dw_stride(ctx
);
602 dw_addr
= get_tcs_out_current_patch_offset(ctx
);
603 dw_addr
= get_dw_address_from_generic_indices(ctx
, stride
, dw_addr
,
604 vertex_index
, param_index
,
607 skip_lds_store
= !info
->reads_pervertex_outputs
;
609 dw_addr
= get_tcs_out_current_patch_data_offset(ctx
);
610 dw_addr
= get_dw_address_from_generic_indices(ctx
, NULL
, dw_addr
,
611 vertex_index
, param_index
,
614 skip_lds_store
= !info
->reads_perpatch_outputs
;
616 if (is_const
&& const_index
== 0) {
617 int name
= info
->output_semantic_name
[driver_location
];
619 /* Always write tess factors into LDS for the TCS epilog. */
620 if (name
== TGSI_SEMANTIC_TESSINNER
||
621 name
== TGSI_SEMANTIC_TESSOUTER
) {
622 /* The epilog doesn't read LDS if invocation 0 defines tess factors. */
623 skip_lds_store
= !info
->reads_tessfactor_outputs
&&
624 ctx
->shader
->selector
->info
.tessfactors_are_def_in_all_invocs
;
625 is_tess_factor
= true;
626 is_tess_inner
= name
== TGSI_SEMANTIC_TESSINNER
;
631 buffer
= get_tess_ring_descriptor(ctx
, TESS_OFFCHIP_RING_TCS
);
633 base
= ac_get_arg(&ctx
->ac
, ctx
->tcs_offchip_offset
);
635 addr
= get_tcs_tes_buffer_address_from_generic_indices(ctx
, vertex_index
,
636 param_index
, name
, index
);
638 for (unsigned chan
= component
; chan
< 8; chan
++) {
639 if (!(writemask
& (1 << chan
)))
641 LLVMValueRef value
= ac_llvm_extract_elem(&ctx
->ac
, src
, chan
- component
);
643 unsigned buffer_store_offset
= chan
% 4;
645 ubyte name
= info
->output_semantic_name
[driver_location
+ 1];
646 ubyte index
= info
->output_semantic_index
[driver_location
+ 1];
647 addr
= get_tcs_tes_buffer_address_from_generic_indices(ctx
,
653 /* Skip LDS stores if there is no LDS read of this output. */
655 lshs_lds_store(ctx
, chan
, dw_addr
, value
);
657 value
= ac_to_integer(&ctx
->ac
, value
);
658 values
[chan
] = value
;
660 if (writemask
!= 0xF && !is_tess_factor
) {
661 ac_build_buffer_store_dword(&ctx
->ac
, buffer
, value
, 1,
663 4 * buffer_store_offset
,
667 /* Write tess factors into VGPRs for the epilog. */
668 if (is_tess_factor
&&
669 ctx
->shader
->selector
->info
.tessfactors_are_def_in_all_invocs
) {
670 if (!is_tess_inner
) {
671 LLVMBuildStore(ctx
->ac
.builder
, value
, /* outer */
672 ctx
->invoc0_tess_factors
[chan
]);
673 } else if (chan
< 2) {
674 LLVMBuildStore(ctx
->ac
.builder
, value
, /* inner */
675 ctx
->invoc0_tess_factors
[4 + chan
]);
680 if (writemask
== 0xF && !is_tess_factor
) {
681 LLVMValueRef value
= ac_build_gather_values(&ctx
->ac
,
683 ac_build_buffer_store_dword(&ctx
->ac
, buffer
, value
, 4, addr
,
/* Build the tessellation coordinate as a 4-element gathered value.
 * Elements 0 and 1 come from the tes_u and tes_v shader arguments. When the
 * TES primitive mode is PIPE_PRIM_TRIANGLES, element 2 is replaced with
 * 1.0 - (u + v).
 * NOTE(review): the initializers for elements 2 and 3 and the end of the
 * array initializer are not visible in this excerpt.
 */
688 static LLVMValueRef
si_load_tess_coord(struct ac_shader_abi
*abi
)
690 struct si_shader_context
*ctx
= si_shader_context_from_abi(abi
);
691 LLVMValueRef coord
[4] = {
692 ac_get_arg(&ctx
->ac
, ctx
->tes_u
),
693 ac_get_arg(&ctx
->ac
, ctx
->tes_v
),
698 /* For triangles, the vector should be (u, v, 1-u-v). */
699 if (ctx
->shader
->selector
->info
.properties
[TGSI_PROPERTY_TES_PRIM_MODE
] ==
700 PIPE_PRIM_TRIANGLES
) {
701 coord
[2] = LLVMBuildFSub(ctx
->ac
.builder
, ctx
->ac
.f32_1
,
702 LLVMBuildFAdd(ctx
->ac
.builder
,
703 coord
[0], coord
[1], ""), "");
705 return ac_build_gather_values(&ctx
->ac
, coord
, 4);
708 static LLVMValueRef
load_tess_level(struct si_shader_context
*ctx
,
709 unsigned semantic_name
)
711 LLVMValueRef base
, addr
;
713 int param
= si_shader_io_get_unique_index_patch(semantic_name
, 0);
715 base
= ac_get_arg(&ctx
->ac
, ctx
->tcs_offchip_offset
);
716 addr
= get_tcs_tes_buffer_address(ctx
, get_rel_patch_id(ctx
), NULL
,
717 LLVMConstInt(ctx
->i32
, param
, 0));
719 return buffer_load(ctx
, ctx
->f32
,
720 ~0, ctx
->tess_offchip_ring
, base
, addr
, true);
/* Load the default tess levels from a constant buffer.
 * The buffer descriptor is loaded from rw_buffers at slot
 * SI_HS_CONST_DEFAULT_TESS_LEVELS. Four values are then read starting at
 * element offset 4 when semantic_name is
 * TGSI_SEMANTIC_TESS_DEFAULT_INNER_LEVEL, or 0 otherwise (byte offset
 * (offset + i) * 4), and returned as a gathered 4-element value.
 * NOTE(review): the declarations of `i` and `offset` are not visible in this
 * excerpt.
 */
724 static LLVMValueRef
load_tess_level_default(struct si_shader_context
*ctx
,
725 unsigned semantic_name
)
727 LLVMValueRef buf
, slot
, val
[4];
730 slot
= LLVMConstInt(ctx
->i32
, SI_HS_CONST_DEFAULT_TESS_LEVELS
, 0);
731 buf
= ac_get_arg(&ctx
->ac
, ctx
->rw_buffers
);
732 buf
= ac_build_load_to_sgpr(&ctx
->ac
, buf
, slot
);
733 offset
= semantic_name
== TGSI_SEMANTIC_TESS_DEFAULT_INNER_LEVEL
? 4 : 0;
735 for (i
= 0; i
< 4; i
++)
736 val
[i
] = si_buffer_load_const(ctx
, buf
,
737 LLVMConstInt(ctx
->i32
, (offset
+ i
) * 4, 0));
738 return ac_build_gather_values(&ctx
->ac
, val
, 4);
/* ABI callback: load the inner or outer tess level selected by varying_id.
 * With load_default_state set, VARYING_SLOT_TESS_LEVEL_INNER/OUTER map to
 * TGSI_SEMANTIC_TESS_DEFAULT_INNER_LEVEL/OUTER_LEVEL and the value comes from
 * load_tess_level_default(). Otherwise they map to
 * TGSI_SEMANTIC_TESSINNER/TESSOUTER and the value comes from
 * load_tess_level(). Any other varying_id hits unreachable().
 * NOTE(review): the parameter line declaring `varying_id` and the lines
 * between the cases (presumably `break;` / `default:`) are not visible in
 * this excerpt.
 */
741 static LLVMValueRef
si_load_tess_level(struct ac_shader_abi
*abi
,
743 bool load_default_state
)
745 struct si_shader_context
*ctx
= si_shader_context_from_abi(abi
);
746 unsigned semantic_name
;
748 if (load_default_state
) {
749 switch (varying_id
) {
750 case VARYING_SLOT_TESS_LEVEL_INNER
:
751 semantic_name
= TGSI_SEMANTIC_TESS_DEFAULT_INNER_LEVEL
;
753 case VARYING_SLOT_TESS_LEVEL_OUTER
:
754 semantic_name
= TGSI_SEMANTIC_TESS_DEFAULT_OUTER_LEVEL
;
757 unreachable("unknown tess level");
759 return load_tess_level_default(ctx
, semantic_name
);
762 switch (varying_id
) {
763 case VARYING_SLOT_TESS_LEVEL_INNER
:
764 semantic_name
= TGSI_SEMANTIC_TESSINNER
;
766 case VARYING_SLOT_TESS_LEVEL_OUTER
:
767 semantic_name
= TGSI_SEMANTIC_TESSOUTER
;
770 unreachable("unknown tess level");
773 return load_tess_level(ctx
, semantic_name
);
777 static LLVMValueRef
si_load_patch_vertices_in(struct ac_shader_abi
*abi
)
779 struct si_shader_context
*ctx
= si_shader_context_from_abi(abi
);
780 if (ctx
->type
== PIPE_SHADER_TESS_CTRL
)
781 return si_unpack_param(ctx
, ctx
->tcs_out_lds_layout
, 13, 6);
782 else if (ctx
->type
== PIPE_SHADER_TESS_EVAL
)
783 return get_num_tcs_out_vertices(ctx
);
785 unreachable("invalid shader stage for TGSI_SEMANTIC_VERTICESIN");
789 * Forward all outputs from the vertex shader to the TES. This is only used
790 * for the fixed function TCS.
792 static void si_copy_tcs_inputs(struct si_shader_context
*ctx
)
794 LLVMValueRef invocation_id
, buffer
, buffer_offset
;
795 LLVMValueRef lds_vertex_stride
, lds_base
;
798 invocation_id
= si_unpack_param(ctx
, ctx
->args
.tcs_rel_ids
, 8, 5);
799 buffer
= get_tess_ring_descriptor(ctx
, TESS_OFFCHIP_RING_TCS
);
800 buffer_offset
= ac_get_arg(&ctx
->ac
, ctx
->tcs_offchip_offset
);
802 lds_vertex_stride
= get_tcs_in_vertex_dw_stride(ctx
);
803 lds_base
= get_tcs_in_current_patch_offset(ctx
);
804 lds_base
= ac_build_imad(&ctx
->ac
, invocation_id
, lds_vertex_stride
,
807 inputs
= ctx
->shader
->key
.mono
.u
.ff_tcs_inputs_to_copy
;
809 unsigned i
= u_bit_scan64(&inputs
);
811 LLVMValueRef lds_ptr
= LLVMBuildAdd(ctx
->ac
.builder
, lds_base
,
812 LLVMConstInt(ctx
->i32
, 4 * i
, 0),
815 LLVMValueRef buffer_addr
= get_tcs_tes_buffer_address(ctx
,
816 get_rel_patch_id(ctx
),
818 LLVMConstInt(ctx
->i32
, i
, 0));
820 LLVMValueRef value
= lshs_lds_load(ctx
, ctx
->ac
.i32
, ~0, lds_ptr
);
822 ac_build_buffer_store_dword(&ctx
->ac
, buffer
, value
, 4, buffer_addr
,
823 buffer_offset
, 0, ac_glc
);
827 static void si_write_tess_factors(struct si_shader_context
*ctx
,
828 LLVMValueRef rel_patch_id
,
829 LLVMValueRef invocation_id
,
830 LLVMValueRef tcs_out_current_patch_data_offset
,
831 LLVMValueRef invoc0_tf_outer
[4],
832 LLVMValueRef invoc0_tf_inner
[2])
834 struct si_shader
*shader
= ctx
->shader
;
835 unsigned tess_inner_index
, tess_outer_index
;
836 LLVMValueRef lds_base
, lds_inner
, lds_outer
, byteoffset
, buffer
;
837 LLVMValueRef out
[6], vec0
, vec1
, tf_base
, inner
[4], outer
[4];
838 unsigned stride
, outer_comps
, inner_comps
, i
, offset
;
840 /* Add a barrier before loading tess factors from LDS. */
841 if (!shader
->key
.part
.tcs
.epilog
.invoc0_tess_factors_are_def
)
842 si_llvm_emit_barrier(ctx
);
844 /* Do this only for invocation 0, because the tess levels are per-patch,
847 * This can't jump, because invocation 0 executes this. It should
848 * at least mask out the loads and stores for other invocations.
850 ac_build_ifcc(&ctx
->ac
,
851 LLVMBuildICmp(ctx
->ac
.builder
, LLVMIntEQ
,
852 invocation_id
, ctx
->i32_0
, ""), 6503);
854 /* Determine the layout of one tess factor element in the buffer. */
855 switch (shader
->key
.part
.tcs
.epilog
.prim_mode
) {
856 case PIPE_PRIM_LINES
:
857 stride
= 2; /* 2 dwords, 1 vec2 store */
861 case PIPE_PRIM_TRIANGLES
:
862 stride
= 4; /* 4 dwords, 1 vec4 store */
866 case PIPE_PRIM_QUADS
:
867 stride
= 6; /* 6 dwords, 2 stores (vec4 + vec2) */
876 for (i
= 0; i
< 4; i
++) {
877 inner
[i
] = LLVMGetUndef(ctx
->i32
);
878 outer
[i
] = LLVMGetUndef(ctx
->i32
);
881 if (shader
->key
.part
.tcs
.epilog
.invoc0_tess_factors_are_def
) {
882 /* Tess factors are in VGPRs. */
883 for (i
= 0; i
< outer_comps
; i
++)
884 outer
[i
] = out
[i
] = invoc0_tf_outer
[i
];
885 for (i
= 0; i
< inner_comps
; i
++)
886 inner
[i
] = out
[outer_comps
+i
] = invoc0_tf_inner
[i
];
888 /* Load tess_inner and tess_outer from LDS.
889 * Any invocation can write them, so we can't get them from a temporary.
891 tess_inner_index
= si_shader_io_get_unique_index_patch(TGSI_SEMANTIC_TESSINNER
, 0);
892 tess_outer_index
= si_shader_io_get_unique_index_patch(TGSI_SEMANTIC_TESSOUTER
, 0);
894 lds_base
= tcs_out_current_patch_data_offset
;
895 lds_inner
= LLVMBuildAdd(ctx
->ac
.builder
, lds_base
,
896 LLVMConstInt(ctx
->i32
,
897 tess_inner_index
* 4, 0), "");
898 lds_outer
= LLVMBuildAdd(ctx
->ac
.builder
, lds_base
,
899 LLVMConstInt(ctx
->i32
,
900 tess_outer_index
* 4, 0), "");
902 for (i
= 0; i
< outer_comps
; i
++) {
904 lshs_lds_load(ctx
, ctx
->ac
.i32
, i
, lds_outer
);
906 for (i
= 0; i
< inner_comps
; i
++) {
907 inner
[i
] = out
[outer_comps
+i
] =
908 lshs_lds_load(ctx
, ctx
->ac
.i32
, i
, lds_inner
);
912 if (shader
->key
.part
.tcs
.epilog
.prim_mode
== PIPE_PRIM_LINES
) {
913 /* For isolines, the hardware expects tess factors in the
914 * reverse order from what NIR specifies.
916 LLVMValueRef tmp
= out
[0];
921 /* Convert the outputs to vectors for stores. */
922 vec0
= ac_build_gather_values(&ctx
->ac
, out
, MIN2(stride
, 4));
926 vec1
= ac_build_gather_values(&ctx
->ac
, out
+4, stride
- 4);
928 /* Get the buffer. */
929 buffer
= get_tess_ring_descriptor(ctx
, TCS_FACTOR_RING
);
931 /* Get the offset. */
932 tf_base
= ac_get_arg(&ctx
->ac
,
933 ctx
->tcs_factor_offset
);
934 byteoffset
= LLVMBuildMul(ctx
->ac
.builder
, rel_patch_id
,
935 LLVMConstInt(ctx
->i32
, 4 * stride
, 0), "");
937 ac_build_ifcc(&ctx
->ac
,
938 LLVMBuildICmp(ctx
->ac
.builder
, LLVMIntEQ
,
939 rel_patch_id
, ctx
->i32_0
, ""), 6504);
941 /* Store the dynamic HS control word. */
943 if (ctx
->screen
->info
.chip_class
<= GFX8
) {
944 ac_build_buffer_store_dword(&ctx
->ac
, buffer
,
945 LLVMConstInt(ctx
->i32
, 0x80000000, 0),
946 1, ctx
->i32_0
, tf_base
,
951 ac_build_endif(&ctx
->ac
, 6504);
953 /* Store the tessellation factors. */
954 ac_build_buffer_store_dword(&ctx
->ac
, buffer
, vec0
,
955 MIN2(stride
, 4), byteoffset
, tf_base
,
959 ac_build_buffer_store_dword(&ctx
->ac
, buffer
, vec1
,
960 stride
- 4, byteoffset
, tf_base
,
963 /* Store the tess factors into the offchip buffer if TES reads them. */
964 if (shader
->key
.part
.tcs
.epilog
.tes_reads_tess_factors
) {
965 LLVMValueRef buf
, base
, inner_vec
, outer_vec
, tf_outer_offset
;
966 LLVMValueRef tf_inner_offset
;
967 unsigned param_outer
, param_inner
;
969 buf
= get_tess_ring_descriptor(ctx
, TESS_OFFCHIP_RING_TCS
);
970 base
= ac_get_arg(&ctx
->ac
, ctx
->tcs_offchip_offset
);
972 param_outer
= si_shader_io_get_unique_index_patch(
973 TGSI_SEMANTIC_TESSOUTER
, 0);
974 tf_outer_offset
= get_tcs_tes_buffer_address(ctx
, rel_patch_id
, NULL
,
975 LLVMConstInt(ctx
->i32
, param_outer
, 0));
977 unsigned outer_vec_size
=
978 ac_has_vec3_support(ctx
->screen
->info
.chip_class
, false) ?
979 outer_comps
: util_next_power_of_two(outer_comps
);
980 outer_vec
= ac_build_gather_values(&ctx
->ac
, outer
, outer_vec_size
);
982 ac_build_buffer_store_dword(&ctx
->ac
, buf
, outer_vec
,
983 outer_comps
, tf_outer_offset
,
986 param_inner
= si_shader_io_get_unique_index_patch(
987 TGSI_SEMANTIC_TESSINNER
, 0);
988 tf_inner_offset
= get_tcs_tes_buffer_address(ctx
, rel_patch_id
, NULL
,
989 LLVMConstInt(ctx
->i32
, param_inner
, 0));
991 inner_vec
= inner_comps
== 1 ? inner
[0] :
992 ac_build_gather_values(&ctx
->ac
, inner
, inner_comps
);
993 ac_build_buffer_store_dword(&ctx
->ac
, buf
, inner_vec
,
994 inner_comps
, tf_inner_offset
,
999 ac_build_endif(&ctx
->ac
, 6503);
1002 /* This only writes the tessellation factor levels. */
1003 static void si_llvm_emit_tcs_epilogue(struct ac_shader_abi
*abi
,
1004 unsigned max_outputs
,
1005 LLVMValueRef
*addrs
)
1007 struct si_shader_context
*ctx
= si_shader_context_from_abi(abi
);
1008 LLVMBuilderRef builder
= ctx
->ac
.builder
;
1009 LLVMValueRef rel_patch_id
, invocation_id
, tf_lds_offset
;
1011 si_copy_tcs_inputs(ctx
);
1013 rel_patch_id
= get_rel_patch_id(ctx
);
1014 invocation_id
= si_unpack_param(ctx
, ctx
->args
.tcs_rel_ids
, 8, 5);
1015 tf_lds_offset
= get_tcs_out_current_patch_data_offset(ctx
);
1017 if (ctx
->screen
->info
.chip_class
>= GFX9
) {
1018 LLVMBasicBlockRef blocks
[2] = {
1019 LLVMGetInsertBlock(builder
),
1020 ctx
->merged_wrap_if_entry_block
1022 LLVMValueRef values
[2];
1024 ac_build_endif(&ctx
->ac
, ctx
->merged_wrap_if_label
);
1026 values
[0] = rel_patch_id
;
1027 values
[1] = LLVMGetUndef(ctx
->i32
);
1028 rel_patch_id
= ac_build_phi(&ctx
->ac
, ctx
->i32
, 2, values
, blocks
);
1030 values
[0] = tf_lds_offset
;
1031 values
[1] = LLVMGetUndef(ctx
->i32
);
1032 tf_lds_offset
= ac_build_phi(&ctx
->ac
, ctx
->i32
, 2, values
, blocks
);
1034 values
[0] = invocation_id
;
1035 values
[1] = ctx
->i32_1
; /* cause the epilog to skip threads */
1036 invocation_id
= ac_build_phi(&ctx
->ac
, ctx
->i32
, 2, values
, blocks
);
1039 /* Return epilog parameters from this function. */
1040 LLVMValueRef ret
= ctx
->return_value
;
1043 if (ctx
->screen
->info
.chip_class
>= GFX9
) {
1044 ret
= si_insert_input_ret(ctx
, ret
, ctx
->tcs_offchip_layout
,
1045 8 + GFX9_SGPR_TCS_OFFCHIP_LAYOUT
);
1046 ret
= si_insert_input_ret(ctx
, ret
, ctx
->tcs_out_lds_layout
,
1047 8 + GFX9_SGPR_TCS_OUT_LAYOUT
);
1048 /* Tess offchip and tess factor offsets are at the beginning. */
1049 ret
= si_insert_input_ret(ctx
, ret
, ctx
->tcs_offchip_offset
, 2);
1050 ret
= si_insert_input_ret(ctx
, ret
, ctx
->tcs_factor_offset
, 4);
1051 vgpr
= 8 + GFX9_SGPR_TCS_OUT_LAYOUT
+ 1;
1053 ret
= si_insert_input_ret(ctx
, ret
, ctx
->tcs_offchip_layout
,
1054 GFX6_SGPR_TCS_OFFCHIP_LAYOUT
);
1055 ret
= si_insert_input_ret(ctx
, ret
, ctx
->tcs_out_lds_layout
,
1056 GFX6_SGPR_TCS_OUT_LAYOUT
);
1057 /* Tess offchip and tess factor offsets are after user SGPRs. */
1058 ret
= si_insert_input_ret(ctx
, ret
, ctx
->tcs_offchip_offset
,
1059 GFX6_TCS_NUM_USER_SGPR
);
1060 ret
= si_insert_input_ret(ctx
, ret
, ctx
->tcs_factor_offset
,
1061 GFX6_TCS_NUM_USER_SGPR
+ 1);
1062 vgpr
= GFX6_TCS_NUM_USER_SGPR
+ 2;
1066 rel_patch_id
= ac_to_float(&ctx
->ac
, rel_patch_id
);
1067 invocation_id
= ac_to_float(&ctx
->ac
, invocation_id
);
1068 tf_lds_offset
= ac_to_float(&ctx
->ac
, tf_lds_offset
);
1070 /* Leave a hole corresponding to the two input VGPRs. This ensures that
1071 * the invocation_id output does not alias the tcs_rel_ids input,
1072 * which saves a V_MOV on gfx9.
1076 ret
= LLVMBuildInsertValue(builder
, ret
, rel_patch_id
, vgpr
++, "");
1077 ret
= LLVMBuildInsertValue(builder
, ret
, invocation_id
, vgpr
++, "");
1079 if (ctx
->shader
->selector
->info
.tessfactors_are_def_in_all_invocs
) {
1080 vgpr
++; /* skip the tess factor LDS offset */
1081 for (unsigned i
= 0; i
< 6; i
++) {
1082 LLVMValueRef value
=
1083 LLVMBuildLoad(builder
, ctx
->invoc0_tess_factors
[i
], "");
1084 value
= ac_to_float(&ctx
->ac
, value
);
1085 ret
= LLVMBuildInsertValue(builder
, ret
, value
, vgpr
++, "");
1088 ret
= LLVMBuildInsertValue(builder
, ret
, tf_lds_offset
, vgpr
++, "");
1090 ctx
->return_value
= ret
;
1093 /* Pass TCS inputs from LS to TCS on GFX9. */
1094 static void si_set_ls_return_value_for_tcs(struct si_shader_context
*ctx
)
1096 LLVMValueRef ret
= ctx
->return_value
;
1098 ret
= si_insert_input_ptr(ctx
, ret
, ctx
->other_const_and_shader_buffers
, 0);
1099 ret
= si_insert_input_ptr(ctx
, ret
, ctx
->other_samplers_and_images
, 1);
1100 ret
= si_insert_input_ret(ctx
, ret
, ctx
->tcs_offchip_offset
, 2);
1101 ret
= si_insert_input_ret(ctx
, ret
, ctx
->merged_wave_info
, 3);
1102 ret
= si_insert_input_ret(ctx
, ret
, ctx
->tcs_factor_offset
, 4);
1103 ret
= si_insert_input_ret(ctx
, ret
, ctx
->merged_scratch_offset
, 5);
1105 ret
= si_insert_input_ptr(ctx
, ret
, ctx
->rw_buffers
,
1106 8 + SI_SGPR_RW_BUFFERS
);
1107 ret
= si_insert_input_ptr(ctx
, ret
,
1108 ctx
->bindless_samplers_and_images
,
1109 8 + SI_SGPR_BINDLESS_SAMPLERS_AND_IMAGES
);
1111 ret
= si_insert_input_ret(ctx
, ret
, ctx
->vs_state_bits
,
1112 8 + SI_SGPR_VS_STATE_BITS
);
1114 ret
= si_insert_input_ret(ctx
, ret
, ctx
->tcs_offchip_layout
,
1115 8 + GFX9_SGPR_TCS_OFFCHIP_LAYOUT
);
1116 ret
= si_insert_input_ret(ctx
, ret
, ctx
->tcs_out_lds_offsets
,
1117 8 + GFX9_SGPR_TCS_OUT_OFFSETS
);
1118 ret
= si_insert_input_ret(ctx
, ret
, ctx
->tcs_out_lds_layout
,
1119 8 + GFX9_SGPR_TCS_OUT_LAYOUT
);
1121 unsigned vgpr
= 8 + GFX9_TCS_NUM_USER_SGPR
;
1122 ret
= LLVMBuildInsertValue(ctx
->ac
.builder
, ret
,
1123 ac_to_float(&ctx
->ac
,
1124 ac_get_arg(&ctx
->ac
, ctx
->args
.tcs_patch_id
)),
1126 ret
= LLVMBuildInsertValue(ctx
->ac
.builder
, ret
,
1127 ac_to_float(&ctx
->ac
,
1128 ac_get_arg(&ctx
->ac
, ctx
->args
.tcs_rel_ids
)),
1130 ctx
->return_value
= ret
;
1133 void si_llvm_emit_ls_epilogue(struct ac_shader_abi
*abi
, unsigned max_outputs
,
1134 LLVMValueRef
*addrs
)
1136 struct si_shader_context
*ctx
= si_shader_context_from_abi(abi
);
1137 struct si_shader
*shader
= ctx
->shader
;
1138 struct si_shader_info
*info
= &shader
->selector
->info
;
1140 LLVMValueRef vertex_id
= ac_get_arg(&ctx
->ac
, ctx
->rel_auto_id
);
1141 LLVMValueRef vertex_dw_stride
= get_tcs_in_vertex_dw_stride(ctx
);
1142 LLVMValueRef base_dw_addr
= LLVMBuildMul(ctx
->ac
.builder
, vertex_id
,
1143 vertex_dw_stride
, "");
1145 /* Write outputs to LDS. The next shader (TCS aka HS) will read
1146 * its inputs from it. */
1147 for (i
= 0; i
< info
->num_outputs
; i
++) {
1148 unsigned name
= info
->output_semantic_name
[i
];
1149 unsigned index
= info
->output_semantic_index
[i
];
1151 /* The ARB_shader_viewport_layer_array spec contains the
1154 * 2) What happens if gl_ViewportIndex or gl_Layer is
1155 * written in the vertex shader and a geometry shader is
1158 * RESOLVED: The value written by the last vertex processing
1159 * stage is used. If the last vertex processing stage
1160 * (vertex, tessellation evaluation or geometry) does not
1161 * statically assign to gl_ViewportIndex or gl_Layer, index
1162 * or layer zero is assumed.
1164 * So writes to those outputs in VS-as-LS are simply ignored.
1166 if (name
== TGSI_SEMANTIC_LAYER
||
1167 name
== TGSI_SEMANTIC_VIEWPORT_INDEX
)
1170 int param
= si_shader_io_get_unique_index(name
, index
, false);
1171 LLVMValueRef dw_addr
= LLVMBuildAdd(ctx
->ac
.builder
, base_dw_addr
,
1172 LLVMConstInt(ctx
->i32
, param
* 4, 0), "");
1174 for (chan
= 0; chan
< 4; chan
++) {
1175 if (!(info
->output_usagemask
[i
] & (1 << chan
)))
1178 lshs_lds_store(ctx
, chan
, dw_addr
,
1179 LLVMBuildLoad(ctx
->ac
.builder
, addrs
[4 * i
+ chan
], ""));
1183 if (ctx
->screen
->info
.chip_class
>= GFX9
)
1184 si_set_ls_return_value_for_tcs(ctx
);
1188 * Compile the TCS epilog function. This writes tesselation factors to memory
1189 * based on the output primitive type of the tesselator (determined by TES).
1191 void si_llvm_build_tcs_epilog(struct si_shader_context
*ctx
,
1192 union si_shader_part_key
*key
)
1194 memset(&ctx
->args
, 0, sizeof(ctx
->args
));
1196 if (ctx
->screen
->info
.chip_class
>= GFX9
) {
1197 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
, NULL
);
1198 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
, NULL
);
1199 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
,
1200 &ctx
->tcs_offchip_offset
);
1201 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
, NULL
); /* wave info */
1202 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
,
1203 &ctx
->tcs_factor_offset
);
1204 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
, NULL
);
1205 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
, NULL
);
1206 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
, NULL
);
1207 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
, NULL
);
1208 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
, NULL
);
1209 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
, NULL
);
1210 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
, NULL
);
1211 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
, NULL
);
1212 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
, NULL
);
1213 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
, NULL
);
1214 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
, NULL
);
1215 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
,
1216 &ctx
->tcs_offchip_layout
);
1217 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
, NULL
);
1218 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
,
1219 &ctx
->tcs_out_lds_layout
);
1221 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
, NULL
);
1222 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
, NULL
);
1223 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
, NULL
);
1224 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
, NULL
);
1225 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
,
1226 &ctx
->tcs_offchip_layout
);
1227 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
, NULL
);
1228 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
,
1229 &ctx
->tcs_out_lds_layout
);
1230 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
, NULL
);
1231 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
,
1232 &ctx
->tcs_offchip_offset
);
1233 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
,
1234 &ctx
->tcs_factor_offset
);
1237 ac_add_arg(&ctx
->args
, AC_ARG_VGPR
, 1, AC_ARG_INT
, NULL
); /* VGPR gap */
1238 ac_add_arg(&ctx
->args
, AC_ARG_VGPR
, 1, AC_ARG_INT
, NULL
); /* VGPR gap */
1239 struct ac_arg rel_patch_id
; /* patch index within the wave (REL_PATCH_ID) */
1240 ac_add_arg(&ctx
->args
, AC_ARG_VGPR
, 1, AC_ARG_INT
, &rel_patch_id
);
1241 struct ac_arg invocation_id
; /* invocation ID within the patch */
1242 ac_add_arg(&ctx
->args
, AC_ARG_VGPR
, 1, AC_ARG_INT
, &invocation_id
);
1243 struct ac_arg tcs_out_current_patch_data_offset
; /* LDS offset where tess factors should be loaded from */
1244 ac_add_arg(&ctx
->args
, AC_ARG_VGPR
, 1, AC_ARG_INT
,
1245 &tcs_out_current_patch_data_offset
);
1247 struct ac_arg tess_factors
[6];
1248 for (unsigned i
= 0; i
< 6; i
++)
1249 ac_add_arg(&ctx
->args
, AC_ARG_VGPR
, 1, AC_ARG_INT
, &tess_factors
[i
]);
1251 /* Create the function. */
1252 si_llvm_create_func(ctx
, "tcs_epilog", NULL
, 0,
1253 ctx
->screen
->info
.chip_class
>= GFX7
? 128 : 0);
1254 ac_declare_lds_as_pointer(&ctx
->ac
);
1256 LLVMValueRef invoc0_tess_factors
[6];
1257 for (unsigned i
= 0; i
< 6; i
++)
1258 invoc0_tess_factors
[i
] = ac_get_arg(&ctx
->ac
, tess_factors
[i
]);
1260 si_write_tess_factors(ctx
,
1261 ac_get_arg(&ctx
->ac
, rel_patch_id
),
1262 ac_get_arg(&ctx
->ac
, invocation_id
),
1263 ac_get_arg(&ctx
->ac
, tcs_out_current_patch_data_offset
),
1264 invoc0_tess_factors
, invoc0_tess_factors
+ 4);
1266 LLVMBuildRetVoid(ctx
->ac
.builder
);
1269 void si_llvm_init_tcs_callbacks(struct si_shader_context
*ctx
)
1271 ctx
->abi
.load_tess_varyings
= si_nir_load_tcs_varyings
;
1272 ctx
->abi
.load_tess_level
= si_load_tess_level
;
1273 ctx
->abi
.store_tcs_outputs
= si_nir_store_output_tcs
;
1274 ctx
->abi
.emit_outputs
= si_llvm_emit_tcs_epilogue
;
1275 ctx
->abi
.load_patch_vertices_in
= si_load_patch_vertices_in
;
1278 void si_llvm_init_tes_callbacks(struct si_shader_context
*ctx
)
1280 ctx
->abi
.load_tess_varyings
= si_nir_load_input_tes
;
1281 ctx
->abi
.load_tess_coord
= si_load_tess_coord
;
1282 ctx
->abi
.load_tess_level
= si_load_tess_level
;
1283 ctx
->abi
.load_patch_vertices_in
= si_load_patch_vertices_in
;