/*
 * Copyright 2020 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
26 #include "si_shader_internal.h"
29 static LLVMValueRef
get_rel_patch_id(struct si_shader_context
*ctx
)
32 case MESA_SHADER_TESS_CTRL
:
33 return si_unpack_param(ctx
, ctx
->args
.tcs_rel_ids
, 0, 8);
35 case MESA_SHADER_TESS_EVAL
:
36 return ac_get_arg(&ctx
->ac
, ctx
->tes_rel_patch_id
);
/* Tessellation shaders pass outputs to the next shader using LDS.
 *
 * LS outputs = TCS inputs
 * TCS outputs = TES inputs
 *
 * The LDS layout is:
 * - TCS inputs for patch 0
 * - TCS inputs for patch 1
 * - TCS inputs for patch 2             = get_tcs_in_current_patch_offset (if RelPatchID==2)
 * - ...
 * - TCS outputs for patch 0            = get_tcs_out_patch0_offset
 * - Per-patch TCS outputs for patch 0  = get_tcs_out_patch0_patch_data_offset
 * - TCS outputs for patch 1
 * - Per-patch TCS outputs for patch 1
 * - TCS outputs for patch 2            = get_tcs_out_current_patch_offset (if RelPatchID==2)
 * - Per-patch TCS outputs for patch 2  = get_tcs_out_current_patch_data_offset (if RelPatchID==2)
 * - ...
 *
 * All three shaders VS(LS), TCS, TES share the same LDS space.
 */
65 static LLVMValueRef
get_tcs_in_patch_stride(struct si_shader_context
*ctx
)
67 return si_unpack_param(ctx
, ctx
->vs_state_bits
, 11, 13);
70 static unsigned get_tcs_out_vertex_dw_stride_constant(struct si_shader_context
*ctx
)
72 assert(ctx
->stage
== MESA_SHADER_TESS_CTRL
);
74 if (ctx
->shader
->key
.mono
.u
.ff_tcs_inputs_to_copy
)
75 return util_last_bit64(ctx
->shader
->key
.mono
.u
.ff_tcs_inputs_to_copy
) * 4;
77 return util_last_bit64(ctx
->shader
->selector
->outputs_written
) * 4;
80 static LLVMValueRef
get_tcs_out_vertex_dw_stride(struct si_shader_context
*ctx
)
82 unsigned stride
= get_tcs_out_vertex_dw_stride_constant(ctx
);
84 return LLVMConstInt(ctx
->ac
.i32
, stride
, 0);
87 static LLVMValueRef
get_tcs_out_patch_stride(struct si_shader_context
*ctx
)
89 if (ctx
->shader
->key
.mono
.u
.ff_tcs_inputs_to_copy
)
90 return si_unpack_param(ctx
, ctx
->tcs_out_lds_layout
, 0, 13);
92 const struct si_shader_info
*info
= &ctx
->shader
->selector
->info
;
93 unsigned tcs_out_vertices
= info
->base
.tess
.tcs_vertices_out
;
94 unsigned vertex_dw_stride
= get_tcs_out_vertex_dw_stride_constant(ctx
);
95 unsigned num_patch_outputs
= util_last_bit64(ctx
->shader
->selector
->patch_outputs_written
);
96 unsigned patch_dw_stride
= tcs_out_vertices
* vertex_dw_stride
+ num_patch_outputs
* 4;
97 return LLVMConstInt(ctx
->ac
.i32
, patch_dw_stride
, 0);
100 static LLVMValueRef
get_tcs_out_patch0_offset(struct si_shader_context
*ctx
)
102 return LLVMBuildMul(ctx
->ac
.builder
, si_unpack_param(ctx
, ctx
->tcs_out_lds_offsets
, 0, 16),
103 LLVMConstInt(ctx
->ac
.i32
, 4, 0), "");
106 static LLVMValueRef
get_tcs_out_patch0_patch_data_offset(struct si_shader_context
*ctx
)
108 return LLVMBuildMul(ctx
->ac
.builder
, si_unpack_param(ctx
, ctx
->tcs_out_lds_offsets
, 16, 16),
109 LLVMConstInt(ctx
->ac
.i32
, 4, 0), "");
112 static LLVMValueRef
get_tcs_in_current_patch_offset(struct si_shader_context
*ctx
)
114 LLVMValueRef patch_stride
= get_tcs_in_patch_stride(ctx
);
115 LLVMValueRef rel_patch_id
= get_rel_patch_id(ctx
);
117 return LLVMBuildMul(ctx
->ac
.builder
, patch_stride
, rel_patch_id
, "");
120 static LLVMValueRef
get_tcs_out_current_patch_offset(struct si_shader_context
*ctx
)
122 LLVMValueRef patch0_offset
= get_tcs_out_patch0_offset(ctx
);
123 LLVMValueRef patch_stride
= get_tcs_out_patch_stride(ctx
);
124 LLVMValueRef rel_patch_id
= get_rel_patch_id(ctx
);
126 return ac_build_imad(&ctx
->ac
, patch_stride
, rel_patch_id
, patch0_offset
);
129 static LLVMValueRef
get_tcs_out_current_patch_data_offset(struct si_shader_context
*ctx
)
131 LLVMValueRef patch0_patch_data_offset
= get_tcs_out_patch0_patch_data_offset(ctx
);
132 LLVMValueRef patch_stride
= get_tcs_out_patch_stride(ctx
);
133 LLVMValueRef rel_patch_id
= get_rel_patch_id(ctx
);
135 return ac_build_imad(&ctx
->ac
, patch_stride
, rel_patch_id
, patch0_patch_data_offset
);
138 static LLVMValueRef
get_num_tcs_out_vertices(struct si_shader_context
*ctx
)
140 unsigned tcs_out_vertices
=
141 ctx
->shader
->selector
? ctx
->shader
->selector
->info
.base
.tess
.tcs_vertices_out
144 /* If !tcs_out_vertices, it's either the fixed-func TCS or the TCS epilog. */
145 if (ctx
->stage
== MESA_SHADER_TESS_CTRL
&& tcs_out_vertices
)
146 return LLVMConstInt(ctx
->ac
.i32
, tcs_out_vertices
, 0);
148 return si_unpack_param(ctx
, ctx
->tcs_offchip_layout
, 6, 6);
151 static LLVMValueRef
get_tcs_in_vertex_dw_stride(struct si_shader_context
*ctx
)
155 switch (ctx
->stage
) {
156 case MESA_SHADER_VERTEX
:
157 stride
= ctx
->shader
->selector
->lshs_vertex_stride
/ 4;
158 return LLVMConstInt(ctx
->ac
.i32
, stride
, 0);
160 case MESA_SHADER_TESS_CTRL
:
161 if (ctx
->screen
->info
.chip_class
>= GFX9
&& ctx
->shader
->is_monolithic
) {
162 stride
= ctx
->shader
->key
.part
.tcs
.ls
->lshs_vertex_stride
/ 4;
163 return LLVMConstInt(ctx
->ac
.i32
, stride
, 0);
165 return si_unpack_param(ctx
, ctx
->vs_state_bits
, 24, 8);
174 get_dw_address_from_generic_indices(struct si_shader_context
*ctx
, LLVMValueRef vertex_dw_stride
,
175 LLVMValueRef base_addr
, LLVMValueRef vertex_index
,
176 LLVMValueRef param_index
, ubyte name
)
178 if (vertex_dw_stride
) {
179 base_addr
= ac_build_imad(&ctx
->ac
, vertex_index
, vertex_dw_stride
, base_addr
);
183 base_addr
= ac_build_imad(&ctx
->ac
, param_index
, LLVMConstInt(ctx
->ac
.i32
, 4, 0), base_addr
);
186 int param
= name
>= VARYING_SLOT_PATCH0
||
187 name
== VARYING_SLOT_TESS_LEVEL_INNER
||
188 name
== VARYING_SLOT_TESS_LEVEL_OUTER
189 ? si_shader_io_get_unique_index_patch(name
)
190 : si_shader_io_get_unique_index(name
, false);
192 /* Add the base address of the element. */
193 return LLVMBuildAdd(ctx
->ac
.builder
, base_addr
, LLVMConstInt(ctx
->ac
.i32
, param
* 4, 0), "");
196 /* The offchip buffer layout for TCS->TES is
198 * - attribute 0 of patch 0 vertex 0
199 * - attribute 0 of patch 0 vertex 1
200 * - attribute 0 of patch 0 vertex 2
202 * - attribute 0 of patch 1 vertex 0
203 * - attribute 0 of patch 1 vertex 1
205 * - attribute 1 of patch 0 vertex 0
206 * - attribute 1 of patch 0 vertex 1
208 * - per patch attribute 0 of patch 0
209 * - per patch attribute 0 of patch 1
212 * Note that every attribute has 4 components.
214 static LLVMValueRef
get_tcs_tes_buffer_address(struct si_shader_context
*ctx
,
215 LLVMValueRef rel_patch_id
, LLVMValueRef vertex_index
,
216 LLVMValueRef param_index
)
218 LLVMValueRef base_addr
, vertices_per_patch
, num_patches
, total_vertices
;
219 LLVMValueRef param_stride
, constant16
;
221 vertices_per_patch
= get_num_tcs_out_vertices(ctx
);
222 num_patches
= si_unpack_param(ctx
, ctx
->tcs_offchip_layout
, 0, 6);
223 total_vertices
= LLVMBuildMul(ctx
->ac
.builder
, vertices_per_patch
, num_patches
, "");
225 constant16
= LLVMConstInt(ctx
->ac
.i32
, 16, 0);
227 base_addr
= ac_build_imad(&ctx
->ac
, rel_patch_id
, vertices_per_patch
, vertex_index
);
228 param_stride
= total_vertices
;
230 base_addr
= rel_patch_id
;
231 param_stride
= num_patches
;
234 base_addr
= ac_build_imad(&ctx
->ac
, param_index
, param_stride
, base_addr
);
235 base_addr
= LLVMBuildMul(ctx
->ac
.builder
, base_addr
, constant16
, "");
238 LLVMValueRef patch_data_offset
= si_unpack_param(ctx
, ctx
->tcs_offchip_layout
, 12, 20);
240 base_addr
= LLVMBuildAdd(ctx
->ac
.builder
, base_addr
, patch_data_offset
, "");
245 static LLVMValueRef
get_tcs_tes_buffer_address_from_generic_indices(struct si_shader_context
*ctx
,
246 LLVMValueRef vertex_index
,
247 LLVMValueRef param_index
,
250 unsigned param_index_base
;
252 param_index_base
= name
>= VARYING_SLOT_PATCH0
||
253 name
== VARYING_SLOT_TESS_LEVEL_INNER
||
254 name
== VARYING_SLOT_TESS_LEVEL_OUTER
255 ? si_shader_io_get_unique_index_patch(name
)
256 : si_shader_io_get_unique_index(name
, false);
259 param_index
= LLVMBuildAdd(ctx
->ac
.builder
, param_index
,
260 LLVMConstInt(ctx
->ac
.i32
, param_index_base
, 0), "");
262 param_index
= LLVMConstInt(ctx
->ac
.i32
, param_index_base
, 0);
265 return get_tcs_tes_buffer_address(ctx
, get_rel_patch_id(ctx
), vertex_index
, param_index
);
268 static LLVMValueRef
buffer_load(struct si_shader_context
*ctx
, LLVMTypeRef type
, unsigned swizzle
,
269 LLVMValueRef buffer
, LLVMValueRef offset
, LLVMValueRef base
,
272 LLVMValueRef value
, value2
;
273 LLVMTypeRef vec_type
= LLVMVectorType(type
, 4);
276 value
= ac_build_buffer_load(&ctx
->ac
, buffer
, 4, NULL
, base
, offset
, 0, ac_glc
,
277 can_speculate
, false);
279 return LLVMBuildBitCast(ctx
->ac
.builder
, value
, vec_type
, "");
282 if (ac_get_type_size(type
) != 8) {
283 value
= ac_build_buffer_load(&ctx
->ac
, buffer
, 4, NULL
, base
, offset
, 0, ac_glc
,
284 can_speculate
, false);
286 value
= LLVMBuildBitCast(ctx
->ac
.builder
, value
, vec_type
, "");
287 return LLVMBuildExtractElement(ctx
->ac
.builder
, value
, LLVMConstInt(ctx
->ac
.i32
, swizzle
, 0),
291 value
= ac_build_buffer_load(&ctx
->ac
, buffer
, 1, NULL
, base
, offset
, swizzle
* 4, ac_glc
,
292 can_speculate
, false);
294 value2
= ac_build_buffer_load(&ctx
->ac
, buffer
, 1, NULL
, base
, offset
, swizzle
* 4 + 4, ac_glc
,
295 can_speculate
, false);
297 return si_build_gather_64bit(ctx
, type
, value
, value2
);
301 * Load from LSHS LDS storage.
303 * \param type output value type
304 * \param swizzle offset (typically 0..3); it can be ~0, which loads a vec4
305 * \param dw_addr address in dwords
307 static LLVMValueRef
lshs_lds_load(struct si_shader_context
*ctx
, LLVMTypeRef type
, unsigned swizzle
,
308 LLVMValueRef dw_addr
)
313 LLVMValueRef values
[4];
315 for (unsigned chan
= 0; chan
< 4; chan
++)
316 values
[chan
] = lshs_lds_load(ctx
, type
, chan
, dw_addr
);
318 return ac_build_gather_values(&ctx
->ac
, values
, 4);
321 /* Split 64-bit loads. */
322 if (ac_get_type_size(type
) == 8) {
325 lo
= lshs_lds_load(ctx
, ctx
->ac
.i32
, swizzle
, dw_addr
);
326 hi
= lshs_lds_load(ctx
, ctx
->ac
.i32
, swizzle
+ 1, dw_addr
);
327 return si_build_gather_64bit(ctx
, type
, lo
, hi
);
330 dw_addr
= LLVMBuildAdd(ctx
->ac
.builder
, dw_addr
, LLVMConstInt(ctx
->ac
.i32
, swizzle
, 0), "");
332 value
= ac_lds_load(&ctx
->ac
, dw_addr
);
334 return LLVMBuildBitCast(ctx
->ac
.builder
, value
, type
, "");
338 * Store to LSHS LDS storage.
340 * \param swizzle offset (typically 0..3)
341 * \param dw_addr address in dwords
342 * \param value value to store
344 static void lshs_lds_store(struct si_shader_context
*ctx
, unsigned dw_offset_imm
,
345 LLVMValueRef dw_addr
, LLVMValueRef value
)
348 LLVMBuildAdd(ctx
->ac
.builder
, dw_addr
, LLVMConstInt(ctx
->ac
.i32
, dw_offset_imm
, 0), "");
350 ac_lds_store(&ctx
->ac
, dw_addr
, value
);
/*
 * Selects which tessellation ring a descriptor is built for.
 *
 * NOTE(review): the enum header and the TCS_FACTOR_RING enumerator were
 * missing from the extracted text. They are restored from their uses in this
 * file (`enum si_tess_ring ring`, `ring == TCS_FACTOR_RING`). The values are
 * only compared for equality here, so the enumerator order is not
 * significant.
 */
enum si_tess_ring
{
   TCS_FACTOR_RING,
   TESS_OFFCHIP_RING_TCS,
   TESS_OFFCHIP_RING_TES,
};
360 static LLVMValueRef
get_tess_ring_descriptor(struct si_shader_context
*ctx
, enum si_tess_ring ring
)
362 LLVMBuilderRef builder
= ctx
->ac
.builder
;
363 LLVMValueRef addr
= ac_get_arg(
364 &ctx
->ac
, ring
== TESS_OFFCHIP_RING_TES
? ctx
->tes_offchip_addr
: ctx
->tcs_out_lds_layout
);
366 /* TCS only receives high 13 bits of the address. */
367 if (ring
== TESS_OFFCHIP_RING_TCS
|| ring
== TCS_FACTOR_RING
) {
368 addr
= LLVMBuildAnd(builder
, addr
, LLVMConstInt(ctx
->ac
.i32
, 0xfff80000, 0), "");
371 if (ring
== TCS_FACTOR_RING
) {
372 unsigned tf_offset
= ctx
->screen
->tess_offchip_ring_size
;
373 addr
= LLVMBuildAdd(builder
, addr
, LLVMConstInt(ctx
->ac
.i32
, tf_offset
, 0), "");
376 uint32_t rsrc3
= S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X
) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y
) |
377 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z
) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W
);
379 if (ctx
->screen
->info
.chip_class
>= GFX10
)
380 rsrc3
|= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT
) |
381 S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW
) | S_008F0C_RESOURCE_LEVEL(1);
383 rsrc3
|= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT
) |
384 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32
);
386 LLVMValueRef desc
[4];
388 desc
[1] = LLVMConstInt(ctx
->ac
.i32
, S_008F04_BASE_ADDRESS_HI(ctx
->screen
->info
.address32_hi
), 0);
389 desc
[2] = LLVMConstInt(ctx
->ac
.i32
, 0xffffffff, 0);
390 desc
[3] = LLVMConstInt(ctx
->ac
.i32
, rsrc3
, false);
392 return ac_build_gather_values(&ctx
->ac
, desc
, 4);
395 void si_llvm_preload_tes_rings(struct si_shader_context
*ctx
)
397 ctx
->tess_offchip_ring
= get_tess_ring_descriptor(ctx
, TESS_OFFCHIP_RING_TES
);
400 static LLVMValueRef
si_nir_load_tcs_varyings(struct ac_shader_abi
*abi
, LLVMTypeRef type
,
401 LLVMValueRef vertex_index
, LLVMValueRef param_index
,
402 unsigned const_index
, unsigned location
,
403 unsigned driver_location
, unsigned component
,
404 unsigned num_components
, bool unused
,
405 bool is_compact
, bool load_input
)
407 struct si_shader_context
*ctx
= si_shader_context_from_abi(abi
);
408 struct si_shader_info
*info
= &ctx
->shader
->selector
->info
;
409 LLVMValueRef dw_addr
, stride
;
412 driver_location
= driver_location
/ 4;
415 semantic
= info
->input_semantic
[driver_location
];
417 semantic
= info
->output_semantic
[driver_location
];
420 bool is_patch
= vertex_index
== NULL
;
421 assert((semantic
>= VARYING_SLOT_PATCH0
||
422 semantic
== VARYING_SLOT_TESS_LEVEL_INNER
||
423 semantic
== VARYING_SLOT_TESS_LEVEL_OUTER
) == is_patch
);
426 stride
= get_tcs_in_vertex_dw_stride(ctx
);
427 dw_addr
= get_tcs_in_current_patch_offset(ctx
);
431 dw_addr
= get_tcs_out_current_patch_data_offset(ctx
);
433 stride
= get_tcs_out_vertex_dw_stride(ctx
);
434 dw_addr
= get_tcs_out_current_patch_offset(ctx
);
439 param_index
= LLVMConstInt(ctx
->ac
.i32
, const_index
, 0);
442 dw_addr
= get_dw_address_from_generic_indices(ctx
, stride
, dw_addr
, vertex_index
, param_index
,
445 LLVMValueRef value
[4];
446 for (unsigned i
= 0; i
< num_components
; i
++) {
448 if (ac_get_type_size(type
) == 8)
452 value
[i
+ component
] = lshs_lds_load(ctx
, type
, offset
, dw_addr
);
455 return ac_build_varying_gather_values(&ctx
->ac
, value
, num_components
, component
);
458 static LLVMValueRef
si_nir_load_input_tes(struct ac_shader_abi
*abi
, LLVMTypeRef type
,
459 LLVMValueRef vertex_index
, LLVMValueRef param_index
,
460 unsigned const_index
, unsigned location
,
461 unsigned driver_location
, unsigned component
,
462 unsigned num_components
, bool unused
, bool is_compact
,
465 struct si_shader_context
*ctx
= si_shader_context_from_abi(abi
);
466 struct si_shader_info
*info
= &ctx
->shader
->selector
->info
;
467 LLVMValueRef base
, addr
;
469 driver_location
= driver_location
/ 4;
470 ubyte semantic
= info
->input_semantic
[driver_location
];
472 assert((semantic
>= VARYING_SLOT_PATCH0
||
473 semantic
== VARYING_SLOT_TESS_LEVEL_INNER
||
474 semantic
== VARYING_SLOT_TESS_LEVEL_OUTER
) == (vertex_index
== NULL
));
476 base
= ac_get_arg(&ctx
->ac
, ctx
->tcs_offchip_offset
);
479 param_index
= LLVMConstInt(ctx
->ac
.i32
, const_index
, 0);
483 get_tcs_tes_buffer_address_from_generic_indices(ctx
, vertex_index
, param_index
, semantic
);
485 /* TODO: This will generate rather ordinary llvm code, although it
486 * should be easy for the optimiser to fix up. In future we might want
487 * to refactor buffer_load().
489 LLVMValueRef value
[4];
490 for (unsigned i
= 0; i
< num_components
; i
++) {
492 if (ac_get_type_size(type
) == 8) {
495 ubyte semantic
= info
->input_semantic
[driver_location
+ 1];
496 addr
= get_tcs_tes_buffer_address_from_generic_indices(ctx
, vertex_index
, param_index
,
504 value
[i
+ component
] =
505 buffer_load(ctx
, type
, offset
, ctx
->tess_offchip_ring
, base
, addr
, true);
508 return ac_build_varying_gather_values(&ctx
->ac
, value
, num_components
, component
);
511 static void si_nir_store_output_tcs(struct ac_shader_abi
*abi
, const struct nir_variable
*var
,
512 LLVMValueRef vertex_index
, LLVMValueRef param_index
,
513 unsigned const_index
, LLVMValueRef src
, unsigned writemask
,
514 unsigned component
, unsigned driver_location
)
516 struct si_shader_context
*ctx
= si_shader_context_from_abi(abi
);
517 struct si_shader_info
*info
= &ctx
->shader
->selector
->info
;
518 LLVMValueRef dw_addr
, stride
;
519 LLVMValueRef buffer
, base
, addr
;
520 LLVMValueRef values
[8];
521 bool is_tess_factor
= false, is_tess_inner
= false;
523 driver_location
= driver_location
/ 4;
524 ubyte semantic
= info
->output_semantic
[driver_location
];
526 bool is_const
= !param_index
;
528 param_index
= LLVMConstInt(ctx
->ac
.i32
, const_index
, 0);
530 const bool is_patch
= vertex_index
== NULL
;
532 /* Invalid SPIR-V can cause this. */
533 if ((semantic
>= VARYING_SLOT_PATCH0
|| semantic
== VARYING_SLOT_TESS_LEVEL_INNER
||
534 semantic
== VARYING_SLOT_TESS_LEVEL_OUTER
) != is_patch
)
538 stride
= get_tcs_out_vertex_dw_stride(ctx
);
539 dw_addr
= get_tcs_out_current_patch_offset(ctx
);
540 dw_addr
= get_dw_address_from_generic_indices(ctx
, stride
, dw_addr
, vertex_index
, param_index
,
543 dw_addr
= get_tcs_out_current_patch_data_offset(ctx
);
544 dw_addr
= get_dw_address_from_generic_indices(ctx
, NULL
, dw_addr
, vertex_index
, param_index
,
547 if (is_const
&& const_index
== 0) {
548 int semantic
= info
->output_semantic
[driver_location
];
550 /* Always write tess factors into LDS for the TCS epilog. */
551 if (semantic
== VARYING_SLOT_TESS_LEVEL_INNER
||
552 semantic
== VARYING_SLOT_TESS_LEVEL_OUTER
) {
553 is_tess_factor
= true;
554 is_tess_inner
= semantic
== VARYING_SLOT_TESS_LEVEL_INNER
;
559 buffer
= get_tess_ring_descriptor(ctx
, TESS_OFFCHIP_RING_TCS
);
561 base
= ac_get_arg(&ctx
->ac
, ctx
->tcs_offchip_offset
);
564 get_tcs_tes_buffer_address_from_generic_indices(ctx
, vertex_index
, param_index
, semantic
);
566 for (unsigned chan
= component
; chan
< 8; chan
++) {
567 if (!(writemask
& (1 << chan
)))
569 LLVMValueRef value
= ac_llvm_extract_elem(&ctx
->ac
, src
, chan
- component
);
571 unsigned buffer_store_offset
= chan
% 4;
573 ubyte semantic
= info
->output_semantic
[driver_location
+ 1];
574 addr
= get_tcs_tes_buffer_address_from_generic_indices(ctx
, vertex_index
, param_index
,
578 /* Skip LDS stores if there is no LDS read of this output. */
579 if (info
->output_readmask
[driver_location
+ chan
/ 4] & (1 << (chan
% 4)) ||
580 /* The epilog reads LDS if invocation 0 doesn't define tess factors. */
582 !ctx
->shader
->selector
->info
.tessfactors_are_def_in_all_invocs
))
583 lshs_lds_store(ctx
, chan
, dw_addr
, value
);
585 value
= ac_to_integer(&ctx
->ac
, value
);
586 values
[chan
] = value
;
588 if (writemask
!= 0xF && !is_tess_factor
) {
589 ac_build_buffer_store_dword(&ctx
->ac
, buffer
, value
, 1, addr
, base
,
590 4 * buffer_store_offset
, ac_glc
);
593 /* Write tess factors into VGPRs for the epilog. */
594 if (is_tess_factor
&& ctx
->shader
->selector
->info
.tessfactors_are_def_in_all_invocs
) {
595 if (!is_tess_inner
) {
596 LLVMBuildStore(ctx
->ac
.builder
, value
, /* outer */
597 ctx
->invoc0_tess_factors
[chan
]);
598 } else if (chan
< 2) {
599 LLVMBuildStore(ctx
->ac
.builder
, value
, /* inner */
600 ctx
->invoc0_tess_factors
[4 + chan
]);
605 if (writemask
== 0xF && !is_tess_factor
) {
606 LLVMValueRef value
= ac_build_gather_values(&ctx
->ac
, values
, 4);
607 ac_build_buffer_store_dword(&ctx
->ac
, buffer
, value
, 4, addr
, base
, 0, ac_glc
);
611 static LLVMValueRef
si_load_tess_coord(struct ac_shader_abi
*abi
)
613 struct si_shader_context
*ctx
= si_shader_context_from_abi(abi
);
614 LLVMValueRef coord
[4] = {ac_get_arg(&ctx
->ac
, ctx
->tes_u
), ac_get_arg(&ctx
->ac
, ctx
->tes_v
),
615 ctx
->ac
.f32_0
, ctx
->ac
.f32_0
};
617 /* For triangles, the vector should be (u, v, 1-u-v). */
618 if (ctx
->shader
->selector
->info
.properties
[TGSI_PROPERTY_TES_PRIM_MODE
] == PIPE_PRIM_TRIANGLES
) {
619 coord
[2] = LLVMBuildFSub(ctx
->ac
.builder
, ctx
->ac
.f32_1
,
620 LLVMBuildFAdd(ctx
->ac
.builder
, coord
[0], coord
[1], ""), "");
622 return ac_build_gather_values(&ctx
->ac
, coord
, 4);
625 static LLVMValueRef
load_tess_level(struct si_shader_context
*ctx
, unsigned semantic
)
627 LLVMValueRef base
, addr
;
629 int param
= si_shader_io_get_unique_index_patch(semantic
);
631 base
= ac_get_arg(&ctx
->ac
, ctx
->tcs_offchip_offset
);
632 addr
= get_tcs_tes_buffer_address(ctx
, get_rel_patch_id(ctx
), NULL
,
633 LLVMConstInt(ctx
->ac
.i32
, param
, 0));
635 return buffer_load(ctx
, ctx
->ac
.f32
, ~0, ctx
->tess_offchip_ring
, base
, addr
, true);
638 static LLVMValueRef
load_tess_level_default(struct si_shader_context
*ctx
, unsigned sysval
)
640 LLVMValueRef buf
, slot
, val
[4];
643 slot
= LLVMConstInt(ctx
->ac
.i32
, SI_HS_CONST_DEFAULT_TESS_LEVELS
, 0);
644 buf
= ac_get_arg(&ctx
->ac
, ctx
->rw_buffers
);
645 buf
= ac_build_load_to_sgpr(&ctx
->ac
, buf
, slot
);
646 offset
= sysval
== SYSTEM_VALUE_TESS_LEVEL_INNER_DEFAULT
? 4 : 0;
648 for (i
= 0; i
< 4; i
++)
649 val
[i
] = si_buffer_load_const(ctx
, buf
, LLVMConstInt(ctx
->ac
.i32
, (offset
+ i
) * 4, 0));
650 return ac_build_gather_values(&ctx
->ac
, val
, 4);
653 static LLVMValueRef
si_load_tess_level(struct ac_shader_abi
*abi
, unsigned varying_id
,
654 bool load_default_state
)
656 struct si_shader_context
*ctx
= si_shader_context_from_abi(abi
);
659 if (load_default_state
) {
660 switch (varying_id
) {
661 case VARYING_SLOT_TESS_LEVEL_INNER
:
662 semantic
= SYSTEM_VALUE_TESS_LEVEL_INNER_DEFAULT
;
664 case VARYING_SLOT_TESS_LEVEL_OUTER
:
665 semantic
= SYSTEM_VALUE_TESS_LEVEL_OUTER_DEFAULT
;
668 unreachable("unknown tess level");
670 return load_tess_level_default(ctx
, semantic
);
673 switch (varying_id
) {
674 case VARYING_SLOT_TESS_LEVEL_INNER
:
675 semantic
= VARYING_SLOT_TESS_LEVEL_INNER
;
677 case VARYING_SLOT_TESS_LEVEL_OUTER
:
678 semantic
= VARYING_SLOT_TESS_LEVEL_OUTER
;
681 unreachable("unknown tess level");
684 return load_tess_level(ctx
, semantic
);
687 static LLVMValueRef
si_load_patch_vertices_in(struct ac_shader_abi
*abi
)
689 struct si_shader_context
*ctx
= si_shader_context_from_abi(abi
);
690 if (ctx
->stage
== MESA_SHADER_TESS_CTRL
)
691 return si_unpack_param(ctx
, ctx
->tcs_out_lds_layout
, 13, 6);
692 else if (ctx
->stage
== MESA_SHADER_TESS_EVAL
)
693 return get_num_tcs_out_vertices(ctx
);
695 unreachable("invalid shader stage for VERTICESIN");
699 * Forward all outputs from the vertex shader to the TES. This is only used
700 * for the fixed function TCS.
702 static void si_copy_tcs_inputs(struct si_shader_context
*ctx
)
704 LLVMValueRef invocation_id
, buffer
, buffer_offset
;
705 LLVMValueRef lds_vertex_stride
, lds_base
;
708 invocation_id
= si_unpack_param(ctx
, ctx
->args
.tcs_rel_ids
, 8, 5);
709 buffer
= get_tess_ring_descriptor(ctx
, TESS_OFFCHIP_RING_TCS
);
710 buffer_offset
= ac_get_arg(&ctx
->ac
, ctx
->tcs_offchip_offset
);
712 lds_vertex_stride
= get_tcs_in_vertex_dw_stride(ctx
);
713 lds_base
= get_tcs_in_current_patch_offset(ctx
);
714 lds_base
= ac_build_imad(&ctx
->ac
, invocation_id
, lds_vertex_stride
, lds_base
);
716 inputs
= ctx
->shader
->key
.mono
.u
.ff_tcs_inputs_to_copy
;
718 unsigned i
= u_bit_scan64(&inputs
);
720 LLVMValueRef lds_ptr
=
721 LLVMBuildAdd(ctx
->ac
.builder
, lds_base
, LLVMConstInt(ctx
->ac
.i32
, 4 * i
, 0), "");
723 LLVMValueRef buffer_addr
= get_tcs_tes_buffer_address(
724 ctx
, get_rel_patch_id(ctx
), invocation_id
, LLVMConstInt(ctx
->ac
.i32
, i
, 0));
726 LLVMValueRef value
= lshs_lds_load(ctx
, ctx
->ac
.i32
, ~0, lds_ptr
);
728 ac_build_buffer_store_dword(&ctx
->ac
, buffer
, value
, 4, buffer_addr
, buffer_offset
, 0,
733 static void si_write_tess_factors(struct si_shader_context
*ctx
, LLVMValueRef rel_patch_id
,
734 LLVMValueRef invocation_id
,
735 LLVMValueRef tcs_out_current_patch_data_offset
,
736 LLVMValueRef invoc0_tf_outer
[4], LLVMValueRef invoc0_tf_inner
[2])
738 struct si_shader
*shader
= ctx
->shader
;
739 unsigned tess_inner_index
, tess_outer_index
;
740 LLVMValueRef lds_base
, lds_inner
, lds_outer
, byteoffset
, buffer
;
741 LLVMValueRef out
[6], vec0
, vec1
, tf_base
, inner
[4], outer
[4];
742 unsigned stride
, outer_comps
, inner_comps
, i
, offset
;
744 /* Add a barrier before loading tess factors from LDS. */
745 if (!shader
->key
.part
.tcs
.epilog
.invoc0_tess_factors_are_def
)
746 si_llvm_emit_barrier(ctx
);
748 /* Do this only for invocation 0, because the tess levels are per-patch,
751 * This can't jump, because invocation 0 executes this. It should
752 * at least mask out the loads and stores for other invocations.
754 ac_build_ifcc(&ctx
->ac
,
755 LLVMBuildICmp(ctx
->ac
.builder
, LLVMIntEQ
, invocation_id
, ctx
->ac
.i32_0
, ""), 6503);
757 /* Determine the layout of one tess factor element in the buffer. */
758 switch (shader
->key
.part
.tcs
.epilog
.prim_mode
) {
759 case PIPE_PRIM_LINES
:
760 stride
= 2; /* 2 dwords, 1 vec2 store */
764 case PIPE_PRIM_TRIANGLES
:
765 stride
= 4; /* 4 dwords, 1 vec4 store */
769 case PIPE_PRIM_QUADS
:
770 stride
= 6; /* 6 dwords, 2 stores (vec4 + vec2) */
779 for (i
= 0; i
< 4; i
++) {
780 inner
[i
] = LLVMGetUndef(ctx
->ac
.i32
);
781 outer
[i
] = LLVMGetUndef(ctx
->ac
.i32
);
784 if (shader
->key
.part
.tcs
.epilog
.invoc0_tess_factors_are_def
) {
785 /* Tess factors are in VGPRs. */
786 for (i
= 0; i
< outer_comps
; i
++)
787 outer
[i
] = out
[i
] = invoc0_tf_outer
[i
];
788 for (i
= 0; i
< inner_comps
; i
++)
789 inner
[i
] = out
[outer_comps
+ i
] = invoc0_tf_inner
[i
];
791 /* Load tess_inner and tess_outer from LDS.
792 * Any invocation can write them, so we can't get them from a temporary.
794 tess_inner_index
= si_shader_io_get_unique_index_patch(VARYING_SLOT_TESS_LEVEL_INNER
);
795 tess_outer_index
= si_shader_io_get_unique_index_patch(VARYING_SLOT_TESS_LEVEL_OUTER
);
797 lds_base
= tcs_out_current_patch_data_offset
;
798 lds_inner
= LLVMBuildAdd(ctx
->ac
.builder
, lds_base
,
799 LLVMConstInt(ctx
->ac
.i32
, tess_inner_index
* 4, 0), "");
800 lds_outer
= LLVMBuildAdd(ctx
->ac
.builder
, lds_base
,
801 LLVMConstInt(ctx
->ac
.i32
, tess_outer_index
* 4, 0), "");
803 for (i
= 0; i
< outer_comps
; i
++) {
804 outer
[i
] = out
[i
] = lshs_lds_load(ctx
, ctx
->ac
.i32
, i
, lds_outer
);
806 for (i
= 0; i
< inner_comps
; i
++) {
807 inner
[i
] = out
[outer_comps
+ i
] = lshs_lds_load(ctx
, ctx
->ac
.i32
, i
, lds_inner
);
811 if (shader
->key
.part
.tcs
.epilog
.prim_mode
== PIPE_PRIM_LINES
) {
812 /* For isolines, the hardware expects tess factors in the
813 * reverse order from what NIR specifies.
815 LLVMValueRef tmp
= out
[0];
820 /* Convert the outputs to vectors for stores. */
821 vec0
= ac_build_gather_values(&ctx
->ac
, out
, MIN2(stride
, 4));
825 vec1
= ac_build_gather_values(&ctx
->ac
, out
+ 4, stride
- 4);
827 /* Get the buffer. */
828 buffer
= get_tess_ring_descriptor(ctx
, TCS_FACTOR_RING
);
830 /* Get the offset. */
831 tf_base
= ac_get_arg(&ctx
->ac
, ctx
->tcs_factor_offset
);
833 LLVMBuildMul(ctx
->ac
.builder
, rel_patch_id
, LLVMConstInt(ctx
->ac
.i32
, 4 * stride
, 0), "");
835 ac_build_ifcc(&ctx
->ac
,
836 LLVMBuildICmp(ctx
->ac
.builder
, LLVMIntEQ
, rel_patch_id
, ctx
->ac
.i32_0
, ""), 6504);
838 /* Store the dynamic HS control word. */
840 if (ctx
->screen
->info
.chip_class
<= GFX8
) {
841 ac_build_buffer_store_dword(&ctx
->ac
, buffer
, LLVMConstInt(ctx
->ac
.i32
, 0x80000000, 0), 1,
842 ctx
->ac
.i32_0
, tf_base
, offset
, ac_glc
);
846 ac_build_endif(&ctx
->ac
, 6504);
848 /* Store the tessellation factors. */
849 ac_build_buffer_store_dword(&ctx
->ac
, buffer
, vec0
, MIN2(stride
, 4), byteoffset
, tf_base
, offset
,
853 ac_build_buffer_store_dword(&ctx
->ac
, buffer
, vec1
, stride
- 4, byteoffset
, tf_base
, offset
,
856 /* Store the tess factors into the offchip buffer if TES reads them. */
857 if (shader
->key
.part
.tcs
.epilog
.tes_reads_tess_factors
) {
858 LLVMValueRef buf
, base
, inner_vec
, outer_vec
, tf_outer_offset
;
859 LLVMValueRef tf_inner_offset
;
860 unsigned param_outer
, param_inner
;
862 buf
= get_tess_ring_descriptor(ctx
, TESS_OFFCHIP_RING_TCS
);
863 base
= ac_get_arg(&ctx
->ac
, ctx
->tcs_offchip_offset
);
865 param_outer
= si_shader_io_get_unique_index_patch(VARYING_SLOT_TESS_LEVEL_OUTER
);
866 tf_outer_offset
= get_tcs_tes_buffer_address(ctx
, rel_patch_id
, NULL
,
867 LLVMConstInt(ctx
->ac
.i32
, param_outer
, 0));
869 unsigned outer_vec_size
= ac_has_vec3_support(ctx
->screen
->info
.chip_class
, false)
871 : util_next_power_of_two(outer_comps
);
872 outer_vec
= ac_build_gather_values(&ctx
->ac
, outer
, outer_vec_size
);
874 ac_build_buffer_store_dword(&ctx
->ac
, buf
, outer_vec
, outer_comps
, tf_outer_offset
, base
, 0,
877 param_inner
= si_shader_io_get_unique_index_patch(VARYING_SLOT_TESS_LEVEL_INNER
);
878 tf_inner_offset
= get_tcs_tes_buffer_address(ctx
, rel_patch_id
, NULL
,
879 LLVMConstInt(ctx
->ac
.i32
, param_inner
, 0));
882 inner_comps
== 1 ? inner
[0] : ac_build_gather_values(&ctx
->ac
, inner
, inner_comps
);
883 ac_build_buffer_store_dword(&ctx
->ac
, buf
, inner_vec
, inner_comps
, tf_inner_offset
, base
,
888 ac_build_endif(&ctx
->ac
, 6503);
891 /* This only writes the tessellation factor levels. */
892 static void si_llvm_emit_tcs_epilogue(struct ac_shader_abi
*abi
, unsigned max_outputs
,
895 struct si_shader_context
*ctx
= si_shader_context_from_abi(abi
);
896 LLVMBuilderRef builder
= ctx
->ac
.builder
;
897 LLVMValueRef rel_patch_id
, invocation_id
, tf_lds_offset
;
899 si_copy_tcs_inputs(ctx
);
901 rel_patch_id
= get_rel_patch_id(ctx
);
902 invocation_id
= si_unpack_param(ctx
, ctx
->args
.tcs_rel_ids
, 8, 5);
903 tf_lds_offset
= get_tcs_out_current_patch_data_offset(ctx
);
905 if (ctx
->screen
->info
.chip_class
>= GFX9
) {
906 LLVMBasicBlockRef blocks
[2] = {LLVMGetInsertBlock(builder
), ctx
->merged_wrap_if_entry_block
};
907 LLVMValueRef values
[2];
909 ac_build_endif(&ctx
->ac
, ctx
->merged_wrap_if_label
);
911 values
[0] = rel_patch_id
;
912 values
[1] = LLVMGetUndef(ctx
->ac
.i32
);
913 rel_patch_id
= ac_build_phi(&ctx
->ac
, ctx
->ac
.i32
, 2, values
, blocks
);
915 values
[0] = tf_lds_offset
;
916 values
[1] = LLVMGetUndef(ctx
->ac
.i32
);
917 tf_lds_offset
= ac_build_phi(&ctx
->ac
, ctx
->ac
.i32
, 2, values
, blocks
);
919 values
[0] = invocation_id
;
920 values
[1] = ctx
->ac
.i32_1
; /* cause the epilog to skip threads */
921 invocation_id
= ac_build_phi(&ctx
->ac
, ctx
->ac
.i32
, 2, values
, blocks
);
924 /* Return epilog parameters from this function. */
925 LLVMValueRef ret
= ctx
->return_value
;
928 if (ctx
->screen
->info
.chip_class
>= GFX9
) {
930 si_insert_input_ret(ctx
, ret
, ctx
->tcs_offchip_layout
, 8 + GFX9_SGPR_TCS_OFFCHIP_LAYOUT
);
931 ret
= si_insert_input_ret(ctx
, ret
, ctx
->tcs_out_lds_layout
, 8 + GFX9_SGPR_TCS_OUT_LAYOUT
);
932 /* Tess offchip and tess factor offsets are at the beginning. */
933 ret
= si_insert_input_ret(ctx
, ret
, ctx
->tcs_offchip_offset
, 2);
934 ret
= si_insert_input_ret(ctx
, ret
, ctx
->tcs_factor_offset
, 4);
935 vgpr
= 8 + GFX9_SGPR_TCS_OUT_LAYOUT
+ 1;
937 ret
= si_insert_input_ret(ctx
, ret
, ctx
->tcs_offchip_layout
, GFX6_SGPR_TCS_OFFCHIP_LAYOUT
);
938 ret
= si_insert_input_ret(ctx
, ret
, ctx
->tcs_out_lds_layout
, GFX6_SGPR_TCS_OUT_LAYOUT
);
939 /* Tess offchip and tess factor offsets are after user SGPRs. */
940 ret
= si_insert_input_ret(ctx
, ret
, ctx
->tcs_offchip_offset
, GFX6_TCS_NUM_USER_SGPR
);
941 ret
= si_insert_input_ret(ctx
, ret
, ctx
->tcs_factor_offset
, GFX6_TCS_NUM_USER_SGPR
+ 1);
942 vgpr
= GFX6_TCS_NUM_USER_SGPR
+ 2;
946 rel_patch_id
= ac_to_float(&ctx
->ac
, rel_patch_id
);
947 invocation_id
= ac_to_float(&ctx
->ac
, invocation_id
);
948 tf_lds_offset
= ac_to_float(&ctx
->ac
, tf_lds_offset
);
950 /* Leave a hole corresponding to the two input VGPRs. This ensures that
951 * the invocation_id output does not alias the tcs_rel_ids input,
952 * which saves a V_MOV on gfx9.
956 ret
= LLVMBuildInsertValue(builder
, ret
, rel_patch_id
, vgpr
++, "");
957 ret
= LLVMBuildInsertValue(builder
, ret
, invocation_id
, vgpr
++, "");
959 if (ctx
->shader
->selector
->info
.tessfactors_are_def_in_all_invocs
) {
960 vgpr
++; /* skip the tess factor LDS offset */
961 for (unsigned i
= 0; i
< 6; i
++) {
962 LLVMValueRef value
= LLVMBuildLoad(builder
, ctx
->invoc0_tess_factors
[i
], "");
963 value
= ac_to_float(&ctx
->ac
, value
);
964 ret
= LLVMBuildInsertValue(builder
, ret
, value
, vgpr
++, "");
967 ret
= LLVMBuildInsertValue(builder
, ret
, tf_lds_offset
, vgpr
++, "");
969 ctx
->return_value
= ret
;
972 /* Pass TCS inputs from LS to TCS on GFX9. */
973 static void si_set_ls_return_value_for_tcs(struct si_shader_context
*ctx
)
975 LLVMValueRef ret
= ctx
->return_value
;
977 ret
= si_insert_input_ptr(ctx
, ret
, ctx
->other_const_and_shader_buffers
, 0);
978 ret
= si_insert_input_ptr(ctx
, ret
, ctx
->other_samplers_and_images
, 1);
979 ret
= si_insert_input_ret(ctx
, ret
, ctx
->tcs_offchip_offset
, 2);
980 ret
= si_insert_input_ret(ctx
, ret
, ctx
->merged_wave_info
, 3);
981 ret
= si_insert_input_ret(ctx
, ret
, ctx
->tcs_factor_offset
, 4);
982 ret
= si_insert_input_ret(ctx
, ret
, ctx
->merged_scratch_offset
, 5);
984 ret
= si_insert_input_ptr(ctx
, ret
, ctx
->rw_buffers
, 8 + SI_SGPR_RW_BUFFERS
);
985 ret
= si_insert_input_ptr(ctx
, ret
, ctx
->bindless_samplers_and_images
,
986 8 + SI_SGPR_BINDLESS_SAMPLERS_AND_IMAGES
);
988 ret
= si_insert_input_ret(ctx
, ret
, ctx
->vs_state_bits
, 8 + SI_SGPR_VS_STATE_BITS
);
990 ret
= si_insert_input_ret(ctx
, ret
, ctx
->tcs_offchip_layout
, 8 + GFX9_SGPR_TCS_OFFCHIP_LAYOUT
);
991 ret
= si_insert_input_ret(ctx
, ret
, ctx
->tcs_out_lds_offsets
, 8 + GFX9_SGPR_TCS_OUT_OFFSETS
);
992 ret
= si_insert_input_ret(ctx
, ret
, ctx
->tcs_out_lds_layout
, 8 + GFX9_SGPR_TCS_OUT_LAYOUT
);
994 unsigned vgpr
= 8 + GFX9_TCS_NUM_USER_SGPR
;
995 ret
= LLVMBuildInsertValue(ctx
->ac
.builder
, ret
,
996 ac_to_float(&ctx
->ac
, ac_get_arg(&ctx
->ac
, ctx
->args
.tcs_patch_id
)),
998 ret
= LLVMBuildInsertValue(ctx
->ac
.builder
, ret
,
999 ac_to_float(&ctx
->ac
, ac_get_arg(&ctx
->ac
, ctx
->args
.tcs_rel_ids
)),
1001 ctx
->return_value
= ret
;
1004 void si_llvm_emit_ls_epilogue(struct ac_shader_abi
*abi
, unsigned max_outputs
, LLVMValueRef
*addrs
)
1006 struct si_shader_context
*ctx
= si_shader_context_from_abi(abi
);
1007 struct si_shader
*shader
= ctx
->shader
;
1008 struct si_shader_info
*info
= &shader
->selector
->info
;
1010 LLVMValueRef vertex_id
= ac_get_arg(&ctx
->ac
, ctx
->rel_auto_id
);
1011 LLVMValueRef vertex_dw_stride
= get_tcs_in_vertex_dw_stride(ctx
);
1012 LLVMValueRef base_dw_addr
= LLVMBuildMul(ctx
->ac
.builder
, vertex_id
, vertex_dw_stride
, "");
1014 /* Write outputs to LDS. The next shader (TCS aka HS) will read
1015 * its inputs from it. */
1016 for (i
= 0; i
< info
->num_outputs
; i
++) {
1017 unsigned semantic
= info
->output_semantic
[i
];
1019 /* The ARB_shader_viewport_layer_array spec contains the
1022 * 2) What happens if gl_ViewportIndex or gl_Layer is
1023 * written in the vertex shader and a geometry shader is
1026 * RESOLVED: The value written by the last vertex processing
1027 * stage is used. If the last vertex processing stage
1028 * (vertex, tessellation evaluation or geometry) does not
1029 * statically assign to gl_ViewportIndex or gl_Layer, index
1030 * or layer zero is assumed.
1032 * So writes to those outputs in VS-as-LS are simply ignored.
1034 if (semantic
== VARYING_SLOT_LAYER
|| semantic
== VARYING_SLOT_VIEWPORT
)
1037 int param
= si_shader_io_get_unique_index(semantic
, false);
1038 LLVMValueRef dw_addr
=
1039 LLVMBuildAdd(ctx
->ac
.builder
, base_dw_addr
, LLVMConstInt(ctx
->ac
.i32
, param
* 4, 0), "");
1041 for (chan
= 0; chan
< 4; chan
++) {
1042 if (!(info
->output_usagemask
[i
] & (1 << chan
)))
1045 lshs_lds_store(ctx
, chan
, dw_addr
,
1046 LLVMBuildLoad(ctx
->ac
.builder
, addrs
[4 * i
+ chan
], ""));
1050 if (ctx
->screen
->info
.chip_class
>= GFX9
)
1051 si_set_ls_return_value_for_tcs(ctx
);
1055 * Compile the TCS epilog function. This writes tesselation factors to memory
1056 * based on the output primitive type of the tesselator (determined by TES).
1058 void si_llvm_build_tcs_epilog(struct si_shader_context
*ctx
, union si_shader_part_key
*key
)
1060 memset(&ctx
->args
, 0, sizeof(ctx
->args
));
1062 if (ctx
->screen
->info
.chip_class
>= GFX9
) {
1063 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
, NULL
);
1064 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
, NULL
);
1065 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
, &ctx
->tcs_offchip_offset
);
1066 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
, NULL
); /* wave info */
1067 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
, &ctx
->tcs_factor_offset
);
1068 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
, NULL
);
1069 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
, NULL
);
1070 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
, NULL
);
1071 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
, NULL
);
1072 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
, NULL
);
1073 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
, NULL
);
1074 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
, NULL
);
1075 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
, NULL
);
1076 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
, NULL
);
1077 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
, NULL
);
1078 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
, NULL
);
1079 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
, &ctx
->tcs_offchip_layout
);
1080 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
, NULL
);
1081 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
, &ctx
->tcs_out_lds_layout
);
1083 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
, NULL
);
1084 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
, NULL
);
1085 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
, NULL
);
1086 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
, NULL
);
1087 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
, &ctx
->tcs_offchip_layout
);
1088 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
, NULL
);
1089 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
, &ctx
->tcs_out_lds_layout
);
1090 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
, NULL
);
1091 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
, &ctx
->tcs_offchip_offset
);
1092 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
, &ctx
->tcs_factor_offset
);
1095 ac_add_arg(&ctx
->args
, AC_ARG_VGPR
, 1, AC_ARG_INT
, NULL
); /* VGPR gap */
1096 ac_add_arg(&ctx
->args
, AC_ARG_VGPR
, 1, AC_ARG_INT
, NULL
); /* VGPR gap */
1097 struct ac_arg rel_patch_id
; /* patch index within the wave (REL_PATCH_ID) */
1098 ac_add_arg(&ctx
->args
, AC_ARG_VGPR
, 1, AC_ARG_INT
, &rel_patch_id
);
1099 struct ac_arg invocation_id
; /* invocation ID within the patch */
1100 ac_add_arg(&ctx
->args
, AC_ARG_VGPR
, 1, AC_ARG_INT
, &invocation_id
);
1102 tcs_out_current_patch_data_offset
; /* LDS offset where tess factors should be loaded from */
1103 ac_add_arg(&ctx
->args
, AC_ARG_VGPR
, 1, AC_ARG_INT
, &tcs_out_current_patch_data_offset
);
1105 struct ac_arg tess_factors
[6];
1106 for (unsigned i
= 0; i
< 6; i
++)
1107 ac_add_arg(&ctx
->args
, AC_ARG_VGPR
, 1, AC_ARG_INT
, &tess_factors
[i
]);
1109 /* Create the function. */
1110 si_llvm_create_func(ctx
, "tcs_epilog", NULL
, 0, ctx
->screen
->info
.chip_class
>= GFX7
? 128 : 0);
1111 ac_declare_lds_as_pointer(&ctx
->ac
);
1113 LLVMValueRef invoc0_tess_factors
[6];
1114 for (unsigned i
= 0; i
< 6; i
++)
1115 invoc0_tess_factors
[i
] = ac_get_arg(&ctx
->ac
, tess_factors
[i
]);
1117 si_write_tess_factors(ctx
, ac_get_arg(&ctx
->ac
, rel_patch_id
),
1118 ac_get_arg(&ctx
->ac
, invocation_id
),
1119 ac_get_arg(&ctx
->ac
, tcs_out_current_patch_data_offset
),
1120 invoc0_tess_factors
, invoc0_tess_factors
+ 4);
1122 LLVMBuildRetVoid(ctx
->ac
.builder
);
1125 void si_llvm_init_tcs_callbacks(struct si_shader_context
*ctx
)
1127 ctx
->abi
.load_tess_varyings
= si_nir_load_tcs_varyings
;
1128 ctx
->abi
.load_tess_level
= si_load_tess_level
;
1129 ctx
->abi
.store_tcs_outputs
= si_nir_store_output_tcs
;
1130 ctx
->abi
.emit_outputs
= si_llvm_emit_tcs_epilogue
;
1131 ctx
->abi
.load_patch_vertices_in
= si_load_patch_vertices_in
;
1134 void si_llvm_init_tes_callbacks(struct si_shader_context
*ctx
, bool ngg_cull_shader
)
1136 ctx
->abi
.load_tess_varyings
= si_nir_load_input_tes
;
1137 ctx
->abi
.load_tess_coord
= si_load_tess_coord
;
1138 ctx
->abi
.load_tess_level
= si_load_tess_level
;
1139 ctx
->abi
.load_patch_vertices_in
= si_load_patch_vertices_in
;
1141 if (ctx
->shader
->key
.as_es
)
1142 ctx
->abi
.emit_outputs
= si_llvm_emit_es_epilogue
;
1143 else if (ngg_cull_shader
)
1144 ctx
->abi
.emit_outputs
= gfx10_emit_ngg_culling_epilogue
;
1145 else if (ctx
->shader
->key
.as_ngg
)
1146 ctx
->abi
.emit_outputs
= gfx10_emit_ngg_epilogue
;
1148 ctx
->abi
.emit_outputs
= si_llvm_emit_vs_epilogue
;