2 * Copyright 2020 Advanced Micro Devices, Inc.
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * on the rights to use, copy, modify, merge, publish, distribute, sub
9 * license, and/or sell copies of the Software, and to permit persons to whom
10 * the Software is furnished to do so, subject to the following conditions:
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22 * USE OR OTHER DEALINGS IN THE SOFTWARE.
25 #include "si_shader_internal.h"
28 #include "util/u_memory.h"
30 LLVMValueRef
si_is_es_thread(struct si_shader_context
*ctx
)
32 /* Return true if the current thread should execute an ES thread. */
33 return LLVMBuildICmp(ctx
->ac
.builder
, LLVMIntULT
,
34 ac_get_thread_id(&ctx
->ac
),
35 si_unpack_param(ctx
, ctx
->merged_wave_info
, 0, 8), "");
38 LLVMValueRef
si_is_gs_thread(struct si_shader_context
*ctx
)
40 /* Return true if the current thread should execute a GS thread. */
41 return LLVMBuildICmp(ctx
->ac
.builder
, LLVMIntULT
,
42 ac_get_thread_id(&ctx
->ac
),
43 si_unpack_param(ctx
, ctx
->merged_wave_info
, 8, 8), "");
46 static LLVMValueRef
si_llvm_load_input_gs(struct ac_shader_abi
*abi
,
48 unsigned vtx_offset_param
,
52 struct si_shader_context
*ctx
= si_shader_context_from_abi(abi
);
53 struct si_shader
*shader
= ctx
->shader
;
54 LLVMValueRef vtx_offset
, soffset
;
55 struct si_shader_info
*info
= &shader
->selector
->info
;
56 unsigned semantic_name
= info
->input_semantic_name
[input_index
];
57 unsigned semantic_index
= info
->input_semantic_index
[input_index
];
61 param
= si_shader_io_get_unique_index(semantic_name
, semantic_index
, false);
63 /* GFX9 has the ESGS ring in LDS. */
64 if (ctx
->screen
->info
.chip_class
>= GFX9
) {
65 unsigned index
= vtx_offset_param
;
69 vtx_offset
= si_unpack_param(ctx
, ctx
->gs_vtx01_offset
,
70 index
% 2 ? 16 : 0, 16);
73 vtx_offset
= si_unpack_param(ctx
, ctx
->gs_vtx23_offset
,
74 index
% 2 ? 16 : 0, 16);
77 vtx_offset
= si_unpack_param(ctx
, ctx
->gs_vtx45_offset
,
78 index
% 2 ? 16 : 0, 16);
85 unsigned offset
= param
* 4 + swizzle
;
86 vtx_offset
= LLVMBuildAdd(ctx
->ac
.builder
, vtx_offset
,
87 LLVMConstInt(ctx
->ac
.i32
, offset
, false), "");
89 LLVMValueRef ptr
= ac_build_gep0(&ctx
->ac
, ctx
->esgs_ring
, vtx_offset
);
90 LLVMValueRef value
= LLVMBuildLoad(ctx
->ac
.builder
, ptr
, "");
91 if (ac_get_type_size(type
) == 8) {
92 ptr
= LLVMBuildGEP(ctx
->ac
.builder
, ptr
,
93 &ctx
->ac
.i32_1
, 1, "");
94 LLVMValueRef values
[2] = {
96 LLVMBuildLoad(ctx
->ac
.builder
, ptr
, "")
98 value
= ac_build_gather_values(&ctx
->ac
, values
, 2);
100 return LLVMBuildBitCast(ctx
->ac
.builder
, value
, type
, "");
103 /* GFX6: input load from the ESGS ring in memory. */
105 LLVMValueRef values
[4];
107 for (chan
= 0; chan
< 4; chan
++) {
108 values
[chan
] = si_llvm_load_input_gs(abi
, input_index
, vtx_offset_param
,
111 return ac_build_gather_values(&ctx
->ac
, values
, 4);
114 /* Get the vertex offset parameter on GFX6. */
115 LLVMValueRef gs_vtx_offset
= ac_get_arg(&ctx
->ac
,
116 ctx
->gs_vtx_offset
[vtx_offset_param
]);
118 vtx_offset
= LLVMBuildMul(ctx
->ac
.builder
, gs_vtx_offset
,
119 LLVMConstInt(ctx
->ac
.i32
, 4, 0), "");
121 soffset
= LLVMConstInt(ctx
->ac
.i32
, (param
* 4 + swizzle
) * 256, 0);
123 value
= ac_build_buffer_load(&ctx
->ac
, ctx
->esgs_ring
, 1, ctx
->ac
.i32_0
,
124 vtx_offset
, soffset
, 0, ac_glc
, true, false);
125 if (ac_get_type_size(type
) == 8) {
127 soffset
= LLVMConstInt(ctx
->ac
.i32
, (param
* 4 + swizzle
+ 1) * 256, 0);
129 value2
= ac_build_buffer_load(&ctx
->ac
, ctx
->esgs_ring
, 1,
130 ctx
->ac
.i32_0
, vtx_offset
, soffset
,
131 0, ac_glc
, true, false);
132 return si_build_gather_64bit(ctx
, type
, value
, value2
);
134 return LLVMBuildBitCast(ctx
->ac
.builder
, value
, type
, "");
137 static LLVMValueRef
si_nir_load_input_gs(struct ac_shader_abi
*abi
,
139 unsigned driver_location
,
141 unsigned num_components
,
142 unsigned vertex_index
,
143 unsigned const_index
,
146 struct si_shader_context
*ctx
= si_shader_context_from_abi(abi
);
148 LLVMValueRef value
[4];
149 for (unsigned i
= 0; i
< num_components
; i
++) {
151 if (ac_get_type_size(type
) == 8)
155 value
[i
+ component
] = si_llvm_load_input_gs(&ctx
->abi
, driver_location
/ 4 + const_index
,
156 vertex_index
, type
, offset
);
159 return ac_build_varying_gather_values(&ctx
->ac
, value
, num_components
, component
);
162 /* Pass GS inputs from ES to GS on GFX9. */
163 static void si_set_es_return_value_for_gs(struct si_shader_context
*ctx
)
165 LLVMValueRef ret
= ctx
->return_value
;
167 ret
= si_insert_input_ptr(ctx
, ret
, ctx
->other_const_and_shader_buffers
, 0);
168 ret
= si_insert_input_ptr(ctx
, ret
, ctx
->other_samplers_and_images
, 1);
169 if (ctx
->shader
->key
.as_ngg
)
170 ret
= si_insert_input_ptr(ctx
, ret
, ctx
->gs_tg_info
, 2);
172 ret
= si_insert_input_ret(ctx
, ret
, ctx
->gs2vs_offset
, 2);
173 ret
= si_insert_input_ret(ctx
, ret
, ctx
->merged_wave_info
, 3);
174 ret
= si_insert_input_ret(ctx
, ret
, ctx
->merged_scratch_offset
, 5);
176 ret
= si_insert_input_ptr(ctx
, ret
, ctx
->rw_buffers
,
177 8 + SI_SGPR_RW_BUFFERS
);
178 ret
= si_insert_input_ptr(ctx
, ret
,
179 ctx
->bindless_samplers_and_images
,
180 8 + SI_SGPR_BINDLESS_SAMPLERS_AND_IMAGES
);
181 if (ctx
->screen
->use_ngg
) {
182 ret
= si_insert_input_ptr(ctx
, ret
, ctx
->vs_state_bits
,
183 8 + SI_SGPR_VS_STATE_BITS
);
187 if (ctx
->type
== PIPE_SHADER_VERTEX
)
188 vgpr
= 8 + GFX9_VSGS_NUM_USER_SGPR
;
190 vgpr
= 8 + GFX9_TESGS_NUM_USER_SGPR
;
192 ret
= si_insert_input_ret_float(ctx
, ret
, ctx
->gs_vtx01_offset
, vgpr
++);
193 ret
= si_insert_input_ret_float(ctx
, ret
, ctx
->gs_vtx23_offset
, vgpr
++);
194 ret
= si_insert_input_ret_float(ctx
, ret
, ctx
->args
.gs_prim_id
, vgpr
++);
195 ret
= si_insert_input_ret_float(ctx
, ret
, ctx
->args
.gs_invocation_id
, vgpr
++);
196 ret
= si_insert_input_ret_float(ctx
, ret
, ctx
->gs_vtx45_offset
, vgpr
++);
197 ctx
->return_value
= ret
;
200 void si_llvm_emit_es_epilogue(struct ac_shader_abi
*abi
, unsigned max_outputs
,
203 struct si_shader_context
*ctx
= si_shader_context_from_abi(abi
);
204 struct si_shader
*es
= ctx
->shader
;
205 struct si_shader_info
*info
= &es
->selector
->info
;
206 LLVMValueRef lds_base
= NULL
;
210 if (ctx
->screen
->info
.chip_class
>= GFX9
&& info
->num_outputs
) {
211 unsigned itemsize_dw
= es
->selector
->esgs_itemsize
/ 4;
212 LLVMValueRef vertex_idx
= ac_get_thread_id(&ctx
->ac
);
213 LLVMValueRef wave_idx
= si_unpack_param(ctx
, ctx
->merged_wave_info
, 24, 4);
214 vertex_idx
= LLVMBuildOr(ctx
->ac
.builder
, vertex_idx
,
215 LLVMBuildMul(ctx
->ac
.builder
, wave_idx
,
216 LLVMConstInt(ctx
->ac
.i32
, ctx
->ac
.wave_size
, false), ""), "");
217 lds_base
= LLVMBuildMul(ctx
->ac
.builder
, vertex_idx
,
218 LLVMConstInt(ctx
->ac
.i32
, itemsize_dw
, 0), "");
221 for (i
= 0; i
< info
->num_outputs
; i
++) {
224 if (info
->output_semantic_name
[i
] == TGSI_SEMANTIC_VIEWPORT_INDEX
||
225 info
->output_semantic_name
[i
] == TGSI_SEMANTIC_LAYER
)
228 param
= si_shader_io_get_unique_index(info
->output_semantic_name
[i
],
229 info
->output_semantic_index
[i
], false);
231 for (chan
= 0; chan
< 4; chan
++) {
232 if (!(info
->output_usagemask
[i
] & (1 << chan
)))
235 LLVMValueRef out_val
= LLVMBuildLoad(ctx
->ac
.builder
, addrs
[4 * i
+ chan
], "");
236 out_val
= ac_to_integer(&ctx
->ac
, out_val
);
238 /* GFX9 has the ESGS ring in LDS. */
239 if (ctx
->screen
->info
.chip_class
>= GFX9
) {
240 LLVMValueRef idx
= LLVMConstInt(ctx
->ac
.i32
, param
* 4 + chan
, false);
241 idx
= LLVMBuildAdd(ctx
->ac
.builder
, lds_base
, idx
, "");
242 ac_build_indexed_store(&ctx
->ac
, ctx
->esgs_ring
, idx
, out_val
);
246 ac_build_buffer_store_dword(&ctx
->ac
,
249 ac_get_arg(&ctx
->ac
, ctx
->es2gs_offset
),
250 (4 * param
+ chan
) * 4,
251 ac_glc
| ac_slc
| ac_swizzled
);
255 if (ctx
->screen
->info
.chip_class
>= GFX9
)
256 si_set_es_return_value_for_gs(ctx
);
259 static LLVMValueRef
si_get_gs_wave_id(struct si_shader_context
*ctx
)
261 if (ctx
->screen
->info
.chip_class
>= GFX9
)
262 return si_unpack_param(ctx
, ctx
->merged_wave_info
, 16, 8);
264 return ac_get_arg(&ctx
->ac
, ctx
->gs_wave_id
);
267 static void emit_gs_epilogue(struct si_shader_context
*ctx
)
269 if (ctx
->shader
->key
.as_ngg
) {
270 gfx10_ngg_gs_emit_epilogue(ctx
);
274 if (ctx
->screen
->info
.chip_class
>= GFX10
)
275 LLVMBuildFence(ctx
->ac
.builder
, LLVMAtomicOrderingRelease
, false, "");
277 ac_build_sendmsg(&ctx
->ac
, AC_SENDMSG_GS_OP_NOP
| AC_SENDMSG_GS_DONE
,
278 si_get_gs_wave_id(ctx
));
280 if (ctx
->screen
->info
.chip_class
>= GFX9
)
281 ac_build_endif(&ctx
->ac
, ctx
->merged_wrap_if_label
);
284 static void si_llvm_emit_gs_epilogue(struct ac_shader_abi
*abi
,
285 unsigned max_outputs
,
288 struct si_shader_context
*ctx
= si_shader_context_from_abi(abi
);
289 struct si_shader_info UNUSED
*info
= &ctx
->shader
->selector
->info
;
291 assert(info
->num_outputs
<= max_outputs
);
293 emit_gs_epilogue(ctx
);
296 /* Emit one vertex from the geometry shader */
297 static void si_llvm_emit_vertex(struct ac_shader_abi
*abi
,
301 struct si_shader_context
*ctx
= si_shader_context_from_abi(abi
);
303 if (ctx
->shader
->key
.as_ngg
) {
304 gfx10_ngg_gs_emit_vertex(ctx
, stream
, addrs
);
308 struct si_shader_info
*info
= &ctx
->shader
->selector
->info
;
309 struct si_shader
*shader
= ctx
->shader
;
310 LLVMValueRef soffset
= ac_get_arg(&ctx
->ac
, ctx
->gs2vs_offset
);
311 LLVMValueRef gs_next_vertex
;
312 LLVMValueRef can_emit
;
313 unsigned chan
, offset
;
316 /* Write vertex attribute values to GSVS ring */
317 gs_next_vertex
= LLVMBuildLoad(ctx
->ac
.builder
,
318 ctx
->gs_next_vertex
[stream
],
321 /* If this thread has already emitted the declared maximum number of
322 * vertices, skip the write: excessive vertex emissions are not
323 * supposed to have any effect.
325 * If the shader has no writes to memory, kill it instead. This skips
326 * further memory loads and may allow LLVM to skip to the end
329 can_emit
= LLVMBuildICmp(ctx
->ac
.builder
, LLVMIntULT
, gs_next_vertex
,
330 LLVMConstInt(ctx
->ac
.i32
,
331 shader
->selector
->gs_max_out_vertices
, 0), "");
333 bool use_kill
= !info
->writes_memory
;
335 ac_build_kill_if_false(&ctx
->ac
, can_emit
);
337 ac_build_ifcc(&ctx
->ac
, can_emit
, 6505);
341 for (i
= 0; i
< info
->num_outputs
; i
++) {
342 for (chan
= 0; chan
< 4; chan
++) {
343 if (!(info
->output_usagemask
[i
] & (1 << chan
)) ||
344 ((info
->output_streams
[i
] >> (2 * chan
)) & 3) != stream
)
347 LLVMValueRef out_val
= LLVMBuildLoad(ctx
->ac
.builder
, addrs
[4 * i
+ chan
], "");
348 LLVMValueRef voffset
=
349 LLVMConstInt(ctx
->ac
.i32
, offset
*
350 shader
->selector
->gs_max_out_vertices
, 0);
353 voffset
= LLVMBuildAdd(ctx
->ac
.builder
, voffset
, gs_next_vertex
, "");
354 voffset
= LLVMBuildMul(ctx
->ac
.builder
, voffset
,
355 LLVMConstInt(ctx
->ac
.i32
, 4, 0), "");
357 out_val
= ac_to_integer(&ctx
->ac
, out_val
);
359 ac_build_buffer_store_dword(&ctx
->ac
,
360 ctx
->gsvs_ring
[stream
],
363 ac_glc
| ac_slc
| ac_swizzled
);
367 gs_next_vertex
= LLVMBuildAdd(ctx
->ac
.builder
, gs_next_vertex
, ctx
->ac
.i32_1
, "");
368 LLVMBuildStore(ctx
->ac
.builder
, gs_next_vertex
, ctx
->gs_next_vertex
[stream
]);
370 /* Signal vertex emission if vertex data was written. */
372 ac_build_sendmsg(&ctx
->ac
, AC_SENDMSG_GS_OP_EMIT
| AC_SENDMSG_GS
| (stream
<< 8),
373 si_get_gs_wave_id(ctx
));
377 ac_build_endif(&ctx
->ac
, 6505);
380 /* Cut one primitive from the geometry shader */
381 static void si_llvm_emit_primitive(struct ac_shader_abi
*abi
,
384 struct si_shader_context
*ctx
= si_shader_context_from_abi(abi
);
386 if (ctx
->shader
->key
.as_ngg
) {
387 LLVMBuildStore(ctx
->ac
.builder
, ctx
->ac
.i32_0
, ctx
->gs_curprim_verts
[stream
]);
391 /* Signal primitive cut */
392 ac_build_sendmsg(&ctx
->ac
, AC_SENDMSG_GS_OP_CUT
| AC_SENDMSG_GS
| (stream
<< 8),
393 si_get_gs_wave_id(ctx
));
396 void si_preload_esgs_ring(struct si_shader_context
*ctx
)
398 if (ctx
->screen
->info
.chip_class
<= GFX8
) {
400 ctx
->type
== PIPE_SHADER_GEOMETRY
? SI_GS_RING_ESGS
402 LLVMValueRef offset
= LLVMConstInt(ctx
->ac
.i32
, ring
, 0);
403 LLVMValueRef buf_ptr
= ac_get_arg(&ctx
->ac
, ctx
->rw_buffers
);
406 ac_build_load_to_sgpr(&ctx
->ac
, buf_ptr
, offset
);
408 if (USE_LDS_SYMBOLS
&& LLVM_VERSION_MAJOR
>= 9) {
409 /* Declare the ESGS ring as an explicit LDS symbol. */
410 si_llvm_declare_esgs_ring(ctx
);
412 ac_declare_lds_as_pointer(&ctx
->ac
);
413 ctx
->esgs_ring
= ctx
->ac
.lds
;
418 void si_preload_gs_rings(struct si_shader_context
*ctx
)
420 const struct si_shader_selector
*sel
= ctx
->shader
->selector
;
421 LLVMBuilderRef builder
= ctx
->ac
.builder
;
422 LLVMValueRef offset
= LLVMConstInt(ctx
->ac
.i32
, SI_RING_GSVS
, 0);
423 LLVMValueRef buf_ptr
= ac_get_arg(&ctx
->ac
, ctx
->rw_buffers
);
424 LLVMValueRef base_ring
= ac_build_load_to_sgpr(&ctx
->ac
, buf_ptr
, offset
);
426 /* The conceptual layout of the GSVS ring is
427 * v0c0 .. vLv0 v0c1 .. vLc1 ..
428 * but the real memory layout is swizzled across
430 * t0v0c0 .. t15v0c0 t0v1c0 .. t15v1c0 ... t15vLcL
432 * Override the buffer descriptor accordingly.
434 LLVMTypeRef v2i64
= LLVMVectorType(ctx
->ac
.i64
, 2);
435 uint64_t stream_offset
= 0;
437 for (unsigned stream
= 0; stream
< 4; ++stream
) {
438 unsigned num_components
;
440 unsigned num_records
;
441 LLVMValueRef ring
, tmp
;
443 num_components
= sel
->info
.num_stream_output_components
[stream
];
447 stride
= 4 * num_components
* sel
->gs_max_out_vertices
;
449 /* Limit on the stride field for <= GFX7. */
450 assert(stride
< (1 << 14));
452 num_records
= ctx
->ac
.wave_size
;
454 ring
= LLVMBuildBitCast(builder
, base_ring
, v2i64
, "");
455 tmp
= LLVMBuildExtractElement(builder
, ring
, ctx
->ac
.i32_0
, "");
456 tmp
= LLVMBuildAdd(builder
, tmp
,
457 LLVMConstInt(ctx
->ac
.i64
,
458 stream_offset
, 0), "");
459 stream_offset
+= stride
* ctx
->ac
.wave_size
;
461 ring
= LLVMBuildInsertElement(builder
, ring
, tmp
, ctx
->ac
.i32_0
, "");
462 ring
= LLVMBuildBitCast(builder
, ring
, ctx
->ac
.v4i32
, "");
463 tmp
= LLVMBuildExtractElement(builder
, ring
, ctx
->ac
.i32_1
, "");
464 tmp
= LLVMBuildOr(builder
, tmp
,
465 LLVMConstInt(ctx
->ac
.i32
,
466 S_008F04_STRIDE(stride
) |
467 S_008F04_SWIZZLE_ENABLE(1), 0), "");
468 ring
= LLVMBuildInsertElement(builder
, ring
, tmp
, ctx
->ac
.i32_1
, "");
469 ring
= LLVMBuildInsertElement(builder
, ring
,
470 LLVMConstInt(ctx
->ac
.i32
, num_records
, 0),
471 LLVMConstInt(ctx
->ac
.i32
, 2, 0), "");
474 S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X
) |
475 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y
) |
476 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z
) |
477 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W
) |
478 S_008F0C_INDEX_STRIDE(1) | /* index_stride = 16 (elements) */
479 S_008F0C_ADD_TID_ENABLE(1);
481 if (ctx
->ac
.chip_class
>= GFX10
) {
482 rsrc3
|= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT
) |
483 S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED
) |
484 S_008F0C_RESOURCE_LEVEL(1);
486 rsrc3
|= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT
) |
487 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32
) |
488 S_008F0C_ELEMENT_SIZE(1); /* element_size = 4 (bytes) */
491 ring
= LLVMBuildInsertElement(builder
, ring
,
492 LLVMConstInt(ctx
->ac
.i32
, rsrc3
, false),
493 LLVMConstInt(ctx
->ac
.i32
, 3, 0), "");
495 ctx
->gsvs_ring
[stream
] = ring
;
499 /* Generate code for the hardware VS shader stage to go with a geometry shader */
501 si_generate_gs_copy_shader(struct si_screen
*sscreen
,
502 struct ac_llvm_compiler
*compiler
,
503 struct si_shader_selector
*gs_selector
,
504 struct pipe_debug_callback
*debug
)
506 struct si_shader_context ctx
;
507 struct si_shader
*shader
;
508 LLVMBuilderRef builder
;
509 struct si_shader_output_values outputs
[SI_MAX_VS_OUTPUTS
];
510 struct si_shader_info
*gsinfo
= &gs_selector
->info
;
514 shader
= CALLOC_STRUCT(si_shader
);
518 /* We can leave the fence as permanently signaled because the GS copy
519 * shader only becomes visible globally after it has been compiled. */
520 util_queue_fence_init(&shader
->ready
);
522 shader
->selector
= gs_selector
;
523 shader
->is_gs_copy_shader
= true;
525 si_llvm_context_init(&ctx
, sscreen
, compiler
,
526 si_get_wave_size(sscreen
, PIPE_SHADER_VERTEX
, false, false));
528 ctx
.type
= PIPE_SHADER_VERTEX
;
530 builder
= ctx
.ac
.builder
;
532 si_create_function(&ctx
, false);
534 LLVMValueRef buf_ptr
= ac_get_arg(&ctx
.ac
, ctx
.rw_buffers
);
535 ctx
.gsvs_ring
[0] = ac_build_load_to_sgpr(&ctx
.ac
, buf_ptr
,
536 LLVMConstInt(ctx
.ac
.i32
, SI_RING_GSVS
, 0));
538 LLVMValueRef voffset
=
539 LLVMBuildMul(ctx
.ac
.builder
, ctx
.abi
.vertex_id
,
540 LLVMConstInt(ctx
.ac
.i32
, 4, 0), "");
542 /* Fetch the vertex stream ID.*/
543 LLVMValueRef stream_id
;
545 if (!sscreen
->use_ngg_streamout
&& gs_selector
->so
.num_outputs
)
546 stream_id
= si_unpack_param(&ctx
, ctx
.streamout_config
, 24, 2);
548 stream_id
= ctx
.ac
.i32_0
;
550 /* Fill in output information. */
551 for (i
= 0; i
< gsinfo
->num_outputs
; ++i
) {
552 outputs
[i
].semantic_name
= gsinfo
->output_semantic_name
[i
];
553 outputs
[i
].semantic_index
= gsinfo
->output_semantic_index
[i
];
555 for (int chan
= 0; chan
< 4; chan
++) {
556 outputs
[i
].vertex_stream
[chan
] =
557 (gsinfo
->output_streams
[i
] >> (2 * chan
)) & 3;
561 LLVMBasicBlockRef end_bb
;
562 LLVMValueRef switch_inst
;
564 end_bb
= LLVMAppendBasicBlockInContext(ctx
.ac
.context
, ctx
.main_fn
, "end");
565 switch_inst
= LLVMBuildSwitch(builder
, stream_id
, end_bb
, 4);
567 for (int stream
= 0; stream
< 4; stream
++) {
568 LLVMBasicBlockRef bb
;
571 if (!gsinfo
->num_stream_output_components
[stream
])
574 if (stream
> 0 && !gs_selector
->so
.num_outputs
)
577 bb
= LLVMInsertBasicBlockInContext(ctx
.ac
.context
, end_bb
, "out");
578 LLVMAddCase(switch_inst
, LLVMConstInt(ctx
.ac
.i32
, stream
, 0), bb
);
579 LLVMPositionBuilderAtEnd(builder
, bb
);
581 /* Fetch vertex data from GSVS ring */
583 for (i
= 0; i
< gsinfo
->num_outputs
; ++i
) {
584 for (unsigned chan
= 0; chan
< 4; chan
++) {
585 if (!(gsinfo
->output_usagemask
[i
] & (1 << chan
)) ||
586 outputs
[i
].vertex_stream
[chan
] != stream
) {
587 outputs
[i
].values
[chan
] = LLVMGetUndef(ctx
.ac
.f32
);
591 LLVMValueRef soffset
= LLVMConstInt(ctx
.ac
.i32
,
592 offset
* gs_selector
->gs_max_out_vertices
* 16 * 4, 0);
595 outputs
[i
].values
[chan
] =
596 ac_build_buffer_load(&ctx
.ac
,
598 ctx
.ac
.i32_0
, voffset
,
599 soffset
, 0, ac_glc
| ac_slc
,
604 /* Streamout and exports. */
605 if (!sscreen
->use_ngg_streamout
&& gs_selector
->so
.num_outputs
) {
606 si_llvm_emit_streamout(&ctx
, outputs
,
612 si_llvm_build_vs_exports(&ctx
, outputs
, gsinfo
->num_outputs
);
614 LLVMBuildBr(builder
, end_bb
);
617 LLVMPositionBuilderAtEnd(builder
, end_bb
);
619 LLVMBuildRetVoid(ctx
.ac
.builder
);
621 ctx
.type
= PIPE_SHADER_GEOMETRY
; /* override for shader dumping */
622 si_llvm_optimize_module(&ctx
);
625 if (si_compile_llvm(sscreen
, &ctx
.shader
->binary
,
626 &ctx
.shader
->config
, ctx
.compiler
, &ctx
.ac
,
627 debug
, PIPE_SHADER_GEOMETRY
,
628 "GS Copy Shader", false)) {
629 if (si_can_dump_shader(sscreen
, PIPE_SHADER_GEOMETRY
))
630 fprintf(stderr
, "GS Copy Shader:\n");
631 si_shader_dump(sscreen
, ctx
.shader
, debug
, stderr
, true);
633 if (!ctx
.shader
->config
.scratch_bytes_per_wave
)
634 ok
= si_shader_binary_upload(sscreen
, ctx
.shader
, 0);
639 si_llvm_dispose(&ctx
);
645 si_fix_resource_usage(sscreen
, shader
);
651 * Build the GS prolog function. Rotate the input vertices for triangle strips
654 void si_llvm_build_gs_prolog(struct si_shader_context
*ctx
,
655 union si_shader_part_key
*key
)
657 unsigned num_sgprs
, num_vgprs
;
658 LLVMBuilderRef builder
= ctx
->ac
.builder
;
659 LLVMTypeRef returns
[AC_MAX_ARGS
];
660 LLVMValueRef func
, ret
;
662 memset(&ctx
->args
, 0, sizeof(ctx
->args
));
664 if (ctx
->screen
->info
.chip_class
>= GFX9
) {
665 if (key
->gs_prolog
.states
.gfx9_prev_is_vs
)
666 num_sgprs
= 8 + GFX9_VSGS_NUM_USER_SGPR
;
668 num_sgprs
= 8 + GFX9_TESGS_NUM_USER_SGPR
;
669 num_vgprs
= 5; /* ES inputs are not needed by GS */
671 num_sgprs
= GFX6_GS_NUM_USER_SGPR
+ 2;
675 for (unsigned i
= 0; i
< num_sgprs
; ++i
) {
676 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
, NULL
);
677 returns
[i
] = ctx
->ac
.i32
;
680 for (unsigned i
= 0; i
< num_vgprs
; ++i
) {
681 ac_add_arg(&ctx
->args
, AC_ARG_VGPR
, 1, AC_ARG_INT
, NULL
);
682 returns
[num_sgprs
+ i
] = ctx
->ac
.f32
;
685 /* Create the function. */
686 si_llvm_create_func(ctx
, "gs_prolog", returns
, num_sgprs
+ num_vgprs
, 0);
689 /* Set the full EXEC mask for the prolog, because we are only fiddling
690 * with registers here. The main shader part will set the correct EXEC
693 if (ctx
->screen
->info
.chip_class
>= GFX9
&& !key
->gs_prolog
.is_monolithic
)
694 ac_init_exec_full_mask(&ctx
->ac
);
696 /* Copy inputs to outputs. This should be no-op, as the registers match,
697 * but it will prevent the compiler from overwriting them unintentionally.
699 ret
= ctx
->return_value
;
700 for (unsigned i
= 0; i
< num_sgprs
; i
++) {
701 LLVMValueRef p
= LLVMGetParam(func
, i
);
702 ret
= LLVMBuildInsertValue(builder
, ret
, p
, i
, "");
704 for (unsigned i
= 0; i
< num_vgprs
; i
++) {
705 LLVMValueRef p
= LLVMGetParam(func
, num_sgprs
+ i
);
706 p
= ac_to_float(&ctx
->ac
, p
);
707 ret
= LLVMBuildInsertValue(builder
, ret
, p
, num_sgprs
+ i
, "");
710 if (key
->gs_prolog
.states
.tri_strip_adj_fix
) {
711 /* Remap the input vertices for every other primitive. */
712 const struct ac_arg gfx6_vtx_params
[6] = {
713 { .used
= true, .arg_index
= num_sgprs
},
714 { .used
= true, .arg_index
= num_sgprs
+ 1 },
715 { .used
= true, .arg_index
= num_sgprs
+ 3 },
716 { .used
= true, .arg_index
= num_sgprs
+ 4 },
717 { .used
= true, .arg_index
= num_sgprs
+ 5 },
718 { .used
= true, .arg_index
= num_sgprs
+ 6 },
720 const struct ac_arg gfx9_vtx_params
[3] = {
721 { .used
= true, .arg_index
= num_sgprs
},
722 { .used
= true, .arg_index
= num_sgprs
+ 1 },
723 { .used
= true, .arg_index
= num_sgprs
+ 4 },
725 LLVMValueRef vtx_in
[6], vtx_out
[6];
726 LLVMValueRef prim_id
, rotate
;
728 if (ctx
->screen
->info
.chip_class
>= GFX9
) {
729 for (unsigned i
= 0; i
< 3; i
++) {
730 vtx_in
[i
*2] = si_unpack_param(ctx
, gfx9_vtx_params
[i
], 0, 16);
731 vtx_in
[i
*2+1] = si_unpack_param(ctx
, gfx9_vtx_params
[i
], 16, 16);
734 for (unsigned i
= 0; i
< 6; i
++)
735 vtx_in
[i
] = ac_get_arg(&ctx
->ac
, gfx6_vtx_params
[i
]);
738 prim_id
= LLVMGetParam(func
, num_sgprs
+ 2);
739 rotate
= LLVMBuildTrunc(builder
, prim_id
, ctx
->ac
.i1
, "");
741 for (unsigned i
= 0; i
< 6; ++i
) {
742 LLVMValueRef base
, rotated
;
744 rotated
= vtx_in
[(i
+ 4) % 6];
745 vtx_out
[i
] = LLVMBuildSelect(builder
, rotate
, rotated
, base
, "");
748 if (ctx
->screen
->info
.chip_class
>= GFX9
) {
749 for (unsigned i
= 0; i
< 3; i
++) {
750 LLVMValueRef hi
, out
;
752 hi
= LLVMBuildShl(builder
, vtx_out
[i
*2+1],
753 LLVMConstInt(ctx
->ac
.i32
, 16, 0), "");
754 out
= LLVMBuildOr(builder
, vtx_out
[i
*2], hi
, "");
755 out
= ac_to_float(&ctx
->ac
, out
);
756 ret
= LLVMBuildInsertValue(builder
, ret
, out
,
757 gfx9_vtx_params
[i
].arg_index
, "");
760 for (unsigned i
= 0; i
< 6; i
++) {
763 out
= ac_to_float(&ctx
->ac
, vtx_out
[i
]);
764 ret
= LLVMBuildInsertValue(builder
, ret
, out
,
765 gfx6_vtx_params
[i
].arg_index
, "");
770 LLVMBuildRet(builder
, ret
);
773 void si_llvm_init_gs_callbacks(struct si_shader_context
*ctx
)
775 ctx
->abi
.load_inputs
= si_nir_load_input_gs
;
776 ctx
->abi
.emit_vertex
= si_llvm_emit_vertex
;
777 ctx
->abi
.emit_primitive
= si_llvm_emit_primitive
;
778 ctx
->abi
.emit_outputs
= si_llvm_emit_gs_epilogue
;