/*
 * Copyright 2020 Advanced Micro Devices, Inc.
 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * on the rights to use, copy, modify, merge, publish, distribute, sub
9 * license, and/or sell copies of the Software, and to permit persons to whom
10 * the Software is furnished to do so, subject to the following conditions:
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
26 #include "si_shader_internal.h"
28 #include "util/u_memory.h"
30 LLVMValueRef
si_is_es_thread(struct si_shader_context
*ctx
)
32 /* Return true if the current thread should execute an ES thread. */
33 return LLVMBuildICmp(ctx
->ac
.builder
, LLVMIntULT
, ac_get_thread_id(&ctx
->ac
),
34 si_unpack_param(ctx
, ctx
->merged_wave_info
, 0, 8), "");
37 LLVMValueRef
si_is_gs_thread(struct si_shader_context
*ctx
)
39 /* Return true if the current thread should execute a GS thread. */
40 return LLVMBuildICmp(ctx
->ac
.builder
, LLVMIntULT
, ac_get_thread_id(&ctx
->ac
),
41 si_unpack_param(ctx
, ctx
->merged_wave_info
, 8, 8), "");
44 static LLVMValueRef
si_llvm_load_input_gs(struct ac_shader_abi
*abi
, unsigned input_index
,
45 unsigned vtx_offset_param
, LLVMTypeRef type
,
48 struct si_shader_context
*ctx
= si_shader_context_from_abi(abi
);
49 struct si_shader
*shader
= ctx
->shader
;
50 LLVMValueRef vtx_offset
, soffset
;
51 struct si_shader_info
*info
= &shader
->selector
->info
;
52 unsigned semantic_name
= info
->input_semantic_name
[input_index
];
53 unsigned semantic_index
= info
->input_semantic_index
[input_index
];
57 param
= si_shader_io_get_unique_index(semantic_name
, semantic_index
, false);
59 /* GFX9 has the ESGS ring in LDS. */
60 if (ctx
->screen
->info
.chip_class
>= GFX9
) {
61 unsigned index
= vtx_offset_param
;
65 vtx_offset
= si_unpack_param(ctx
, ctx
->gs_vtx01_offset
, index
% 2 ? 16 : 0, 16);
68 vtx_offset
= si_unpack_param(ctx
, ctx
->gs_vtx23_offset
, index
% 2 ? 16 : 0, 16);
71 vtx_offset
= si_unpack_param(ctx
, ctx
->gs_vtx45_offset
, index
% 2 ? 16 : 0, 16);
78 unsigned offset
= param
* 4 + swizzle
;
80 LLVMBuildAdd(ctx
->ac
.builder
, vtx_offset
, LLVMConstInt(ctx
->ac
.i32
, offset
, false), "");
82 LLVMValueRef ptr
= ac_build_gep0(&ctx
->ac
, ctx
->esgs_ring
, vtx_offset
);
83 LLVMValueRef value
= LLVMBuildLoad(ctx
->ac
.builder
, ptr
, "");
84 if (ac_get_type_size(type
) == 8) {
85 ptr
= LLVMBuildGEP(ctx
->ac
.builder
, ptr
, &ctx
->ac
.i32_1
, 1, "");
86 LLVMValueRef values
[2] = {value
, LLVMBuildLoad(ctx
->ac
.builder
, ptr
, "")};
87 value
= ac_build_gather_values(&ctx
->ac
, values
, 2);
89 return LLVMBuildBitCast(ctx
->ac
.builder
, value
, type
, "");
92 /* GFX6: input load from the ESGS ring in memory. */
94 LLVMValueRef values
[4];
96 for (chan
= 0; chan
< 4; chan
++) {
97 values
[chan
] = si_llvm_load_input_gs(abi
, input_index
, vtx_offset_param
, type
, chan
);
99 return ac_build_gather_values(&ctx
->ac
, values
, 4);
102 /* Get the vertex offset parameter on GFX6. */
103 LLVMValueRef gs_vtx_offset
= ac_get_arg(&ctx
->ac
, ctx
->gs_vtx_offset
[vtx_offset_param
]);
105 vtx_offset
= LLVMBuildMul(ctx
->ac
.builder
, gs_vtx_offset
, LLVMConstInt(ctx
->ac
.i32
, 4, 0), "");
107 soffset
= LLVMConstInt(ctx
->ac
.i32
, (param
* 4 + swizzle
) * 256, 0);
109 value
= ac_build_buffer_load(&ctx
->ac
, ctx
->esgs_ring
, 1, ctx
->ac
.i32_0
, vtx_offset
, soffset
, 0,
110 ac_glc
, true, false);
111 if (ac_get_type_size(type
) == 8) {
113 soffset
= LLVMConstInt(ctx
->ac
.i32
, (param
* 4 + swizzle
+ 1) * 256, 0);
115 value2
= ac_build_buffer_load(&ctx
->ac
, ctx
->esgs_ring
, 1, ctx
->ac
.i32_0
, vtx_offset
, soffset
,
116 0, ac_glc
, true, false);
117 return si_build_gather_64bit(ctx
, type
, value
, value2
);
119 return LLVMBuildBitCast(ctx
->ac
.builder
, value
, type
, "");
122 static LLVMValueRef
si_nir_load_input_gs(struct ac_shader_abi
*abi
, unsigned location
,
123 unsigned driver_location
, unsigned component
,
124 unsigned num_components
, unsigned vertex_index
,
125 unsigned const_index
, LLVMTypeRef type
)
127 struct si_shader_context
*ctx
= si_shader_context_from_abi(abi
);
129 LLVMValueRef value
[4];
130 for (unsigned i
= 0; i
< num_components
; i
++) {
132 if (ac_get_type_size(type
) == 8)
136 value
[i
+ component
] = si_llvm_load_input_gs(&ctx
->abi
, driver_location
/ 4 + const_index
,
137 vertex_index
, type
, offset
);
140 return ac_build_varying_gather_values(&ctx
->ac
, value
, num_components
, component
);
143 /* Pass GS inputs from ES to GS on GFX9. */
144 static void si_set_es_return_value_for_gs(struct si_shader_context
*ctx
)
146 LLVMValueRef ret
= ctx
->return_value
;
148 ret
= si_insert_input_ptr(ctx
, ret
, ctx
->other_const_and_shader_buffers
, 0);
149 ret
= si_insert_input_ptr(ctx
, ret
, ctx
->other_samplers_and_images
, 1);
150 if (ctx
->shader
->key
.as_ngg
)
151 ret
= si_insert_input_ptr(ctx
, ret
, ctx
->gs_tg_info
, 2);
153 ret
= si_insert_input_ret(ctx
, ret
, ctx
->gs2vs_offset
, 2);
154 ret
= si_insert_input_ret(ctx
, ret
, ctx
->merged_wave_info
, 3);
155 ret
= si_insert_input_ret(ctx
, ret
, ctx
->merged_scratch_offset
, 5);
157 ret
= si_insert_input_ptr(ctx
, ret
, ctx
->rw_buffers
, 8 + SI_SGPR_RW_BUFFERS
);
158 ret
= si_insert_input_ptr(ctx
, ret
, ctx
->bindless_samplers_and_images
,
159 8 + SI_SGPR_BINDLESS_SAMPLERS_AND_IMAGES
);
160 if (ctx
->screen
->use_ngg
) {
161 ret
= si_insert_input_ptr(ctx
, ret
, ctx
->vs_state_bits
, 8 + SI_SGPR_VS_STATE_BITS
);
165 if (ctx
->stage
== MESA_SHADER_VERTEX
)
166 vgpr
= 8 + GFX9_VSGS_NUM_USER_SGPR
;
168 vgpr
= 8 + GFX9_TESGS_NUM_USER_SGPR
;
170 ret
= si_insert_input_ret_float(ctx
, ret
, ctx
->gs_vtx01_offset
, vgpr
++);
171 ret
= si_insert_input_ret_float(ctx
, ret
, ctx
->gs_vtx23_offset
, vgpr
++);
172 ret
= si_insert_input_ret_float(ctx
, ret
, ctx
->args
.gs_prim_id
, vgpr
++);
173 ret
= si_insert_input_ret_float(ctx
, ret
, ctx
->args
.gs_invocation_id
, vgpr
++);
174 ret
= si_insert_input_ret_float(ctx
, ret
, ctx
->gs_vtx45_offset
, vgpr
++);
175 ctx
->return_value
= ret
;
178 void si_llvm_emit_es_epilogue(struct ac_shader_abi
*abi
, unsigned max_outputs
, LLVMValueRef
*addrs
)
180 struct si_shader_context
*ctx
= si_shader_context_from_abi(abi
);
181 struct si_shader
*es
= ctx
->shader
;
182 struct si_shader_info
*info
= &es
->selector
->info
;
183 LLVMValueRef lds_base
= NULL
;
187 if (ctx
->screen
->info
.chip_class
>= GFX9
&& info
->num_outputs
) {
188 unsigned itemsize_dw
= es
->selector
->esgs_itemsize
/ 4;
189 LLVMValueRef vertex_idx
= ac_get_thread_id(&ctx
->ac
);
190 LLVMValueRef wave_idx
= si_unpack_param(ctx
, ctx
->merged_wave_info
, 24, 4);
192 LLVMBuildOr(ctx
->ac
.builder
, vertex_idx
,
193 LLVMBuildMul(ctx
->ac
.builder
, wave_idx
,
194 LLVMConstInt(ctx
->ac
.i32
, ctx
->ac
.wave_size
, false), ""),
197 LLVMBuildMul(ctx
->ac
.builder
, vertex_idx
, LLVMConstInt(ctx
->ac
.i32
, itemsize_dw
, 0), "");
200 for (i
= 0; i
< info
->num_outputs
; i
++) {
203 if (info
->output_semantic_name
[i
] == TGSI_SEMANTIC_VIEWPORT_INDEX
||
204 info
->output_semantic_name
[i
] == TGSI_SEMANTIC_LAYER
)
207 param
= si_shader_io_get_unique_index(info
->output_semantic_name
[i
],
208 info
->output_semantic_index
[i
], false);
210 for (chan
= 0; chan
< 4; chan
++) {
211 if (!(info
->output_usagemask
[i
] & (1 << chan
)))
214 LLVMValueRef out_val
= LLVMBuildLoad(ctx
->ac
.builder
, addrs
[4 * i
+ chan
], "");
215 out_val
= ac_to_integer(&ctx
->ac
, out_val
);
217 /* GFX9 has the ESGS ring in LDS. */
218 if (ctx
->screen
->info
.chip_class
>= GFX9
) {
219 LLVMValueRef idx
= LLVMConstInt(ctx
->ac
.i32
, param
* 4 + chan
, false);
220 idx
= LLVMBuildAdd(ctx
->ac
.builder
, lds_base
, idx
, "");
221 ac_build_indexed_store(&ctx
->ac
, ctx
->esgs_ring
, idx
, out_val
);
225 ac_build_buffer_store_dword(&ctx
->ac
, ctx
->esgs_ring
, out_val
, 1, NULL
,
226 ac_get_arg(&ctx
->ac
, ctx
->es2gs_offset
),
227 (4 * param
+ chan
) * 4, ac_glc
| ac_slc
| ac_swizzled
);
231 if (ctx
->screen
->info
.chip_class
>= GFX9
)
232 si_set_es_return_value_for_gs(ctx
);
235 static LLVMValueRef
si_get_gs_wave_id(struct si_shader_context
*ctx
)
237 if (ctx
->screen
->info
.chip_class
>= GFX9
)
238 return si_unpack_param(ctx
, ctx
->merged_wave_info
, 16, 8);
240 return ac_get_arg(&ctx
->ac
, ctx
->gs_wave_id
);
243 static void emit_gs_epilogue(struct si_shader_context
*ctx
)
245 if (ctx
->shader
->key
.as_ngg
) {
246 gfx10_ngg_gs_emit_epilogue(ctx
);
250 if (ctx
->screen
->info
.chip_class
>= GFX10
)
251 LLVMBuildFence(ctx
->ac
.builder
, LLVMAtomicOrderingRelease
, false, "");
253 ac_build_sendmsg(&ctx
->ac
, AC_SENDMSG_GS_OP_NOP
| AC_SENDMSG_GS_DONE
, si_get_gs_wave_id(ctx
));
255 if (ctx
->screen
->info
.chip_class
>= GFX9
)
256 ac_build_endif(&ctx
->ac
, ctx
->merged_wrap_if_label
);
259 static void si_llvm_emit_gs_epilogue(struct ac_shader_abi
*abi
, unsigned max_outputs
,
262 struct si_shader_context
*ctx
= si_shader_context_from_abi(abi
);
263 struct si_shader_info UNUSED
*info
= &ctx
->shader
->selector
->info
;
265 assert(info
->num_outputs
<= max_outputs
);
267 emit_gs_epilogue(ctx
);
270 /* Emit one vertex from the geometry shader */
271 static void si_llvm_emit_vertex(struct ac_shader_abi
*abi
, unsigned stream
, LLVMValueRef
*addrs
)
273 struct si_shader_context
*ctx
= si_shader_context_from_abi(abi
);
275 if (ctx
->shader
->key
.as_ngg
) {
276 gfx10_ngg_gs_emit_vertex(ctx
, stream
, addrs
);
280 struct si_shader_info
*info
= &ctx
->shader
->selector
->info
;
281 struct si_shader
*shader
= ctx
->shader
;
282 LLVMValueRef soffset
= ac_get_arg(&ctx
->ac
, ctx
->gs2vs_offset
);
283 LLVMValueRef gs_next_vertex
;
284 LLVMValueRef can_emit
;
285 unsigned chan
, offset
;
288 /* Write vertex attribute values to GSVS ring */
289 gs_next_vertex
= LLVMBuildLoad(ctx
->ac
.builder
, ctx
->gs_next_vertex
[stream
], "");
291 /* If this thread has already emitted the declared maximum number of
292 * vertices, skip the write: excessive vertex emissions are not
293 * supposed to have any effect.
295 * If the shader has no writes to memory, kill it instead. This skips
296 * further memory loads and may allow LLVM to skip to the end
300 LLVMBuildICmp(ctx
->ac
.builder
, LLVMIntULT
, gs_next_vertex
,
301 LLVMConstInt(ctx
->ac
.i32
, shader
->selector
->gs_max_out_vertices
, 0), "");
303 bool use_kill
= !info
->writes_memory
;
305 ac_build_kill_if_false(&ctx
->ac
, can_emit
);
307 ac_build_ifcc(&ctx
->ac
, can_emit
, 6505);
311 for (i
= 0; i
< info
->num_outputs
; i
++) {
312 for (chan
= 0; chan
< 4; chan
++) {
313 if (!(info
->output_usagemask
[i
] & (1 << chan
)) ||
314 ((info
->output_streams
[i
] >> (2 * chan
)) & 3) != stream
)
317 LLVMValueRef out_val
= LLVMBuildLoad(ctx
->ac
.builder
, addrs
[4 * i
+ chan
], "");
318 LLVMValueRef voffset
=
319 LLVMConstInt(ctx
->ac
.i32
, offset
* shader
->selector
->gs_max_out_vertices
, 0);
322 voffset
= LLVMBuildAdd(ctx
->ac
.builder
, voffset
, gs_next_vertex
, "");
323 voffset
= LLVMBuildMul(ctx
->ac
.builder
, voffset
, LLVMConstInt(ctx
->ac
.i32
, 4, 0), "");
325 out_val
= ac_to_integer(&ctx
->ac
, out_val
);
327 ac_build_buffer_store_dword(&ctx
->ac
, ctx
->gsvs_ring
[stream
], out_val
, 1, voffset
, soffset
,
328 0, ac_glc
| ac_slc
| ac_swizzled
);
332 gs_next_vertex
= LLVMBuildAdd(ctx
->ac
.builder
, gs_next_vertex
, ctx
->ac
.i32_1
, "");
333 LLVMBuildStore(ctx
->ac
.builder
, gs_next_vertex
, ctx
->gs_next_vertex
[stream
]);
335 /* Signal vertex emission if vertex data was written. */
337 ac_build_sendmsg(&ctx
->ac
, AC_SENDMSG_GS_OP_EMIT
| AC_SENDMSG_GS
| (stream
<< 8),
338 si_get_gs_wave_id(ctx
));
342 ac_build_endif(&ctx
->ac
, 6505);
345 /* Cut one primitive from the geometry shader */
346 static void si_llvm_emit_primitive(struct ac_shader_abi
*abi
, unsigned stream
)
348 struct si_shader_context
*ctx
= si_shader_context_from_abi(abi
);
350 if (ctx
->shader
->key
.as_ngg
) {
351 LLVMBuildStore(ctx
->ac
.builder
, ctx
->ac
.i32_0
, ctx
->gs_curprim_verts
[stream
]);
355 /* Signal primitive cut */
356 ac_build_sendmsg(&ctx
->ac
, AC_SENDMSG_GS_OP_CUT
| AC_SENDMSG_GS
| (stream
<< 8),
357 si_get_gs_wave_id(ctx
));
360 void si_preload_esgs_ring(struct si_shader_context
*ctx
)
362 if (ctx
->screen
->info
.chip_class
<= GFX8
) {
363 unsigned ring
= ctx
->stage
== MESA_SHADER_GEOMETRY
? SI_GS_RING_ESGS
: SI_ES_RING_ESGS
;
364 LLVMValueRef offset
= LLVMConstInt(ctx
->ac
.i32
, ring
, 0);
365 LLVMValueRef buf_ptr
= ac_get_arg(&ctx
->ac
, ctx
->rw_buffers
);
367 ctx
->esgs_ring
= ac_build_load_to_sgpr(&ctx
->ac
, buf_ptr
, offset
);
369 if (USE_LDS_SYMBOLS
&& LLVM_VERSION_MAJOR
>= 9) {
370 /* Declare the ESGS ring as an explicit LDS symbol. */
371 si_llvm_declare_esgs_ring(ctx
);
373 ac_declare_lds_as_pointer(&ctx
->ac
);
374 ctx
->esgs_ring
= ctx
->ac
.lds
;
379 void si_preload_gs_rings(struct si_shader_context
*ctx
)
381 const struct si_shader_selector
*sel
= ctx
->shader
->selector
;
382 LLVMBuilderRef builder
= ctx
->ac
.builder
;
383 LLVMValueRef offset
= LLVMConstInt(ctx
->ac
.i32
, SI_RING_GSVS
, 0);
384 LLVMValueRef buf_ptr
= ac_get_arg(&ctx
->ac
, ctx
->rw_buffers
);
385 LLVMValueRef base_ring
= ac_build_load_to_sgpr(&ctx
->ac
, buf_ptr
, offset
);
387 /* The conceptual layout of the GSVS ring is
388 * v0c0 .. vLv0 v0c1 .. vLc1 ..
389 * but the real memory layout is swizzled across
391 * t0v0c0 .. t15v0c0 t0v1c0 .. t15v1c0 ... t15vLcL
393 * Override the buffer descriptor accordingly.
395 LLVMTypeRef v2i64
= LLVMVectorType(ctx
->ac
.i64
, 2);
396 uint64_t stream_offset
= 0;
398 for (unsigned stream
= 0; stream
< 4; ++stream
) {
399 unsigned num_components
;
401 unsigned num_records
;
402 LLVMValueRef ring
, tmp
;
404 num_components
= sel
->info
.num_stream_output_components
[stream
];
408 stride
= 4 * num_components
* sel
->gs_max_out_vertices
;
410 /* Limit on the stride field for <= GFX7. */
411 assert(stride
< (1 << 14));
413 num_records
= ctx
->ac
.wave_size
;
415 ring
= LLVMBuildBitCast(builder
, base_ring
, v2i64
, "");
416 tmp
= LLVMBuildExtractElement(builder
, ring
, ctx
->ac
.i32_0
, "");
417 tmp
= LLVMBuildAdd(builder
, tmp
, LLVMConstInt(ctx
->ac
.i64
, stream_offset
, 0), "");
418 stream_offset
+= stride
* ctx
->ac
.wave_size
;
420 ring
= LLVMBuildInsertElement(builder
, ring
, tmp
, ctx
->ac
.i32_0
, "");
421 ring
= LLVMBuildBitCast(builder
, ring
, ctx
->ac
.v4i32
, "");
422 tmp
= LLVMBuildExtractElement(builder
, ring
, ctx
->ac
.i32_1
, "");
425 LLVMConstInt(ctx
->ac
.i32
, S_008F04_STRIDE(stride
) | S_008F04_SWIZZLE_ENABLE(1), 0), "");
426 ring
= LLVMBuildInsertElement(builder
, ring
, tmp
, ctx
->ac
.i32_1
, "");
427 ring
= LLVMBuildInsertElement(builder
, ring
, LLVMConstInt(ctx
->ac
.i32
, num_records
, 0),
428 LLVMConstInt(ctx
->ac
.i32
, 2, 0), "");
431 S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X
) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y
) |
432 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z
) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W
) |
433 S_008F0C_INDEX_STRIDE(1) | /* index_stride = 16 (elements) */
434 S_008F0C_ADD_TID_ENABLE(1);
436 if (ctx
->ac
.chip_class
>= GFX10
) {
437 rsrc3
|= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT
) |
438 S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED
) | S_008F0C_RESOURCE_LEVEL(1);
440 rsrc3
|= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT
) |
441 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32
) |
442 S_008F0C_ELEMENT_SIZE(1); /* element_size = 4 (bytes) */
445 ring
= LLVMBuildInsertElement(builder
, ring
, LLVMConstInt(ctx
->ac
.i32
, rsrc3
, false),
446 LLVMConstInt(ctx
->ac
.i32
, 3, 0), "");
448 ctx
->gsvs_ring
[stream
] = ring
;
452 /* Generate code for the hardware VS shader stage to go with a geometry shader */
453 struct si_shader
*si_generate_gs_copy_shader(struct si_screen
*sscreen
,
454 struct ac_llvm_compiler
*compiler
,
455 struct si_shader_selector
*gs_selector
,
456 struct pipe_debug_callback
*debug
)
458 struct si_shader_context ctx
;
459 struct si_shader
*shader
;
460 LLVMBuilderRef builder
;
461 struct si_shader_output_values outputs
[SI_MAX_VS_OUTPUTS
];
462 struct si_shader_info
*gsinfo
= &gs_selector
->info
;
465 shader
= CALLOC_STRUCT(si_shader
);
469 /* We can leave the fence as permanently signaled because the GS copy
470 * shader only becomes visible globally after it has been compiled. */
471 util_queue_fence_init(&shader
->ready
);
473 shader
->selector
= gs_selector
;
474 shader
->is_gs_copy_shader
= true;
476 si_llvm_context_init(&ctx
, sscreen
, compiler
,
477 si_get_wave_size(sscreen
, MESA_SHADER_VERTEX
,
478 false, false, false, false));
480 ctx
.type
= PIPE_SHADER_VERTEX
;
481 ctx
.stage
= MESA_SHADER_VERTEX
;
483 builder
= ctx
.ac
.builder
;
485 si_create_function(&ctx
, false);
487 LLVMValueRef buf_ptr
= ac_get_arg(&ctx
.ac
, ctx
.rw_buffers
);
489 ac_build_load_to_sgpr(&ctx
.ac
, buf_ptr
, LLVMConstInt(ctx
.ac
.i32
, SI_RING_GSVS
, 0));
491 LLVMValueRef voffset
=
492 LLVMBuildMul(ctx
.ac
.builder
, ctx
.abi
.vertex_id
, LLVMConstInt(ctx
.ac
.i32
, 4, 0), "");
494 /* Fetch the vertex stream ID.*/
495 LLVMValueRef stream_id
;
497 if (!sscreen
->use_ngg_streamout
&& gs_selector
->so
.num_outputs
)
498 stream_id
= si_unpack_param(&ctx
, ctx
.streamout_config
, 24, 2);
500 stream_id
= ctx
.ac
.i32_0
;
502 /* Fill in output information. */
503 for (i
= 0; i
< gsinfo
->num_outputs
; ++i
) {
504 outputs
[i
].semantic_name
= gsinfo
->output_semantic_name
[i
];
505 outputs
[i
].semantic_index
= gsinfo
->output_semantic_index
[i
];
507 for (int chan
= 0; chan
< 4; chan
++) {
508 outputs
[i
].vertex_stream
[chan
] = (gsinfo
->output_streams
[i
] >> (2 * chan
)) & 3;
512 LLVMBasicBlockRef end_bb
;
513 LLVMValueRef switch_inst
;
515 end_bb
= LLVMAppendBasicBlockInContext(ctx
.ac
.context
, ctx
.main_fn
, "end");
516 switch_inst
= LLVMBuildSwitch(builder
, stream_id
, end_bb
, 4);
518 for (int stream
= 0; stream
< 4; stream
++) {
519 LLVMBasicBlockRef bb
;
522 if (!gsinfo
->num_stream_output_components
[stream
])
525 if (stream
> 0 && !gs_selector
->so
.num_outputs
)
528 bb
= LLVMInsertBasicBlockInContext(ctx
.ac
.context
, end_bb
, "out");
529 LLVMAddCase(switch_inst
, LLVMConstInt(ctx
.ac
.i32
, stream
, 0), bb
);
530 LLVMPositionBuilderAtEnd(builder
, bb
);
532 /* Fetch vertex data from GSVS ring */
534 for (i
= 0; i
< gsinfo
->num_outputs
; ++i
) {
535 for (unsigned chan
= 0; chan
< 4; chan
++) {
536 if (!(gsinfo
->output_usagemask
[i
] & (1 << chan
)) ||
537 outputs
[i
].vertex_stream
[chan
] != stream
) {
538 outputs
[i
].values
[chan
] = LLVMGetUndef(ctx
.ac
.f32
);
542 LLVMValueRef soffset
=
543 LLVMConstInt(ctx
.ac
.i32
, offset
* gs_selector
->gs_max_out_vertices
* 16 * 4, 0);
546 outputs
[i
].values
[chan
] =
547 ac_build_buffer_load(&ctx
.ac
, ctx
.gsvs_ring
[0], 1, ctx
.ac
.i32_0
, voffset
, soffset
, 0,
548 ac_glc
| ac_slc
, true, false);
552 /* Streamout and exports. */
553 if (!sscreen
->use_ngg_streamout
&& gs_selector
->so
.num_outputs
) {
554 si_llvm_emit_streamout(&ctx
, outputs
, gsinfo
->num_outputs
, stream
);
558 si_llvm_build_vs_exports(&ctx
, outputs
, gsinfo
->num_outputs
);
560 LLVMBuildBr(builder
, end_bb
);
563 LLVMPositionBuilderAtEnd(builder
, end_bb
);
565 LLVMBuildRetVoid(ctx
.ac
.builder
);
567 ctx
.type
= PIPE_SHADER_GEOMETRY
; /* override for shader dumping */
568 ctx
.stage
= MESA_SHADER_GEOMETRY
; /* override for shader dumping */
569 si_llvm_optimize_module(&ctx
);
572 if (si_compile_llvm(sscreen
, &ctx
.shader
->binary
, &ctx
.shader
->config
, ctx
.compiler
, &ctx
.ac
,
573 debug
, MESA_SHADER_GEOMETRY
, "GS Copy Shader", false)) {
574 if (si_can_dump_shader(sscreen
, MESA_SHADER_GEOMETRY
))
575 fprintf(stderr
, "GS Copy Shader:\n");
576 si_shader_dump(sscreen
, ctx
.shader
, debug
, stderr
, true);
578 if (!ctx
.shader
->config
.scratch_bytes_per_wave
)
579 ok
= si_shader_binary_upload(sscreen
, ctx
.shader
, 0);
584 si_llvm_dispose(&ctx
);
590 si_fix_resource_usage(sscreen
, shader
);
596 * Build the GS prolog function. Rotate the input vertices for triangle strips
599 void si_llvm_build_gs_prolog(struct si_shader_context
*ctx
, union si_shader_part_key
*key
)
601 unsigned num_sgprs
, num_vgprs
;
602 LLVMBuilderRef builder
= ctx
->ac
.builder
;
603 LLVMTypeRef returns
[AC_MAX_ARGS
];
604 LLVMValueRef func
, ret
;
606 memset(&ctx
->args
, 0, sizeof(ctx
->args
));
608 if (ctx
->screen
->info
.chip_class
>= GFX9
) {
609 if (key
->gs_prolog
.states
.gfx9_prev_is_vs
)
610 num_sgprs
= 8 + GFX9_VSGS_NUM_USER_SGPR
;
612 num_sgprs
= 8 + GFX9_TESGS_NUM_USER_SGPR
;
613 num_vgprs
= 5; /* ES inputs are not needed by GS */
615 num_sgprs
= GFX6_GS_NUM_USER_SGPR
+ 2;
619 for (unsigned i
= 0; i
< num_sgprs
; ++i
) {
620 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
, NULL
);
621 returns
[i
] = ctx
->ac
.i32
;
624 for (unsigned i
= 0; i
< num_vgprs
; ++i
) {
625 ac_add_arg(&ctx
->args
, AC_ARG_VGPR
, 1, AC_ARG_INT
, NULL
);
626 returns
[num_sgprs
+ i
] = ctx
->ac
.f32
;
629 /* Create the function. */
630 si_llvm_create_func(ctx
, "gs_prolog", returns
, num_sgprs
+ num_vgprs
, 0);
633 /* Set the full EXEC mask for the prolog, because we are only fiddling
634 * with registers here. The main shader part will set the correct EXEC
637 if (ctx
->screen
->info
.chip_class
>= GFX9
&& !key
->gs_prolog
.is_monolithic
)
638 ac_init_exec_full_mask(&ctx
->ac
);
640 /* Copy inputs to outputs. This should be no-op, as the registers match,
641 * but it will prevent the compiler from overwriting them unintentionally.
643 ret
= ctx
->return_value
;
644 for (unsigned i
= 0; i
< num_sgprs
; i
++) {
645 LLVMValueRef p
= LLVMGetParam(func
, i
);
646 ret
= LLVMBuildInsertValue(builder
, ret
, p
, i
, "");
648 for (unsigned i
= 0; i
< num_vgprs
; i
++) {
649 LLVMValueRef p
= LLVMGetParam(func
, num_sgprs
+ i
);
650 p
= ac_to_float(&ctx
->ac
, p
);
651 ret
= LLVMBuildInsertValue(builder
, ret
, p
, num_sgprs
+ i
, "");
654 if (key
->gs_prolog
.states
.tri_strip_adj_fix
) {
655 /* Remap the input vertices for every other primitive. */
656 const struct ac_arg gfx6_vtx_params
[6] = {
657 {.used
= true, .arg_index
= num_sgprs
}, {.used
= true, .arg_index
= num_sgprs
+ 1},
658 {.used
= true, .arg_index
= num_sgprs
+ 3}, {.used
= true, .arg_index
= num_sgprs
+ 4},
659 {.used
= true, .arg_index
= num_sgprs
+ 5}, {.used
= true, .arg_index
= num_sgprs
+ 6},
661 const struct ac_arg gfx9_vtx_params
[3] = {
662 {.used
= true, .arg_index
= num_sgprs
},
663 {.used
= true, .arg_index
= num_sgprs
+ 1},
664 {.used
= true, .arg_index
= num_sgprs
+ 4},
666 LLVMValueRef vtx_in
[6], vtx_out
[6];
667 LLVMValueRef prim_id
, rotate
;
669 if (ctx
->screen
->info
.chip_class
>= GFX9
) {
670 for (unsigned i
= 0; i
< 3; i
++) {
671 vtx_in
[i
* 2] = si_unpack_param(ctx
, gfx9_vtx_params
[i
], 0, 16);
672 vtx_in
[i
* 2 + 1] = si_unpack_param(ctx
, gfx9_vtx_params
[i
], 16, 16);
675 for (unsigned i
= 0; i
< 6; i
++)
676 vtx_in
[i
] = ac_get_arg(&ctx
->ac
, gfx6_vtx_params
[i
]);
679 prim_id
= LLVMGetParam(func
, num_sgprs
+ 2);
680 rotate
= LLVMBuildTrunc(builder
, prim_id
, ctx
->ac
.i1
, "");
682 for (unsigned i
= 0; i
< 6; ++i
) {
683 LLVMValueRef base
, rotated
;
685 rotated
= vtx_in
[(i
+ 4) % 6];
686 vtx_out
[i
] = LLVMBuildSelect(builder
, rotate
, rotated
, base
, "");
689 if (ctx
->screen
->info
.chip_class
>= GFX9
) {
690 for (unsigned i
= 0; i
< 3; i
++) {
691 LLVMValueRef hi
, out
;
693 hi
= LLVMBuildShl(builder
, vtx_out
[i
* 2 + 1], LLVMConstInt(ctx
->ac
.i32
, 16, 0), "");
694 out
= LLVMBuildOr(builder
, vtx_out
[i
* 2], hi
, "");
695 out
= ac_to_float(&ctx
->ac
, out
);
696 ret
= LLVMBuildInsertValue(builder
, ret
, out
, gfx9_vtx_params
[i
].arg_index
, "");
699 for (unsigned i
= 0; i
< 6; i
++) {
702 out
= ac_to_float(&ctx
->ac
, vtx_out
[i
]);
703 ret
= LLVMBuildInsertValue(builder
, ret
, out
, gfx6_vtx_params
[i
].arg_index
, "");
708 LLVMBuildRet(builder
, ret
);
711 void si_llvm_init_gs_callbacks(struct si_shader_context
*ctx
)
713 ctx
->abi
.load_inputs
= si_nir_load_input_gs
;
714 ctx
->abi
.emit_vertex
= si_llvm_emit_vertex
;
715 ctx
->abi
.emit_primitive
= si_llvm_emit_primitive
;
716 ctx
->abi
.emit_outputs
= si_llvm_emit_gs_epilogue
;