2 * Copyright 2020 Advanced Micro Devices, Inc.
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * on the rights to use, copy, modify, merge, publish, distribute, sub
9 * license, and/or sell copies of the Software, and to permit persons to whom
10 * the Software is furnished to do so, subject to the following conditions:
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22 * USE OR OTHER DEALINGS IN THE SOFTWARE.
26 #include "si_shader_internal.h"
28 #include "util/u_memory.h"
30 static LLVMValueRef
unpack_sint16(struct si_shader_context
*ctx
, LLVMValueRef i32
, unsigned index
)
35 return LLVMBuildAShr(ctx
->ac
.builder
, i32
, LLVMConstInt(ctx
->ac
.i32
, 16, 0), "");
37 return LLVMBuildSExt(ctx
->ac
.builder
, LLVMBuildTrunc(ctx
->ac
.builder
, i32
, ctx
->ac
.i16
, ""),
/* Load one vertex attribute into out[0..3] (always as f32-typed values).
 *
 * Two paths:
 *  - u_blitter shaders (VS_BLIT_SGPRS_AMD property set): inputs come from
 *    SGPRs instead of vertex buffers, selected per-vertex with icmp/select.
 *  - Regular shaders: fetch from the vertex buffer descriptor, with optional
 *    open-coded loads and format fixups driven by the shader key.
 */
static void load_input_vs(struct si_shader_context *ctx, unsigned input_index, LLVMValueRef out[4])
{
   const struct si_shader_info *info = &ctx->shader->selector->info;
   unsigned vs_blit_property = info->properties[TGSI_PROPERTY_VS_BLIT_SGPRS_AMD];

   if (vs_blit_property) {
      LLVMValueRef vertex_id = ctx->abi.vertex_id;

      /* Blit rectangles are 3 vertices; vertex 0 and 1 use (x1,*), vertex 2
       * uses (x2,*). */
      LLVMValueRef sel_x1 =
         LLVMBuildICmp(ctx->ac.builder, LLVMIntULE, vertex_id, ctx->ac.i32_1, "");
      /* Use LLVMIntNE, because we have 3 vertices and only
       * the middle one should use y2. */
      LLVMValueRef sel_y1 =
         LLVMBuildICmp(ctx->ac.builder, LLVMIntNE, vertex_id, ctx->ac.i32_1, "");

      unsigned param_vs_blit_inputs = ctx->vs_blit_inputs.arg_index;
      if (input_index == 0) {
         /* Position: two packed s16 pairs (x1,y1) and (x2,y2). */
         LLVMValueRef x1y1 = LLVMGetParam(ctx->main_fn, param_vs_blit_inputs);
         LLVMValueRef x2y2 = LLVMGetParam(ctx->main_fn, param_vs_blit_inputs + 1);

         LLVMValueRef x1 = unpack_sint16(ctx, x1y1, 0);
         LLVMValueRef y1 = unpack_sint16(ctx, x1y1, 1);
         LLVMValueRef x2 = unpack_sint16(ctx, x2y2, 0);
         LLVMValueRef y2 = unpack_sint16(ctx, x2y2, 1);

         LLVMValueRef x = LLVMBuildSelect(ctx->ac.builder, sel_x1, x1, x2, "");
         LLVMValueRef y = LLVMBuildSelect(ctx->ac.builder, sel_y1, y1, y2, "");

         out[0] = LLVMBuildSIToFP(ctx->ac.builder, x, ctx->ac.f32, "");
         out[1] = LLVMBuildSIToFP(ctx->ac.builder, y, ctx->ac.f32, "");
         out[2] = LLVMGetParam(ctx->main_fn, param_vs_blit_inputs + 2); /* depth */
         out[3] = ctx->ac.f32_1;
         return;
      }

      /* Color or texture coordinates: */
      assert(input_index == 1);

      if (vs_blit_property == SI_VS_BLIT_SGPRS_POS_COLOR) {
         for (int i = 0; i < 4; i++) {
            out[i] = LLVMGetParam(ctx->main_fn, param_vs_blit_inputs + 3 + i);
         }
      } else {
         assert(vs_blit_property == SI_VS_BLIT_SGPRS_POS_TEXCOORD);
         LLVMValueRef x1 = LLVMGetParam(ctx->main_fn, param_vs_blit_inputs + 3);
         LLVMValueRef y1 = LLVMGetParam(ctx->main_fn, param_vs_blit_inputs + 4);
         LLVMValueRef x2 = LLVMGetParam(ctx->main_fn, param_vs_blit_inputs + 5);
         LLVMValueRef y2 = LLVMGetParam(ctx->main_fn, param_vs_blit_inputs + 6);

         out[0] = LLVMBuildSelect(ctx->ac.builder, sel_x1, x1, x2, "");
         out[1] = LLVMBuildSelect(ctx->ac.builder, sel_y1, y1, y2, "");
         out[2] = LLVMGetParam(ctx->main_fn, param_vs_blit_inputs + 7);
         out[3] = LLVMGetParam(ctx->main_fn, param_vs_blit_inputs + 8);
      }
      return;
   }

   unsigned num_vbos_in_user_sgprs = ctx->shader->selector->num_vbos_in_user_sgprs;
   union si_vs_fix_fetch fix_fetch;
   LLVMValueRef vb_desc;
   LLVMValueRef vertex_index;
   LLVMValueRef tmp;

   /* Vertex buffer descriptor: either preloaded in user SGPRs, or loaded
    * from the descriptor list. */
   if (input_index < num_vbos_in_user_sgprs) {
      vb_desc = ac_get_arg(&ctx->ac, ctx->vb_descriptors[input_index]);
   } else {
      unsigned index = input_index - num_vbos_in_user_sgprs;
      vb_desc = ac_build_load_to_sgpr(&ctx->ac, ac_get_arg(&ctx->ac, ctx->vertex_buffers),
                                      LLVMConstInt(ctx->ac.i32, index, 0));
   }

   vertex_index = LLVMGetParam(ctx->main_fn, ctx->vertex_index0.arg_index + input_index);

   /* Use the open-coded implementation for all loads of doubles and
    * of dword-sized data that needs fixups. We need to insert conversion
    * code anyway, and the amd/common code does it for us.
    *
    * Note: On LLVM <= 8, we can only open-code formats with
    * channel size >= 4 bytes.
    */
   bool opencode = ctx->shader->key.mono.vs_fetch_opencode & (1 << input_index);
   fix_fetch.bits = ctx->shader->key.mono.vs_fix_fetch[input_index].bits;
   if (opencode || (fix_fetch.u.log_size == 3 && fix_fetch.u.format == AC_FETCH_FORMAT_FLOAT) ||
       (fix_fetch.u.log_size == 2)) {
      tmp = ac_build_opencoded_load_format(&ctx->ac, fix_fetch.u.log_size,
                                           fix_fetch.u.num_channels_m1 + 1, fix_fetch.u.format,
                                           fix_fetch.u.reverse, !opencode, vb_desc, vertex_index,
                                           ctx->ac.i32_0, ctx->ac.i32_0, 0, true);
      for (unsigned i = 0; i < 4; ++i)
         out[i] = LLVMBuildExtractElement(ctx->ac.builder, tmp,
                                          LLVMConstInt(ctx->ac.i32, i, false), "");
      return;
   }

   unsigned required_channels = util_last_bit(info->input_usage_mask[input_index]);
   if (required_channels == 0) {
      /* Nothing reads this input; emit undefs. */
      for (unsigned i = 0; i < 4; ++i)
         out[i] = LLVMGetUndef(ctx->ac.f32);
      return;
   }

   /* Do multiple loads for special formats. */
   LLVMValueRef fetches[4];
   unsigned num_fetches;
   unsigned fetch_stride;
   unsigned channels_per_fetch;

   if (fix_fetch.u.log_size <= 1 && fix_fetch.u.num_channels_m1 == 2) {
      /* 3-channel 8/16-bit formats: fetch each channel separately. */
      num_fetches = MIN2(required_channels, 3);
      fetch_stride = 1 << fix_fetch.u.log_size;
      channels_per_fetch = 1;
   } else {
      num_fetches = 1;
      fetch_stride = 0;
      channels_per_fetch = required_channels;
   }

   for (unsigned i = 0; i < num_fetches; ++i) {
      LLVMValueRef voffset = LLVMConstInt(ctx->ac.i32, fetch_stride * i, 0);
      fetches[i] = ac_build_buffer_load_format(&ctx->ac, vb_desc, vertex_index, voffset,
                                               channels_per_fetch, 0, true, false);
   }

   /* If a single vectorized fetch was done, scalarize the result so the
    * fixup code below can treat fetches[] uniformly. */
   if (num_fetches == 1 && channels_per_fetch > 1) {
      LLVMValueRef fetch = fetches[0];
      for (unsigned i = 0; i < channels_per_fetch; ++i) {
         tmp = LLVMConstInt(ctx->ac.i32, i, false);
         fetches[i] = LLVMBuildExtractElement(ctx->ac.builder, fetch, tmp, "");
      }
      num_fetches = channels_per_fetch;
      channels_per_fetch = 1;
   }

   for (unsigned i = num_fetches; i < 4; ++i)
      fetches[i] = LLVMGetUndef(ctx->ac.f32);

   if (fix_fetch.u.log_size <= 1 && fix_fetch.u.num_channels_m1 == 2 && required_channels == 4) {
      /* 3-channel format read as vec4: synthesize W = 1 of the right type. */
      if (fix_fetch.u.format == AC_FETCH_FORMAT_UINT || fix_fetch.u.format == AC_FETCH_FORMAT_SINT)
         fetches[3] = ctx->ac.i32_1;
      else
         fetches[3] = ctx->ac.f32_1;
   } else if (fix_fetch.u.log_size == 3 &&
              (fix_fetch.u.format == AC_FETCH_FORMAT_SNORM ||
               fix_fetch.u.format == AC_FETCH_FORMAT_SSCALED ||
               fix_fetch.u.format == AC_FETCH_FORMAT_SINT) &&
              required_channels == 4) {
      /* For 2_10_10_10, the hardware returns an unsigned value;
       * convert it to a signed one.
       */
      LLVMValueRef tmp = fetches[3];
      LLVMValueRef c30 = LLVMConstInt(ctx->ac.i32, 30, 0);

      /* First, recover the sign-extended signed integer value. */
      if (fix_fetch.u.format == AC_FETCH_FORMAT_SSCALED)
         tmp = LLVMBuildFPToUI(ctx->ac.builder, tmp, ctx->ac.i32, "");
      else
         tmp = ac_to_integer(&ctx->ac, tmp);

      /* For the integer-like cases, do a natural sign extension.
       *
       * For the SNORM case, the values are 0.0, 0.333, 0.666, 1.0
       * and happen to contain 0, 1, 2, 3 as the two LSBs of the
       * exponent.
       */
      tmp = LLVMBuildShl(
         ctx->ac.builder, tmp,
         fix_fetch.u.format == AC_FETCH_FORMAT_SNORM ? LLVMConstInt(ctx->ac.i32, 7, 0) : c30, "");
      tmp = LLVMBuildAShr(ctx->ac.builder, tmp, c30, "");

      /* Convert back to the right type. */
      if (fix_fetch.u.format == AC_FETCH_FORMAT_SNORM) {
         LLVMValueRef clamp;
         LLVMValueRef neg_one = LLVMConstReal(ctx->ac.f32, -1.0);
         tmp = LLVMBuildSIToFP(ctx->ac.builder, tmp, ctx->ac.f32, "");
         /* Clamp to [-1, 1]: -2 is representable in the 2-bit field. */
         clamp = LLVMBuildFCmp(ctx->ac.builder, LLVMRealULT, tmp, neg_one, "");
         tmp = LLVMBuildSelect(ctx->ac.builder, clamp, neg_one, tmp, "");
      } else if (fix_fetch.u.format == AC_FETCH_FORMAT_SSCALED) {
         tmp = LLVMBuildSIToFP(ctx->ac.builder, tmp, ctx->ac.f32, "");
      }

      fetches[3] = tmp;
   }

   for (unsigned i = 0; i < 4; ++i)
      out[i] = ac_to_float(&ctx->ac, fetches[i]);
}
228 void si_llvm_load_vs_inputs(struct si_shader_context
*ctx
, struct nir_shader
*nir
)
230 const struct si_shader_info
*info
= &ctx
->shader
->selector
->info
;
232 for (unsigned i
= 0; i
< info
->num_inputs
; i
++) {
233 LLVMValueRef values
[4];
235 load_input_vs(ctx
, i
, values
);
237 for (unsigned chan
= 0; chan
< 4; chan
++) {
238 ctx
->inputs
[i
* 4 + chan
] =
239 LLVMBuildBitCast(ctx
->ac
.builder
, values
[chan
], ctx
->ac
.i32
, "");
/* Store one shader output to its transform-feedback (streamout) buffer.
 * Packs the written components into the narrowest dword vector the chip
 * supports and issues a single buffer store. */
void si_llvm_streamout_store_output(struct si_shader_context *ctx, LLVMValueRef const *so_buffers,
                                    LLVMValueRef const *so_write_offsets,
                                    struct pipe_stream_output *stream_out,
                                    struct si_shader_output_values *shader_out)
{
   unsigned buf_idx = stream_out->output_buffer;
   unsigned start = stream_out->start_component;
   unsigned num_comps = stream_out->num_components;
   LLVMValueRef out[4];

   assert(num_comps && num_comps <= 4);
   if (!num_comps || num_comps > 4)
      return;

   /* Load the output as int. */
   for (int j = 0; j < num_comps; j++) {
      assert(stream_out->stream == shader_out->vertex_stream[start + j]);

      out[j] = ac_to_integer(&ctx->ac, shader_out->values[start + j]);
   }

   /* Pack the output. */
   LLVMValueRef vdata = NULL;

   switch (num_comps) {
   case 1: /* as i32 */
      vdata = out[0];
      break;
   case 2: /* as v2i32 */
   case 3: /* as v3i32 */
      if (ac_has_vec3_support(ctx->screen->info.chip_class, false)) {
         vdata = ac_build_gather_values(&ctx->ac, out, num_comps);
         break;
      }
      /* as v4i32 (aligned to 4) — pad with undef and fall through. */
      out[3] = LLVMGetUndef(ctx->ac.i32);
      /* fall through */
   case 4: /* as v4i32 */
      vdata = ac_build_gather_values(&ctx->ac, out, util_next_power_of_two(num_comps));
      break;
   }

   /* NOTE(review): trailing cache-policy argument reconstructed as
    * ac_glc | ac_slc (streamout stores bypass caches) — confirm against
    * upstream. */
   ac_build_buffer_store_dword(&ctx->ac, so_buffers[buf_idx], vdata, num_comps,
                               so_write_offsets[buf_idx], ctx->ac.i32_0,
                               stream_out->dst_offset * 4, ac_glc | ac_slc);
}
/**
 * Write streamout data to buffers for vertex stream @p stream (different
 * vertex streams can occur for GS copy shaders).
 */
void si_llvm_emit_streamout(struct si_shader_context *ctx, struct si_shader_output_values *outputs,
                            unsigned noutput, unsigned stream)
{
   struct si_shader_selector *sel = ctx->shader->selector;
   struct pipe_stream_output_info *so = &sel->so;
   LLVMBuilderRef builder = ctx->ac.builder;
   int i;

   /* Get bits [22:16], i.e. (so_param >> 16) & 127; */
   LLVMValueRef so_vtx_count = si_unpack_param(ctx, ctx->streamout_config, 16, 7);

   LLVMValueRef tid = ac_get_thread_id(&ctx->ac);

   /* can_emit = tid < so_vtx_count; */
   LLVMValueRef can_emit = LLVMBuildICmp(builder, LLVMIntULT, tid, so_vtx_count, "");

   /* Emit the streamout code conditionally. This actually avoids
    * out-of-bounds buffer access. The hw tells us via the SGPR
    * (so_vtx_count) which threads are allowed to emit streamout data. */
   ac_build_ifcc(&ctx->ac, can_emit, 6501);
   {
      /* The buffer offset is computed as follows:
       *   ByteOffset = streamout_offset[buffer_id]*4 +
       *                (streamout_write_index + thread_id)*stride[buffer_id] +
       *                attrib_offset
       */
      LLVMValueRef so_write_index = ac_get_arg(&ctx->ac, ctx->streamout_write_index);

      /* Compute (streamout_write_index + thread_id). */
      so_write_index = LLVMBuildAdd(builder, so_write_index, tid, "");

      /* Load the descriptor and compute the write offset for each
       * enabled buffer. */
      LLVMValueRef so_write_offset[4] = {};
      LLVMValueRef so_buffers[4];
      LLVMValueRef buf_ptr = ac_get_arg(&ctx->ac, ctx->rw_buffers);

      for (i = 0; i < 4; i++) {
         /* Skip buffers that this shader does not write to. */
         if (!so->stride[i])
            continue;

         LLVMValueRef offset = LLVMConstInt(ctx->ac.i32, SI_VS_STREAMOUT_BUF0 + i, 0);

         so_buffers[i] = ac_build_load_to_sgpr(&ctx->ac, buf_ptr, offset);

         LLVMValueRef so_offset = ac_get_arg(&ctx->ac, ctx->streamout_offset[i]);
         so_offset = LLVMBuildMul(builder, so_offset, LLVMConstInt(ctx->ac.i32, 4, 0), "");

         so_write_offset[i] = ac_build_imad(
            &ctx->ac, so_write_index, LLVMConstInt(ctx->ac.i32, so->stride[i] * 4, 0), so_offset);
      }

      /* Write streamout data. */
      for (i = 0; i < so->num_outputs; i++) {
         unsigned reg = so->output[i].register_index;

         if (reg >= noutput)
            continue;

         if (stream != so->output[i].stream)
            continue;

         si_llvm_streamout_store_output(ctx, so_buffers, so_write_offset, &so->output[i],
                                        &outputs[reg]);
      }
   }
   ac_build_endif(&ctx->ac, 6501);
}
/* Compute the user clip-plane distances for a written CLIPVERTEX:
 * loads the 8 clip-plane vectors from the SI_VS_CONST_CLIP_PLANES constant
 * buffer, dots each against the clip vertex, and fills the two extra
 * position exports (pos[2] and pos[3]) with the results. */
static void si_llvm_emit_clipvertex(struct si_shader_context *ctx, struct ac_export_args *pos,
                                    LLVMValueRef *out_elts)
{
   unsigned reg_index;
   unsigned chan;
   unsigned const_chan;
   LLVMValueRef base_elt;
   LLVMValueRef ptr = ac_get_arg(&ctx->ac, ctx->rw_buffers);
   LLVMValueRef constbuf_index = LLVMConstInt(ctx->ac.i32, SI_VS_CONST_CLIP_PLANES, 0);
   LLVMValueRef const_resource = ac_build_load_to_sgpr(&ctx->ac, ptr, constbuf_index);

   /* Two exports of four clip distances each. */
   for (reg_index = 0; reg_index < 2; reg_index++) {
      struct ac_export_args *args = &pos[2 + reg_index];

      /* Start each distance at 0 and accumulate the dot product. */
      args->out[0] = args->out[1] = args->out[2] = args->out[3] =
         LLVMConstReal(ctx->ac.f32, 0.0f);

      /* Compute dot products of position and user clip plane vectors */
      for (chan = 0; chan < 4; chan++) {
         for (const_chan = 0; const_chan < 4; const_chan++) {
            /* Byte address of plane (reg_index*4 + chan), component const_chan. */
            LLVMValueRef addr =
               LLVMConstInt(ctx->ac.i32, ((reg_index * 4 + chan) * 4 + const_chan) * 4, 0);
            base_elt = si_buffer_load_const(ctx, const_resource, addr);
            args->out[chan] =
               ac_build_fmad(&ctx->ac, base_elt, out_elts[const_chan], args->out[chan]);
         }
      }

      args->enabled_channels = 0xf;
      args->valid_mask = 0;
      args->done = 0;
      args->target = V_008DFC_SQ_EXP_POS + 2 + reg_index;
      args->compr = 0;
   }
}
400 /* Initialize arguments for the shader export intrinsic */
401 static void si_llvm_init_vs_export_args(struct si_shader_context
*ctx
, LLVMValueRef
*values
,
402 unsigned target
, struct ac_export_args
*args
)
404 args
->enabled_channels
= 0xf; /* writemask - default is 0xf */
405 args
->valid_mask
= 0; /* Specify whether the EXEC mask represents the valid mask */
406 args
->done
= 0; /* Specify whether this is the last export */
407 args
->target
= target
; /* Specify the target we are exporting */
410 memcpy(&args
->out
[0], values
, sizeof(values
[0]) * 4);
/* Emit one PARAM-space export (generic vertex output consumed by the PS)
 * at the given parameter slot index. */
static void si_export_param(struct si_shader_context *ctx, unsigned index, LLVMValueRef *values)
{
   struct ac_export_args args;

   si_llvm_init_vs_export_args(ctx, values, V_008DFC_SQ_EXP_PARAM + index, &args);
   ac_build_export(&ctx->ac, &args);
}
/* Emit PARAM exports for all outputs the pixel shader can interpolate,
 * skipping outputs that only feed non-zero vertex streams and outputs
 * killed by the shader key. Records each output's parameter slot in
 * shader->info.vs_output_param_offset and the total in nr_param_exports. */
static void si_build_param_exports(struct si_shader_context *ctx,
                                   struct si_shader_output_values *outputs, unsigned noutput)
{
   struct si_shader *shader = ctx->shader;
   unsigned param_count = 0;

   for (unsigned i = 0; i < noutput; i++) {
      unsigned semantic_name = outputs[i].semantic_name;
      unsigned semantic_index = outputs[i].semantic_index;

      /* Skip if no channel belongs to vertex stream 0 (only stream 0 is
       * rasterized). */
      if (outputs[i].vertex_stream[0] != 0 && outputs[i].vertex_stream[1] != 0 &&
          outputs[i].vertex_stream[2] != 0 && outputs[i].vertex_stream[3] != 0)
         continue;

      /* Only these semantics are exported as parameters. */
      switch (semantic_name) {
      case TGSI_SEMANTIC_LAYER:
      case TGSI_SEMANTIC_VIEWPORT_INDEX:
      case TGSI_SEMANTIC_CLIPDIST:
      case TGSI_SEMANTIC_COLOR:
      case TGSI_SEMANTIC_BCOLOR:
      case TGSI_SEMANTIC_PRIMID:
      case TGSI_SEMANTIC_FOG:
      case TGSI_SEMANTIC_TEXCOORD:
      case TGSI_SEMANTIC_GENERIC:
         break;
      default:
         continue;
      }

      /* Skip outputs the linked pixel shader never reads (kill_outputs
       * bitmask, indexed by the unique I/O slot). */
      if ((semantic_name != TGSI_SEMANTIC_GENERIC || semantic_index < SI_MAX_IO_GENERIC) &&
          shader->key.opt.kill_outputs &
             (1ull << si_shader_io_get_unique_index(semantic_name, semantic_index, true)))
         continue;

      si_export_param(ctx, param_count, outputs[i].values);

      assert(i < ARRAY_SIZE(shader->info.vs_output_param_offset));
      shader->info.vs_output_param_offset[i] = param_count++;
   }

   shader->info.nr_param_exports = param_count;
}
/**
 * Vertex color clamping.
 *
 * This uses a state constant loaded in a user data SGPR and
 * an IF statement is added that clamps all colors if the constant
 * is true.
 */
static void si_vertex_color_clamping(struct si_shader_context *ctx,
                                     struct si_shader_output_values *outputs, unsigned noutput)
{
   LLVMValueRef addr[SI_MAX_VS_OUTPUTS][4];
   bool has_colors = false;

   /* Store original colors to alloca variables. */
   for (unsigned i = 0; i < noutput; i++) {
      if (outputs[i].semantic_name != TGSI_SEMANTIC_COLOR &&
          outputs[i].semantic_name != TGSI_SEMANTIC_BCOLOR)
         continue;

      for (unsigned j = 0; j < 4; j++) {
         addr[i][j] = ac_build_alloca_undef(&ctx->ac, ctx->ac.f32, "");
         LLVMBuildStore(ctx->ac.builder, outputs[i].values[j], addr[i][j]);
      }
      has_colors = true;
   }

   /* Nothing to do when the shader writes no COLOR/BCOLOR outputs. */
   if (!has_colors)
      return;

   /* The state is in the first bit of the user SGPR. */
   LLVMValueRef cond = ac_get_arg(&ctx->ac, ctx->vs_state_bits);
   cond = LLVMBuildTrunc(ctx->ac.builder, cond, ctx->ac.i1, "");

   ac_build_ifcc(&ctx->ac, cond, 6502);

   /* Store clamped colors to alloca variables within the conditional block. */
   for (unsigned i = 0; i < noutput; i++) {
      if (outputs[i].semantic_name != TGSI_SEMANTIC_COLOR &&
          outputs[i].semantic_name != TGSI_SEMANTIC_BCOLOR)
         continue;

      for (unsigned j = 0; j < 4; j++) {
         LLVMBuildStore(ctx->ac.builder, ac_build_clamp(&ctx->ac, outputs[i].values[j]),
                        addr[i][j]);
      }
   }
   ac_build_endif(&ctx->ac, 6502);

   /* Load clamped colors */
   for (unsigned i = 0; i < noutput; i++) {
      if (outputs[i].semantic_name != TGSI_SEMANTIC_COLOR &&
          outputs[i].semantic_name != TGSI_SEMANTIC_BCOLOR)
         continue;

      for (unsigned j = 0; j < 4; j++) {
         outputs[i].values[j] = LLVMBuildLoad(ctx->ac.builder, addr[i][j], "");
      }
   }
}
/* Generate export instructions for hardware VS shader stage or NGG GS stage
 * (position and parameter data only).
 */
void si_llvm_build_vs_exports(struct si_shader_context *ctx,
                              struct si_shader_output_values *outputs, unsigned noutput)
{
   struct si_shader *shader = ctx->shader;
   struct ac_export_args pos_args[4] = {};
   LLVMValueRef psize_value = NULL, edgeflag_value = NULL, layer_value = NULL,
                viewport_index_value = NULL;
   unsigned pos_idx;
   int i;

   si_vertex_color_clamping(ctx, outputs, noutput);

   /* Build position exports: gather POSITION/CLIPDIST into pos_args and
    * remember the misc values (psize, layer, viewport, edgeflag) for the
    * second position export. */
   for (i = 0; i < noutput; i++) {
      switch (outputs[i].semantic_name) {
      case TGSI_SEMANTIC_POSITION:
         si_llvm_init_vs_export_args(ctx, outputs[i].values, V_008DFC_SQ_EXP_POS, &pos_args[0]);
         break;
      case TGSI_SEMANTIC_PSIZE:
         psize_value = outputs[i].values[0];
         break;
      case TGSI_SEMANTIC_LAYER:
         layer_value = outputs[i].values[0];
         break;
      case TGSI_SEMANTIC_VIEWPORT_INDEX:
         viewport_index_value = outputs[i].values[0];
         break;
      case TGSI_SEMANTIC_EDGEFLAG:
         edgeflag_value = outputs[i].values[0];
         break;
      case TGSI_SEMANTIC_CLIPDIST:
         if (!shader->key.opt.clip_disable) {
            unsigned index = 2 + outputs[i].semantic_index;
            si_llvm_init_vs_export_args(ctx, outputs[i].values, V_008DFC_SQ_EXP_POS + index,
                                        &pos_args[index]);
         }
         break;
      case TGSI_SEMANTIC_CLIPVERTEX:
         if (!shader->key.opt.clip_disable) {
            si_llvm_emit_clipvertex(ctx, pos_args, outputs[i].values);
         }
         break;
      }
   }

   /* We need to add the position output manually if it's missing. */
   if (!pos_args[0].out[0]) {
      pos_args[0].enabled_channels = 0xf; /* writemask */
      pos_args[0].valid_mask = 0;         /* EXEC mask */
      pos_args[0].done = 0;               /* last export? */
      pos_args[0].target = V_008DFC_SQ_EXP_POS;
      pos_args[0].compr = 0;              /* COMPR flag */
      pos_args[0].out[0] = ctx->ac.f32_0; /* X */
      pos_args[0].out[1] = ctx->ac.f32_0; /* Y */
      pos_args[0].out[2] = ctx->ac.f32_0; /* Z */
      pos_args[0].out[3] = ctx->ac.f32_1; /* W */
   }

   bool pos_writes_edgeflag = shader->selector->info.writes_edgeflag && !shader->key.as_ngg;

   /* Write the misc vector (point size, edgeflag, layer, viewport). */
   if (shader->selector->info.writes_psize || pos_writes_edgeflag ||
       shader->selector->info.writes_viewport_index || shader->selector->info.writes_layer) {
      pos_args[1].enabled_channels = shader->selector->info.writes_psize |
                                     (pos_writes_edgeflag << 1) |
                                     (shader->selector->info.writes_layer << 2);

      pos_args[1].valid_mask = 0;         /* EXEC mask */
      pos_args[1].done = 0;               /* last export? */
      pos_args[1].target = V_008DFC_SQ_EXP_POS + 1;
      pos_args[1].compr = 0;              /* COMPR flag */
      pos_args[1].out[0] = ctx->ac.f32_0; /* X */
      pos_args[1].out[1] = ctx->ac.f32_0; /* Y */
      pos_args[1].out[2] = ctx->ac.f32_0; /* Z */
      pos_args[1].out[3] = ctx->ac.f32_0; /* W */

      if (shader->selector->info.writes_psize)
         pos_args[1].out[0] = psize_value;

      if (pos_writes_edgeflag) {
         /* The output is a float, but the hw expects an integer
          * with the first bit containing the edge flag. */
         edgeflag_value = LLVMBuildFPToUI(ctx->ac.builder, edgeflag_value, ctx->ac.i32, "");
         edgeflag_value = ac_build_umin(&ctx->ac, edgeflag_value, ctx->ac.i32_1);

         /* The LLVM intrinsic expects a float. */
         pos_args[1].out[1] = ac_to_float(&ctx->ac, edgeflag_value);
      }

      if (ctx->screen->info.chip_class >= GFX9) {
         /* GFX9 has the layer in out.z[10:0] and the viewport
          * index in out.z[19:16].
          */
         if (shader->selector->info.writes_layer)
            pos_args[1].out[2] = layer_value;

         if (shader->selector->info.writes_viewport_index) {
            LLVMValueRef v = viewport_index_value;

            v = ac_to_integer(&ctx->ac, v);
            v = LLVMBuildShl(ctx->ac.builder, v, LLVMConstInt(ctx->ac.i32, 16, 0), "");
            v = LLVMBuildOr(ctx->ac.builder, v, ac_to_integer(&ctx->ac, pos_args[1].out[2]), "");
            pos_args[1].out[2] = ac_to_float(&ctx->ac, v);
            pos_args[1].enabled_channels |= 1 << 2;
         }
      } else {
         /* Pre-GFX9: layer in Z, viewport index in W. */
         if (shader->selector->info.writes_layer)
            pos_args[1].out[2] = layer_value;

         if (shader->selector->info.writes_viewport_index) {
            pos_args[1].out[3] = viewport_index_value;
            pos_args[1].enabled_channels |= 1 << 3;
         }
      }
   }

   for (i = 0; i < 4; i++)
      if (pos_args[i].out[0])
         shader->info.nr_pos_exports++;

   /* GFX10 (Navi1x) skip POS0 exports if EXEC=0 and DONE=0, causing a hang.
    * Setting valid_mask=1 prevents it and has no other effect.
    */
   if (ctx->screen->info.chip_class == GFX10)
      pos_args[0].valid_mask = 1;

   pos_idx = 0;
   for (i = 0; i < 4; i++) {
      if (!pos_args[i].out[0])
         continue;

      /* Specify the target we are exporting */
      pos_args[i].target = V_008DFC_SQ_EXP_POS + pos_idx++;

      if (pos_idx == shader->info.nr_pos_exports)
         /* Specify that this is the last export */
         pos_args[i].done = 1;

      ac_build_export(&ctx->ac, &pos_args[i]);
   }

   /* Build parameter exports. */
   si_build_param_exports(ctx, outputs, noutput);
}
/* Epilogue for the hardware-VS path: read all output values from their
 * alloca addresses, run legacy streamout if enabled, optionally append a
 * PrimitiveID output, and emit the position/parameter exports. */
void si_llvm_emit_vs_epilogue(struct ac_shader_abi *abi, unsigned max_outputs, LLVMValueRef *addrs)
{
   struct si_shader_context *ctx = si_shader_context_from_abi(abi);
   struct si_shader_info *info = &ctx->shader->selector->info;
   struct si_shader_output_values *outputs = NULL;
   int i, j;

   assert(!ctx->shader->is_gs_copy_shader);
   assert(info->num_outputs <= max_outputs);

   /* +1 leaves room for the optional PrimitiveID output appended below. */
   outputs = MALLOC((info->num_outputs + 1) * sizeof(outputs[0]));

   for (i = 0; i < info->num_outputs; i++) {
      outputs[i].semantic_name = info->output_semantic_name[i];
      outputs[i].semantic_index = info->output_semantic_index[i];

      for (j = 0; j < 4; j++) {
         outputs[i].values[j] = LLVMBuildLoad(ctx->ac.builder, addrs[4 * i + j], "");
         /* 2 bits of stream index per channel. */
         outputs[i].vertex_stream[j] = (info->output_streams[i] >> (2 * j)) & 3;
      }
   }

   if (!ctx->screen->use_ngg_streamout && ctx->shader->selector->so.num_outputs)
      si_llvm_emit_streamout(ctx, outputs, i, 0);

   /* Export PrimitiveID. */
   if (ctx->shader->key.mono.u.vs_export_prim_id) {
      outputs[i].semantic_name = TGSI_SEMANTIC_PRIMID;
      outputs[i].semantic_index = 0;
      outputs[i].values[0] = ac_to_float(&ctx->ac, si_get_primitive_id(ctx, 0));
      for (j = 1; j < 4; j++)
         outputs[i].values[j] = LLVMConstReal(ctx->ac.f32, 0);

      memset(outputs[i].vertex_stream, 0, sizeof(outputs[i].vertex_stream));
      i++; /* include the appended output in the export count */
   }

   si_llvm_build_vs_exports(ctx, outputs, i);
   FREE(outputs);
}
/* Epilogue for the primitive-discard compute shader: only the POSITION
 * output matters; load it and return it via the function return value. */
static void si_llvm_emit_prim_discard_cs_epilogue(struct ac_shader_abi *abi, unsigned max_outputs,
                                                  LLVMValueRef *addrs)
{
   struct si_shader_context *ctx = si_shader_context_from_abi(abi);
   struct si_shader_info *info = &ctx->shader->selector->info;
   LLVMValueRef pos[4] = {};

   assert(info->num_outputs <= max_outputs);

   /* Find and load the POSITION output. */
   for (unsigned i = 0; i < info->num_outputs; i++) {
      if (info->output_semantic_name[i] != TGSI_SEMANTIC_POSITION)
         continue;

      for (unsigned chan = 0; chan < 4; chan++)
         pos[chan] = LLVMBuildLoad(ctx->ac.builder, addrs[4 * i + chan], "");
      break;
   }
   assert(pos[0] != NULL);

   /* Return the position output. */
   LLVMValueRef ret = ctx->return_value;
   for (unsigned chan = 0; chan < 4; chan++)
      ret = LLVMBuildInsertValue(ctx->ac.builder, ret, pos[chan], chan, "");
   ctx->return_value = ret;
}
/**
 * Build the vertex shader prolog function.
 *
 * The inputs are the same as VS (a lot of SGPRs and 4 VGPR system values).
 * All inputs are returned unmodified. The vertex load indices are
 * stored after them, which will be used by the API VS for fetching inputs.
 *
 * For example, the expected outputs for instance_divisors[] = {0, 1, 2} are:
 *   (VertexID + BaseVertex),
 *   (InstanceID + StartInstance),
 *   (InstanceID / 2 + StartInstance)
 */
void si_llvm_build_vs_prolog(struct si_shader_context *ctx, union si_shader_part_key *key)
{
   LLVMTypeRef *returns;
   LLVMValueRef ret, func;
   int num_returns, i;
   unsigned first_vs_vgpr = key->vs_prolog.num_merged_next_stage_vgprs;
   unsigned num_input_vgprs =
      key->vs_prolog.num_merged_next_stage_vgprs + 4 + (key->vs_prolog.has_ngg_cull_inputs ? 1 : 0);
   struct ac_arg input_sgpr_param[key->vs_prolog.num_input_sgprs];
   struct ac_arg input_vgpr_param[10];
   LLVMValueRef input_vgprs[10];
   unsigned num_all_input_regs = key->vs_prolog.num_input_sgprs + num_input_vgprs;
   unsigned user_sgpr_base = key->vs_prolog.num_merged_next_stage_vgprs ? 8 : 0;

   memset(&ctx->args, 0, sizeof(ctx->args));

   /* 4 preloaded VGPRs + vertex load indices as prolog outputs */
   returns = alloca((num_all_input_regs + key->vs_prolog.num_inputs) * sizeof(LLVMTypeRef));
   num_returns = 0;

   /* Declare input and output SGPRs. */
   for (i = 0; i < key->vs_prolog.num_input_sgprs; i++) {
      ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &input_sgpr_param[i]);
      returns[num_returns++] = ctx->ac.i32;
   }

   struct ac_arg merged_wave_info = input_sgpr_param[3];

   /* Preloaded VGPRs (outputs must be floats) */
   for (i = 0; i < num_input_vgprs; i++) {
      ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &input_vgpr_param[i]);
      returns[num_returns++] = ctx->ac.f32;
   }

   /* Vertex load indices. */
   for (i = 0; i < key->vs_prolog.num_inputs; i++)
      returns[num_returns++] = ctx->ac.f32;

   /* Create the function. */
   si_llvm_create_func(ctx, "vs_prolog", returns, num_returns, 0);
   func = ctx->main_fn;

   for (i = 0; i < num_input_vgprs; i++) {
      input_vgprs[i] = ac_get_arg(&ctx->ac, input_vgpr_param[i]);
   }

   if (key->vs_prolog.num_merged_next_stage_vgprs) {
      if (!key->vs_prolog.is_monolithic)
         si_init_exec_from_input(ctx, merged_wave_info, 0);

      if (key->vs_prolog.as_ls && ctx->screen->info.has_ls_vgpr_init_bug) {
         /* If there are no HS threads, SPI loads the LS VGPRs
          * starting at VGPR 0. Shift them back to where they
          * belong.
          */
         LLVMValueRef has_hs_threads =
            LLVMBuildICmp(ctx->ac.builder, LLVMIntNE,
                          si_unpack_param(ctx, input_sgpr_param[3], 8, 8), ctx->ac.i32_0, "");

         for (i = 4; i > 0; --i) {
            input_vgprs[i + 1] = LLVMBuildSelect(ctx->ac.builder, has_hs_threads,
                                                 input_vgprs[i + 1], input_vgprs[i - 1], "");
         }
      }
   }

   if (key->vs_prolog.gs_fast_launch_tri_list || key->vs_prolog.gs_fast_launch_tri_strip) {
      LLVMValueRef wave_id, thread_id_in_tg;

      wave_id = si_unpack_param(ctx, input_sgpr_param[3], 24, 4);
      thread_id_in_tg =
         ac_build_imad(&ctx->ac, wave_id, LLVMConstInt(ctx->ac.i32, ctx->ac.wave_size, false),
                       ac_get_thread_id(&ctx->ac));

      /* The GS fast launch initializes all VGPRs to the value of
       * the first thread, so we have to add the thread ID.
       *
       * Only these are initialized by the hw:
       *   VGPR2: Base Primitive ID
       *   VGPR5: Base Vertex ID
       *   VGPR6: Instance ID
       */

      /* Put the vertex thread IDs into VGPRs as-is instead of packing them.
       * The NGG cull shader will read them from there.
       */
      if (key->vs_prolog.gs_fast_launch_tri_list) {
         input_vgprs[0] = ac_build_imad(&ctx->ac, thread_id_in_tg, /* gs_vtx01_offset */
                                        LLVMConstInt(ctx->ac.i32, 3, 0), /* Vertex 0 */
                                        LLVMConstInt(ctx->ac.i32, 0, 0));
         input_vgprs[1] = ac_build_imad(&ctx->ac, thread_id_in_tg, /* gs_vtx23_offset */
                                        LLVMConstInt(ctx->ac.i32, 3, 0), /* Vertex 1 */
                                        LLVMConstInt(ctx->ac.i32, 1, 0));
         input_vgprs[4] = ac_build_imad(&ctx->ac, thread_id_in_tg, /* gs_vtx45_offset */
                                        LLVMConstInt(ctx->ac.i32, 3, 0), /* Vertex 2 */
                                        LLVMConstInt(ctx->ac.i32, 2, 0));
      } else {
         assert(key->vs_prolog.gs_fast_launch_tri_strip);
         LLVMBuilderRef builder = ctx->ac.builder;
         /* Triangle indices: */
         LLVMValueRef index[3] = {
            thread_id_in_tg,
            LLVMBuildAdd(builder, thread_id_in_tg, LLVMConstInt(ctx->ac.i32, 1, 0), ""),
            LLVMBuildAdd(builder, thread_id_in_tg, LLVMConstInt(ctx->ac.i32, 2, 0), ""),
         };
         LLVMValueRef is_odd = LLVMBuildTrunc(ctx->ac.builder, thread_id_in_tg, ctx->ac.i1, "");
         LLVMValueRef flatshade_first = LLVMBuildICmp(
            builder, LLVMIntEQ, si_unpack_param(ctx, ctx->vs_state_bits, 4, 2), ctx->ac.i32_0, "");

         ac_build_triangle_strip_indices_to_triangle(&ctx->ac, is_odd, flatshade_first, index);
         input_vgprs[0] = index[0];
         input_vgprs[1] = index[1];
         input_vgprs[4] = index[2];
      }

      /* Triangles always have all edge flags set initially. */
      input_vgprs[3] = LLVMConstInt(ctx->ac.i32, 0x7 << 8, 0);

      input_vgprs[2] =
         LLVMBuildAdd(ctx->ac.builder, input_vgprs[2], thread_id_in_tg, ""); /* PrimID */
      input_vgprs[5] =
         LLVMBuildAdd(ctx->ac.builder, input_vgprs[5], thread_id_in_tg, ""); /* VertexID */
      input_vgprs[8] = input_vgprs[6];                                       /* InstanceID */
   }

   /* NOTE(review): GFX10 InstanceID VGPR offset (+3) reconstructed from the
    * merged-shader register layout — confirm against upstream. */
   unsigned vertex_id_vgpr = first_vs_vgpr;
   unsigned instance_id_vgpr = ctx->screen->info.chip_class >= GFX10
                                  ? first_vs_vgpr + 3
                                  : first_vs_vgpr + (key->vs_prolog.as_ls ? 2 : 1);

   ctx->abi.vertex_id = input_vgprs[vertex_id_vgpr];
   ctx->abi.instance_id = input_vgprs[instance_id_vgpr];

   /* InstanceID = VertexID >> 16;
    * VertexID = VertexID & 0xffff;
    */
   if (key->vs_prolog.states.unpack_instance_id_from_vertex_id) {
      ctx->abi.instance_id =
         LLVMBuildLShr(ctx->ac.builder, ctx->abi.vertex_id, LLVMConstInt(ctx->ac.i32, 16, 0), "");
      ctx->abi.vertex_id = LLVMBuildAnd(ctx->ac.builder, ctx->abi.vertex_id,
                                        LLVMConstInt(ctx->ac.i32, 0xffff, 0), "");
   }

   /* Copy inputs to outputs. This should be no-op, as the registers match,
    * but it will prevent the compiler from overwriting them unintentionally.
    */
   ret = ctx->return_value;
   for (i = 0; i < key->vs_prolog.num_input_sgprs; i++) {
      LLVMValueRef p = LLVMGetParam(func, i);
      ret = LLVMBuildInsertValue(ctx->ac.builder, ret, p, i, "");
   }
   for (i = 0; i < num_input_vgprs; i++) {
      LLVMValueRef p = input_vgprs[i];

      if (i == vertex_id_vgpr)
         p = ctx->abi.vertex_id;
      else if (i == instance_id_vgpr)
         p = ctx->abi.instance_id;

      p = ac_to_float(&ctx->ac, p);
      ret = LLVMBuildInsertValue(ctx->ac.builder, ret, p, key->vs_prolog.num_input_sgprs + i, "");
   }

   /* Compute vertex load indices from instance divisors. */
   LLVMValueRef instance_divisor_constbuf = NULL;

   if (key->vs_prolog.states.instance_divisor_is_fetched) {
      LLVMValueRef list = si_prolog_get_rw_buffers(ctx);
      LLVMValueRef buf_index = LLVMConstInt(ctx->ac.i32, SI_VS_CONST_INSTANCE_DIVISORS, 0);
      instance_divisor_constbuf = ac_build_load_to_sgpr(&ctx->ac, list, buf_index);
   }

   for (i = 0; i < key->vs_prolog.num_inputs; i++) {
      bool divisor_is_one = key->vs_prolog.states.instance_divisor_is_one & (1u << i);
      bool divisor_is_fetched = key->vs_prolog.states.instance_divisor_is_fetched & (1u << i);
      LLVMValueRef index = NULL;

      if (divisor_is_one) {
         index = ctx->abi.instance_id;
      } else if (divisor_is_fetched) {
         LLVMValueRef udiv_factors[4];

         for (unsigned j = 0; j < 4; j++) {
            udiv_factors[j] = si_buffer_load_const(ctx, instance_divisor_constbuf,
                                                   LLVMConstInt(ctx->ac.i32, i * 16 + j * 4, 0));
            udiv_factors[j] = ac_to_integer(&ctx->ac, udiv_factors[j]);
         }
         /* The faster NUW version doesn't work when InstanceID == UINT_MAX.
          * Such InstanceID might not be achievable in a reasonable time though.
          */
         index = ac_build_fast_udiv_nuw(&ctx->ac, ctx->abi.instance_id, udiv_factors[0],
                                        udiv_factors[1], udiv_factors[2], udiv_factors[3]);
      }

      if (divisor_is_one || divisor_is_fetched) {
         /* Add StartInstance. */
         index =
            LLVMBuildAdd(ctx->ac.builder, index,
                         LLVMGetParam(ctx->main_fn, user_sgpr_base + SI_SGPR_START_INSTANCE), "");
      } else {
         /* VertexID + BaseVertex */
         index = LLVMBuildAdd(ctx->ac.builder, ctx->abi.vertex_id,
                              LLVMGetParam(func, user_sgpr_base + SI_SGPR_BASE_VERTEX), "");
      }

      index = ac_to_float(&ctx->ac, index);
      ret = LLVMBuildInsertValue(ctx->ac.builder, ret, index, ctx->args.arg_count + i, "");
   }

   si_llvm_build_ret(ctx, ret);
}
/* ABI callback: return BaseVertex for indexed draws, 0 otherwise. */
static LLVMValueRef get_base_vertex(struct ac_shader_abi *abi)
{
   struct si_shader_context *ctx = si_shader_context_from_abi(abi);

   /* For non-indexed draws, the base vertex set by the driver
    * (for direct draws) or the CP (for indirect draws) is the
    * first vertex ID, but GLSL expects 0 to be returned.
    */
   LLVMValueRef vs_state = ac_get_arg(&ctx->ac, ctx->vs_state_bits);
   LLVMValueRef indexed;

   /* The "indexed draw" flag is bit 1 of vs_state_bits. */
   indexed = LLVMBuildLShr(ctx->ac.builder, vs_state, ctx->ac.i32_1, "");
   indexed = LLVMBuildTrunc(ctx->ac.builder, indexed, ctx->ac.i1, "");

   return LLVMBuildSelect(ctx->ac.builder, indexed, ac_get_arg(&ctx->ac, ctx->args.base_vertex),
                          ctx->ac.i32_0, "");
}
985 void si_llvm_init_vs_callbacks(struct si_shader_context
*ctx
, bool ngg_cull_shader
)
987 struct si_shader
*shader
= ctx
->shader
;
989 if (shader
->key
.as_ls
)
990 ctx
->abi
.emit_outputs
= si_llvm_emit_ls_epilogue
;
991 else if (shader
->key
.as_es
)
992 ctx
->abi
.emit_outputs
= si_llvm_emit_es_epilogue
;
993 else if (shader
->key
.opt
.vs_as_prim_discard_cs
)
994 ctx
->abi
.emit_outputs
= si_llvm_emit_prim_discard_cs_epilogue
;
995 else if (ngg_cull_shader
)
996 ctx
->abi
.emit_outputs
= gfx10_emit_ngg_culling_epilogue
;
997 else if (shader
->key
.as_ngg
)
998 ctx
->abi
.emit_outputs
= gfx10_emit_ngg_epilogue
;
1000 ctx
->abi
.emit_outputs
= si_llvm_emit_vs_epilogue
;
1002 ctx
->abi
.load_base_vertex
= get_base_vertex
;