radeonsi: kill point size VS output if it's not used by the rasterizer
[mesa.git] / src / gallium / drivers / radeonsi / si_shader_llvm_vs.c
1 /*
2 * Copyright 2020 Advanced Micro Devices, Inc.
3 * All Rights Reserved.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * on the rights to use, copy, modify, merge, publish, distribute, sub
9 * license, and/or sell copies of the Software, and to permit persons to whom
10 * the Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22 * USE OR OTHER DEALINGS IN THE SOFTWARE.
23 */
24
25 #include "si_pipe.h"
26 #include "si_shader_internal.h"
27 #include "sid.h"
28 #include "util/u_memory.h"
29
30 static LLVMValueRef unpack_sint16(struct si_shader_context *ctx, LLVMValueRef i32, unsigned index)
31 {
32 assert(index <= 1);
33
34 if (index == 1)
35 return LLVMBuildAShr(ctx->ac.builder, i32, LLVMConstInt(ctx->ac.i32, 16, 0), "");
36
37 return LLVMBuildSExt(ctx->ac.builder, LLVMBuildTrunc(ctx->ac.builder, i32, ctx->ac.i16, ""),
38 ctx->ac.i32, "");
39 }
40
41 static void load_input_vs(struct si_shader_context *ctx, unsigned input_index, LLVMValueRef out[4])
42 {
43 const struct si_shader_info *info = &ctx->shader->selector->info;
44 unsigned vs_blit_property = info->base.vs.blit_sgprs_amd;
45
46 if (vs_blit_property) {
47 LLVMValueRef vertex_id = ctx->abi.vertex_id;
48 LLVMValueRef sel_x1 =
49 LLVMBuildICmp(ctx->ac.builder, LLVMIntULE, vertex_id, ctx->ac.i32_1, "");
50 /* Use LLVMIntNE, because we have 3 vertices and only
51 * the middle one should use y2.
52 */
53 LLVMValueRef sel_y1 = LLVMBuildICmp(ctx->ac.builder, LLVMIntNE, vertex_id, ctx->ac.i32_1, "");
54
55 unsigned param_vs_blit_inputs = ctx->vs_blit_inputs.arg_index;
56 if (input_index == 0) {
57 /* Position: */
58 LLVMValueRef x1y1 = LLVMGetParam(ctx->main_fn, param_vs_blit_inputs);
59 LLVMValueRef x2y2 = LLVMGetParam(ctx->main_fn, param_vs_blit_inputs + 1);
60
61 LLVMValueRef x1 = unpack_sint16(ctx, x1y1, 0);
62 LLVMValueRef y1 = unpack_sint16(ctx, x1y1, 1);
63 LLVMValueRef x2 = unpack_sint16(ctx, x2y2, 0);
64 LLVMValueRef y2 = unpack_sint16(ctx, x2y2, 1);
65
66 LLVMValueRef x = LLVMBuildSelect(ctx->ac.builder, sel_x1, x1, x2, "");
67 LLVMValueRef y = LLVMBuildSelect(ctx->ac.builder, sel_y1, y1, y2, "");
68
69 out[0] = LLVMBuildSIToFP(ctx->ac.builder, x, ctx->ac.f32, "");
70 out[1] = LLVMBuildSIToFP(ctx->ac.builder, y, ctx->ac.f32, "");
71 out[2] = LLVMGetParam(ctx->main_fn, param_vs_blit_inputs + 2);
72 out[3] = ctx->ac.f32_1;
73 return;
74 }
75
76 /* Color or texture coordinates: */
77 assert(input_index == 1);
78
79 if (vs_blit_property == SI_VS_BLIT_SGPRS_POS_COLOR) {
80 for (int i = 0; i < 4; i++) {
81 out[i] = LLVMGetParam(ctx->main_fn, param_vs_blit_inputs + 3 + i);
82 }
83 } else {
84 assert(vs_blit_property == SI_VS_BLIT_SGPRS_POS_TEXCOORD);
85 LLVMValueRef x1 = LLVMGetParam(ctx->main_fn, param_vs_blit_inputs + 3);
86 LLVMValueRef y1 = LLVMGetParam(ctx->main_fn, param_vs_blit_inputs + 4);
87 LLVMValueRef x2 = LLVMGetParam(ctx->main_fn, param_vs_blit_inputs + 5);
88 LLVMValueRef y2 = LLVMGetParam(ctx->main_fn, param_vs_blit_inputs + 6);
89
90 out[0] = LLVMBuildSelect(ctx->ac.builder, sel_x1, x1, x2, "");
91 out[1] = LLVMBuildSelect(ctx->ac.builder, sel_y1, y1, y2, "");
92 out[2] = LLVMGetParam(ctx->main_fn, param_vs_blit_inputs + 7);
93 out[3] = LLVMGetParam(ctx->main_fn, param_vs_blit_inputs + 8);
94 }
95 return;
96 }
97
98 unsigned num_vbos_in_user_sgprs = ctx->shader->selector->num_vbos_in_user_sgprs;
99 union si_vs_fix_fetch fix_fetch;
100 LLVMValueRef vb_desc;
101 LLVMValueRef vertex_index;
102 LLVMValueRef tmp;
103
104 if (input_index < num_vbos_in_user_sgprs) {
105 vb_desc = ac_get_arg(&ctx->ac, ctx->vb_descriptors[input_index]);
106 } else {
107 unsigned index = input_index - num_vbos_in_user_sgprs;
108 vb_desc = ac_build_load_to_sgpr(&ctx->ac, ac_get_arg(&ctx->ac, ctx->vertex_buffers),
109 LLVMConstInt(ctx->ac.i32, index, 0));
110 }
111
112 vertex_index = LLVMGetParam(ctx->main_fn, ctx->vertex_index0.arg_index + input_index);
113
114 /* Use the open-coded implementation for all loads of doubles and
115 * of dword-sized data that needs fixups. We need to insert conversion
116 * code anyway, and the amd/common code does it for us.
117 *
118 * Note: On LLVM <= 8, we can only open-code formats with
119 * channel size >= 4 bytes.
120 */
121 bool opencode = ctx->shader->key.mono.vs_fetch_opencode & (1 << input_index);
122 fix_fetch.bits = ctx->shader->key.mono.vs_fix_fetch[input_index].bits;
123 if (opencode || (fix_fetch.u.log_size == 3 && fix_fetch.u.format == AC_FETCH_FORMAT_FLOAT) ||
124 (fix_fetch.u.log_size == 2)) {
125 tmp = ac_build_opencoded_load_format(&ctx->ac, fix_fetch.u.log_size,
126 fix_fetch.u.num_channels_m1 + 1, fix_fetch.u.format,
127 fix_fetch.u.reverse, !opencode, vb_desc, vertex_index,
128 ctx->ac.i32_0, ctx->ac.i32_0, 0, true);
129 for (unsigned i = 0; i < 4; ++i)
130 out[i] =
131 LLVMBuildExtractElement(ctx->ac.builder, tmp, LLVMConstInt(ctx->ac.i32, i, false), "");
132 return;
133 }
134
135 unsigned required_channels = util_last_bit(info->input_usage_mask[input_index]);
136 if (required_channels == 0) {
137 for (unsigned i = 0; i < 4; ++i)
138 out[i] = LLVMGetUndef(ctx->ac.f32);
139 return;
140 }
141
142 /* Do multiple loads for special formats. */
143 LLVMValueRef fetches[4];
144 unsigned num_fetches;
145 unsigned fetch_stride;
146 unsigned channels_per_fetch;
147
148 if (fix_fetch.u.log_size <= 1 && fix_fetch.u.num_channels_m1 == 2) {
149 num_fetches = MIN2(required_channels, 3);
150 fetch_stride = 1 << fix_fetch.u.log_size;
151 channels_per_fetch = 1;
152 } else {
153 num_fetches = 1;
154 fetch_stride = 0;
155 channels_per_fetch = required_channels;
156 }
157
158 for (unsigned i = 0; i < num_fetches; ++i) {
159 LLVMValueRef voffset = LLVMConstInt(ctx->ac.i32, fetch_stride * i, 0);
160 fetches[i] = ac_build_buffer_load_format(&ctx->ac, vb_desc, vertex_index, voffset,
161 channels_per_fetch, 0, true, false);
162 }
163
164 if (num_fetches == 1 && channels_per_fetch > 1) {
165 LLVMValueRef fetch = fetches[0];
166 for (unsigned i = 0; i < channels_per_fetch; ++i) {
167 tmp = LLVMConstInt(ctx->ac.i32, i, false);
168 fetches[i] = LLVMBuildExtractElement(ctx->ac.builder, fetch, tmp, "");
169 }
170 num_fetches = channels_per_fetch;
171 channels_per_fetch = 1;
172 }
173
174 for (unsigned i = num_fetches; i < 4; ++i)
175 fetches[i] = LLVMGetUndef(ctx->ac.f32);
176
177 if (fix_fetch.u.log_size <= 1 && fix_fetch.u.num_channels_m1 == 2 && required_channels == 4) {
178 if (fix_fetch.u.format == AC_FETCH_FORMAT_UINT || fix_fetch.u.format == AC_FETCH_FORMAT_SINT)
179 fetches[3] = ctx->ac.i32_1;
180 else
181 fetches[3] = ctx->ac.f32_1;
182 } else if (fix_fetch.u.log_size == 3 &&
183 (fix_fetch.u.format == AC_FETCH_FORMAT_SNORM ||
184 fix_fetch.u.format == AC_FETCH_FORMAT_SSCALED ||
185 fix_fetch.u.format == AC_FETCH_FORMAT_SINT) &&
186 required_channels == 4) {
187 /* For 2_10_10_10, the hardware returns an unsigned value;
188 * convert it to a signed one.
189 */
190 LLVMValueRef tmp = fetches[3];
191 LLVMValueRef c30 = LLVMConstInt(ctx->ac.i32, 30, 0);
192
193 /* First, recover the sign-extended signed integer value. */
194 if (fix_fetch.u.format == AC_FETCH_FORMAT_SSCALED)
195 tmp = LLVMBuildFPToUI(ctx->ac.builder, tmp, ctx->ac.i32, "");
196 else
197 tmp = ac_to_integer(&ctx->ac, tmp);
198
199 /* For the integer-like cases, do a natural sign extension.
200 *
201 * For the SNORM case, the values are 0.0, 0.333, 0.666, 1.0
202 * and happen to contain 0, 1, 2, 3 as the two LSBs of the
203 * exponent.
204 */
205 tmp = LLVMBuildShl(
206 ctx->ac.builder, tmp,
207 fix_fetch.u.format == AC_FETCH_FORMAT_SNORM ? LLVMConstInt(ctx->ac.i32, 7, 0) : c30, "");
208 tmp = LLVMBuildAShr(ctx->ac.builder, tmp, c30, "");
209
210 /* Convert back to the right type. */
211 if (fix_fetch.u.format == AC_FETCH_FORMAT_SNORM) {
212 LLVMValueRef clamp;
213 LLVMValueRef neg_one = LLVMConstReal(ctx->ac.f32, -1.0);
214 tmp = LLVMBuildSIToFP(ctx->ac.builder, tmp, ctx->ac.f32, "");
215 clamp = LLVMBuildFCmp(ctx->ac.builder, LLVMRealULT, tmp, neg_one, "");
216 tmp = LLVMBuildSelect(ctx->ac.builder, clamp, neg_one, tmp, "");
217 } else if (fix_fetch.u.format == AC_FETCH_FORMAT_SSCALED) {
218 tmp = LLVMBuildSIToFP(ctx->ac.builder, tmp, ctx->ac.f32, "");
219 }
220
221 fetches[3] = tmp;
222 }
223
224 for (unsigned i = 0; i < 4; ++i)
225 out[i] = ac_to_float(&ctx->ac, fetches[i]);
226 }
227
228 void si_llvm_load_vs_inputs(struct si_shader_context *ctx, struct nir_shader *nir)
229 {
230 const struct si_shader_info *info = &ctx->shader->selector->info;
231
232 for (unsigned i = 0; i < info->num_inputs; i++) {
233 LLVMValueRef values[4];
234
235 load_input_vs(ctx, i, values);
236
237 for (unsigned chan = 0; chan < 4; chan++) {
238 ctx->inputs[i * 4 + chan] =
239 LLVMBuildBitCast(ctx->ac.builder, values[chan], ctx->ac.i32, "");
240 }
241 }
242 }
243
244 void si_llvm_streamout_store_output(struct si_shader_context *ctx, LLVMValueRef const *so_buffers,
245 LLVMValueRef const *so_write_offsets,
246 struct pipe_stream_output *stream_out,
247 struct si_shader_output_values *shader_out)
248 {
249 unsigned buf_idx = stream_out->output_buffer;
250 unsigned start = stream_out->start_component;
251 unsigned num_comps = stream_out->num_components;
252 LLVMValueRef out[4];
253
254 assert(num_comps && num_comps <= 4);
255 if (!num_comps || num_comps > 4)
256 return;
257
258 /* Load the output as int. */
259 for (int j = 0; j < num_comps; j++) {
260 assert(stream_out->stream == shader_out->vertex_stream[start + j]);
261
262 out[j] = ac_to_integer(&ctx->ac, shader_out->values[start + j]);
263 }
264
265 /* Pack the output. */
266 LLVMValueRef vdata = NULL;
267
268 switch (num_comps) {
269 case 1: /* as i32 */
270 vdata = out[0];
271 break;
272 case 2: /* as v2i32 */
273 case 3: /* as v3i32 */
274 if (ac_has_vec3_support(ctx->screen->info.chip_class, false)) {
275 vdata = ac_build_gather_values(&ctx->ac, out, num_comps);
276 break;
277 }
278 /* as v4i32 (aligned to 4) */
279 out[3] = LLVMGetUndef(ctx->ac.i32);
280 /* fall through */
281 case 4: /* as v4i32 */
282 vdata = ac_build_gather_values(&ctx->ac, out, util_next_power_of_two(num_comps));
283 break;
284 }
285
286 ac_build_buffer_store_dword(&ctx->ac, so_buffers[buf_idx], vdata, num_comps,
287 so_write_offsets[buf_idx], ctx->ac.i32_0, stream_out->dst_offset * 4,
288 ac_glc | ac_slc);
289 }
290
291 /**
292 * Write streamout data to buffers for vertex stream @p stream (different
293 * vertex streams can occur for GS copy shaders).
294 */
295 void si_llvm_emit_streamout(struct si_shader_context *ctx, struct si_shader_output_values *outputs,
296 unsigned noutput, unsigned stream)
297 {
298 struct si_shader_selector *sel = ctx->shader->selector;
299 struct pipe_stream_output_info *so = &sel->so;
300 LLVMBuilderRef builder = ctx->ac.builder;
301 int i;
302
303 /* Get bits [22:16], i.e. (so_param >> 16) & 127; */
304 LLVMValueRef so_vtx_count = si_unpack_param(ctx, ctx->streamout_config, 16, 7);
305
306 LLVMValueRef tid = ac_get_thread_id(&ctx->ac);
307
308 /* can_emit = tid < so_vtx_count; */
309 LLVMValueRef can_emit = LLVMBuildICmp(builder, LLVMIntULT, tid, so_vtx_count, "");
310
311 /* Emit the streamout code conditionally. This actually avoids
312 * out-of-bounds buffer access. The hw tells us via the SGPR
313 * (so_vtx_count) which threads are allowed to emit streamout data. */
314 ac_build_ifcc(&ctx->ac, can_emit, 6501);
315 {
316 /* The buffer offset is computed as follows:
317 * ByteOffset = streamout_offset[buffer_id]*4 +
318 * (streamout_write_index + thread_id)*stride[buffer_id] +
319 * attrib_offset
320 */
321
322 LLVMValueRef so_write_index = ac_get_arg(&ctx->ac, ctx->streamout_write_index);
323
324 /* Compute (streamout_write_index + thread_id). */
325 so_write_index = LLVMBuildAdd(builder, so_write_index, tid, "");
326
327 /* Load the descriptor and compute the write offset for each
328 * enabled buffer. */
329 LLVMValueRef so_write_offset[4] = {};
330 LLVMValueRef so_buffers[4];
331 LLVMValueRef buf_ptr = ac_get_arg(&ctx->ac, ctx->rw_buffers);
332
333 for (i = 0; i < 4; i++) {
334 if (!so->stride[i])
335 continue;
336
337 LLVMValueRef offset = LLVMConstInt(ctx->ac.i32, SI_VS_STREAMOUT_BUF0 + i, 0);
338
339 so_buffers[i] = ac_build_load_to_sgpr(&ctx->ac, buf_ptr, offset);
340
341 LLVMValueRef so_offset = ac_get_arg(&ctx->ac, ctx->streamout_offset[i]);
342 so_offset = LLVMBuildMul(builder, so_offset, LLVMConstInt(ctx->ac.i32, 4, 0), "");
343
344 so_write_offset[i] = ac_build_imad(
345 &ctx->ac, so_write_index, LLVMConstInt(ctx->ac.i32, so->stride[i] * 4, 0), so_offset);
346 }
347
348 /* Write streamout data. */
349 for (i = 0; i < so->num_outputs; i++) {
350 unsigned reg = so->output[i].register_index;
351
352 if (reg >= noutput)
353 continue;
354
355 if (stream != so->output[i].stream)
356 continue;
357
358 si_llvm_streamout_store_output(ctx, so_buffers, so_write_offset, &so->output[i],
359 &outputs[reg]);
360 }
361 }
362 ac_build_endif(&ctx->ac, 6501);
363 }
364
365 static void si_llvm_emit_clipvertex(struct si_shader_context *ctx, struct ac_export_args *pos,
366 LLVMValueRef *out_elts)
367 {
368 unsigned reg_index;
369 unsigned chan;
370 unsigned const_chan;
371 LLVMValueRef base_elt;
372 LLVMValueRef ptr = ac_get_arg(&ctx->ac, ctx->rw_buffers);
373 LLVMValueRef constbuf_index = LLVMConstInt(ctx->ac.i32, SI_VS_CONST_CLIP_PLANES, 0);
374 LLVMValueRef const_resource = ac_build_load_to_sgpr(&ctx->ac, ptr, constbuf_index);
375
376 for (reg_index = 0; reg_index < 2; reg_index++) {
377 struct ac_export_args *args = &pos[2 + reg_index];
378
379 args->out[0] = args->out[1] = args->out[2] = args->out[3] = LLVMConstReal(ctx->ac.f32, 0.0f);
380
381 /* Compute dot products of position and user clip plane vectors */
382 for (chan = 0; chan < 4; chan++) {
383 for (const_chan = 0; const_chan < 4; const_chan++) {
384 LLVMValueRef addr =
385 LLVMConstInt(ctx->ac.i32, ((reg_index * 4 + chan) * 4 + const_chan) * 4, 0);
386 base_elt = si_buffer_load_const(ctx, const_resource, addr);
387 args->out[chan] =
388 ac_build_fmad(&ctx->ac, base_elt, out_elts[const_chan], args->out[chan]);
389 }
390 }
391
392 args->enabled_channels = 0xf;
393 args->valid_mask = 0;
394 args->done = 0;
395 args->target = V_008DFC_SQ_EXP_POS + 2 + reg_index;
396 args->compr = 0;
397 }
398 }
399
400 /* Initialize arguments for the shader export intrinsic */
401 static void si_llvm_init_vs_export_args(struct si_shader_context *ctx, LLVMValueRef *values,
402 unsigned target, struct ac_export_args *args)
403 {
404 args->enabled_channels = 0xf; /* writemask - default is 0xf */
405 args->valid_mask = 0; /* Specify whether the EXEC mask represents the valid mask */
406 args->done = 0; /* Specify whether this is the last export */
407 args->target = target; /* Specify the target we are exporting */
408 args->compr = false;
409
410 memcpy(&args->out[0], values, sizeof(values[0]) * 4);
411 }
412
413 static void si_export_param(struct si_shader_context *ctx, unsigned index, LLVMValueRef *values)
414 {
415 struct ac_export_args args;
416
417 si_llvm_init_vs_export_args(ctx, values, V_008DFC_SQ_EXP_PARAM + index, &args);
418 ac_build_export(&ctx->ac, &args);
419 }
420
421 static void si_build_param_exports(struct si_shader_context *ctx,
422 struct si_shader_output_values *outputs, unsigned noutput)
423 {
424 struct si_shader *shader = ctx->shader;
425 unsigned param_count = 0;
426
427 for (unsigned i = 0; i < noutput; i++) {
428 unsigned semantic = outputs[i].semantic;
429
430 if (outputs[i].vertex_stream[0] != 0 && outputs[i].vertex_stream[1] != 0 &&
431 outputs[i].vertex_stream[2] != 0 && outputs[i].vertex_stream[3] != 0)
432 continue;
433
434 switch (semantic) {
435 case VARYING_SLOT_LAYER:
436 case VARYING_SLOT_VIEWPORT:
437 case VARYING_SLOT_CLIP_DIST0:
438 case VARYING_SLOT_CLIP_DIST1:
439 case VARYING_SLOT_COL0:
440 case VARYING_SLOT_COL1:
441 case VARYING_SLOT_BFC0:
442 case VARYING_SLOT_BFC1:
443 case VARYING_SLOT_PRIMITIVE_ID:
444 case VARYING_SLOT_FOGC:
445 break;
446 default:
447 if ((semantic >= VARYING_SLOT_TEX0 && semantic <= VARYING_SLOT_TEX7) ||
448 semantic >= VARYING_SLOT_VAR0)
449 break;
450 else
451 continue;
452 }
453
454 if (semantic < VARYING_SLOT_VAR0 + SI_MAX_IO_GENERIC &&
455 shader->key.opt.kill_outputs &
456 (1ull << si_shader_io_get_unique_index(semantic, true)))
457 continue;
458
459 si_export_param(ctx, param_count, outputs[i].values);
460
461 assert(i < ARRAY_SIZE(shader->info.vs_output_param_offset));
462 shader->info.vs_output_param_offset[i] = param_count++;
463 }
464
465 shader->info.nr_param_exports = param_count;
466 }
467
468 /**
469 * Vertex color clamping.
470 *
471 * This uses a state constant loaded in a user data SGPR and
472 * an IF statement is added that clamps all colors if the constant
473 * is true.
474 */
475 static void si_vertex_color_clamping(struct si_shader_context *ctx,
476 struct si_shader_output_values *outputs, unsigned noutput)
477 {
478 LLVMValueRef addr[SI_MAX_VS_OUTPUTS][4];
479 bool has_colors = false;
480
481 /* Store original colors to alloca variables. */
482 for (unsigned i = 0; i < noutput; i++) {
483 if (outputs[i].semantic != VARYING_SLOT_COL0 &&
484 outputs[i].semantic != VARYING_SLOT_COL1 &&
485 outputs[i].semantic != VARYING_SLOT_BFC0 &&
486 outputs[i].semantic != VARYING_SLOT_BFC1)
487 continue;
488
489 for (unsigned j = 0; j < 4; j++) {
490 addr[i][j] = ac_build_alloca_undef(&ctx->ac, ctx->ac.f32, "");
491 LLVMBuildStore(ctx->ac.builder, outputs[i].values[j], addr[i][j]);
492 }
493 has_colors = true;
494 }
495
496 if (!has_colors)
497 return;
498
499 /* The state is in the first bit of the user SGPR. */
500 LLVMValueRef cond = ac_get_arg(&ctx->ac, ctx->vs_state_bits);
501 cond = LLVMBuildTrunc(ctx->ac.builder, cond, ctx->ac.i1, "");
502
503 ac_build_ifcc(&ctx->ac, cond, 6502);
504
505 /* Store clamped colors to alloca variables within the conditional block. */
506 for (unsigned i = 0; i < noutput; i++) {
507 if (outputs[i].semantic != VARYING_SLOT_COL0 &&
508 outputs[i].semantic != VARYING_SLOT_COL1 &&
509 outputs[i].semantic != VARYING_SLOT_BFC0 &&
510 outputs[i].semantic != VARYING_SLOT_BFC1)
511 continue;
512
513 for (unsigned j = 0; j < 4; j++) {
514 LLVMBuildStore(ctx->ac.builder, ac_build_clamp(&ctx->ac, outputs[i].values[j]),
515 addr[i][j]);
516 }
517 }
518 ac_build_endif(&ctx->ac, 6502);
519
520 /* Load clamped colors */
521 for (unsigned i = 0; i < noutput; i++) {
522 if (outputs[i].semantic != VARYING_SLOT_COL0 &&
523 outputs[i].semantic != VARYING_SLOT_COL1 &&
524 outputs[i].semantic != VARYING_SLOT_BFC0 &&
525 outputs[i].semantic != VARYING_SLOT_BFC1)
526 continue;
527
528 for (unsigned j = 0; j < 4; j++) {
529 outputs[i].values[j] = LLVMBuildLoad(ctx->ac.builder, addr[i][j], "");
530 }
531 }
532 }
533
534 /* Generate export instructions for hardware VS shader stage or NGG GS stage
535 * (position and parameter data only).
536 */
537 void si_llvm_build_vs_exports(struct si_shader_context *ctx,
538 struct si_shader_output_values *outputs, unsigned noutput)
539 {
540 struct si_shader *shader = ctx->shader;
541 struct ac_export_args pos_args[4] = {};
542 LLVMValueRef psize_value = NULL, edgeflag_value = NULL, layer_value = NULL,
543 viewport_index_value = NULL;
544 unsigned pos_idx;
545 int i;
546
547 si_vertex_color_clamping(ctx, outputs, noutput);
548
549 /* Build position exports. */
550 for (i = 0; i < noutput; i++) {
551 switch (outputs[i].semantic) {
552 case VARYING_SLOT_POS:
553 si_llvm_init_vs_export_args(ctx, outputs[i].values, V_008DFC_SQ_EXP_POS, &pos_args[0]);
554 break;
555 case VARYING_SLOT_PSIZ:
556 psize_value = outputs[i].values[0];
557 break;
558 case VARYING_SLOT_LAYER:
559 layer_value = outputs[i].values[0];
560 break;
561 case VARYING_SLOT_VIEWPORT:
562 viewport_index_value = outputs[i].values[0];
563 break;
564 case VARYING_SLOT_EDGE:
565 edgeflag_value = outputs[i].values[0];
566 break;
567 case VARYING_SLOT_CLIP_DIST0:
568 case VARYING_SLOT_CLIP_DIST1:
569 if (!shader->key.opt.clip_disable) {
570 unsigned index = 2 + (outputs[i].semantic - VARYING_SLOT_CLIP_DIST0);
571 si_llvm_init_vs_export_args(ctx, outputs[i].values, V_008DFC_SQ_EXP_POS + index,
572 &pos_args[index]);
573 }
574 break;
575 case VARYING_SLOT_CLIP_VERTEX:
576 if (!shader->key.opt.clip_disable) {
577 si_llvm_emit_clipvertex(ctx, pos_args, outputs[i].values);
578 }
579 break;
580 }
581 }
582
583 /* We need to add the position output manually if it's missing. */
584 if (!pos_args[0].out[0]) {
585 pos_args[0].enabled_channels = 0xf; /* writemask */
586 pos_args[0].valid_mask = 0; /* EXEC mask */
587 pos_args[0].done = 0; /* last export? */
588 pos_args[0].target = V_008DFC_SQ_EXP_POS;
589 pos_args[0].compr = 0; /* COMPR flag */
590 pos_args[0].out[0] = ctx->ac.f32_0; /* X */
591 pos_args[0].out[1] = ctx->ac.f32_0; /* Y */
592 pos_args[0].out[2] = ctx->ac.f32_0; /* Z */
593 pos_args[0].out[3] = ctx->ac.f32_1; /* W */
594 }
595
596 bool writes_psize = shader->selector->info.writes_psize && !shader->key.opt.kill_pointsize;
597 bool pos_writes_edgeflag = shader->selector->info.writes_edgeflag && !shader->key.as_ngg;
598
599 /* Write the misc vector (point size, edgeflag, layer, viewport). */
600 if (writes_psize || pos_writes_edgeflag ||
601 shader->selector->info.writes_viewport_index || shader->selector->info.writes_layer) {
602 pos_args[1].enabled_channels = writes_psize |
603 (pos_writes_edgeflag << 1) |
604 (shader->selector->info.writes_layer << 2);
605
606 pos_args[1].valid_mask = 0; /* EXEC mask */
607 pos_args[1].done = 0; /* last export? */
608 pos_args[1].target = V_008DFC_SQ_EXP_POS + 1;
609 pos_args[1].compr = 0; /* COMPR flag */
610 pos_args[1].out[0] = ctx->ac.f32_0; /* X */
611 pos_args[1].out[1] = ctx->ac.f32_0; /* Y */
612 pos_args[1].out[2] = ctx->ac.f32_0; /* Z */
613 pos_args[1].out[3] = ctx->ac.f32_0; /* W */
614
615 if (writes_psize)
616 pos_args[1].out[0] = psize_value;
617
618 if (pos_writes_edgeflag) {
619 /* The output is a float, but the hw expects an integer
620 * with the first bit containing the edge flag. */
621 edgeflag_value = LLVMBuildFPToUI(ctx->ac.builder, edgeflag_value, ctx->ac.i32, "");
622 edgeflag_value = ac_build_umin(&ctx->ac, edgeflag_value, ctx->ac.i32_1);
623
624 /* The LLVM intrinsic expects a float. */
625 pos_args[1].out[1] = ac_to_float(&ctx->ac, edgeflag_value);
626 }
627
628 if (ctx->screen->info.chip_class >= GFX9) {
629 /* GFX9 has the layer in out.z[10:0] and the viewport
630 * index in out.z[19:16].
631 */
632 if (shader->selector->info.writes_layer)
633 pos_args[1].out[2] = layer_value;
634
635 if (shader->selector->info.writes_viewport_index) {
636 LLVMValueRef v = viewport_index_value;
637
638 v = ac_to_integer(&ctx->ac, v);
639 v = LLVMBuildShl(ctx->ac.builder, v, LLVMConstInt(ctx->ac.i32, 16, 0), "");
640 v = LLVMBuildOr(ctx->ac.builder, v, ac_to_integer(&ctx->ac, pos_args[1].out[2]), "");
641 pos_args[1].out[2] = ac_to_float(&ctx->ac, v);
642 pos_args[1].enabled_channels |= 1 << 2;
643 }
644 } else {
645 if (shader->selector->info.writes_layer)
646 pos_args[1].out[2] = layer_value;
647
648 if (shader->selector->info.writes_viewport_index) {
649 pos_args[1].out[3] = viewport_index_value;
650 pos_args[1].enabled_channels |= 1 << 3;
651 }
652 }
653 }
654
655 for (i = 0; i < 4; i++)
656 if (pos_args[i].out[0])
657 shader->info.nr_pos_exports++;
658
659 /* GFX10 (Navi1x) skip POS0 exports if EXEC=0 and DONE=0, causing a hang.
660 * Setting valid_mask=1 prevents it and has no other effect.
661 */
662 if (ctx->screen->info.chip_class == GFX10)
663 pos_args[0].valid_mask = 1;
664
665 pos_idx = 0;
666 for (i = 0; i < 4; i++) {
667 if (!pos_args[i].out[0])
668 continue;
669
670 /* Specify the target we are exporting */
671 pos_args[i].target = V_008DFC_SQ_EXP_POS + pos_idx++;
672
673 if (pos_idx == shader->info.nr_pos_exports)
674 /* Specify that this is the last export */
675 pos_args[i].done = 1;
676
677 ac_build_export(&ctx->ac, &pos_args[i]);
678 }
679
680 /* Build parameter exports. */
681 si_build_param_exports(ctx, outputs, noutput);
682 }
683
684 void si_llvm_emit_vs_epilogue(struct ac_shader_abi *abi, unsigned max_outputs, LLVMValueRef *addrs)
685 {
686 struct si_shader_context *ctx = si_shader_context_from_abi(abi);
687 struct si_shader_info *info = &ctx->shader->selector->info;
688 struct si_shader_output_values *outputs = NULL;
689 int i, j;
690
691 assert(!ctx->shader->is_gs_copy_shader);
692 assert(info->num_outputs <= max_outputs);
693
694 outputs = MALLOC((info->num_outputs + 1) * sizeof(outputs[0]));
695
696 for (i = 0; i < info->num_outputs; i++) {
697 outputs[i].semantic = info->output_semantic[i];
698
699 for (j = 0; j < 4; j++) {
700 outputs[i].values[j] = LLVMBuildLoad(ctx->ac.builder, addrs[4 * i + j], "");
701 outputs[i].vertex_stream[j] = (info->output_streams[i] >> (2 * j)) & 3;
702 }
703 }
704
705 if (!ctx->screen->use_ngg_streamout && ctx->shader->selector->so.num_outputs)
706 si_llvm_emit_streamout(ctx, outputs, i, 0);
707
708 /* Export PrimitiveID. */
709 if (ctx->shader->key.mono.u.vs_export_prim_id) {
710 outputs[i].semantic = VARYING_SLOT_PRIMITIVE_ID;
711 outputs[i].values[0] = ac_to_float(&ctx->ac, si_get_primitive_id(ctx, 0));
712 for (j = 1; j < 4; j++)
713 outputs[i].values[j] = LLVMConstReal(ctx->ac.f32, 0);
714
715 memset(outputs[i].vertex_stream, 0, sizeof(outputs[i].vertex_stream));
716 i++;
717 }
718
719 si_llvm_build_vs_exports(ctx, outputs, i);
720 FREE(outputs);
721 }
722
723 static void si_llvm_emit_prim_discard_cs_epilogue(struct ac_shader_abi *abi, unsigned max_outputs,
724 LLVMValueRef *addrs)
725 {
726 struct si_shader_context *ctx = si_shader_context_from_abi(abi);
727 struct si_shader_info *info = &ctx->shader->selector->info;
728 LLVMValueRef pos[4] = {};
729
730 assert(info->num_outputs <= max_outputs);
731
732 for (unsigned i = 0; i < info->num_outputs; i++) {
733 if (info->output_semantic[i] != VARYING_SLOT_POS)
734 continue;
735
736 for (unsigned chan = 0; chan < 4; chan++)
737 pos[chan] = LLVMBuildLoad(ctx->ac.builder, addrs[4 * i + chan], "");
738 break;
739 }
740 assert(pos[0] != NULL);
741
742 /* Return the position output. */
743 LLVMValueRef ret = ctx->return_value;
744 for (unsigned chan = 0; chan < 4; chan++)
745 ret = LLVMBuildInsertValue(ctx->ac.builder, ret, pos[chan], chan, "");
746 ctx->return_value = ret;
747 }
748
749 /**
750 * Build the vertex shader prolog function.
751 *
752 * The inputs are the same as VS (a lot of SGPRs and 4 VGPR system values).
753 * All inputs are returned unmodified. The vertex load indices are
754 * stored after them, which will be used by the API VS for fetching inputs.
755 *
756 * For example, the expected outputs for instance_divisors[] = {0, 1, 2} are:
757 * input_v0,
758 * input_v1,
759 * input_v2,
760 * input_v3,
761 * (VertexID + BaseVertex),
762 * (InstanceID + StartInstance),
763 * (InstanceID / 2 + StartInstance)
764 */
765 void si_llvm_build_vs_prolog(struct si_shader_context *ctx, union si_shader_part_key *key)
766 {
767 LLVMTypeRef *returns;
768 LLVMValueRef ret, func;
769 int num_returns, i;
770 unsigned first_vs_vgpr = key->vs_prolog.num_merged_next_stage_vgprs;
771 unsigned num_input_vgprs =
772 key->vs_prolog.num_merged_next_stage_vgprs + 4 + (key->vs_prolog.has_ngg_cull_inputs ? 1 : 0);
773 struct ac_arg input_sgpr_param[key->vs_prolog.num_input_sgprs];
774 struct ac_arg input_vgpr_param[10];
775 LLVMValueRef input_vgprs[10];
776 unsigned num_all_input_regs = key->vs_prolog.num_input_sgprs + num_input_vgprs;
777 unsigned user_sgpr_base = key->vs_prolog.num_merged_next_stage_vgprs ? 8 : 0;
778
779 memset(&ctx->args, 0, sizeof(ctx->args));
780
781 /* 4 preloaded VGPRs + vertex load indices as prolog outputs */
782 returns = alloca((num_all_input_regs + key->vs_prolog.num_inputs) * sizeof(LLVMTypeRef));
783 num_returns = 0;
784
785 /* Declare input and output SGPRs. */
786 for (i = 0; i < key->vs_prolog.num_input_sgprs; i++) {
787 ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &input_sgpr_param[i]);
788 returns[num_returns++] = ctx->ac.i32;
789 }
790
791 struct ac_arg merged_wave_info = input_sgpr_param[3];
792
793 /* Preloaded VGPRs (outputs must be floats) */
794 for (i = 0; i < num_input_vgprs; i++) {
795 ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &input_vgpr_param[i]);
796 returns[num_returns++] = ctx->ac.f32;
797 }
798
799 /* Vertex load indices. */
800 for (i = 0; i < key->vs_prolog.num_inputs; i++)
801 returns[num_returns++] = ctx->ac.f32;
802
803 /* Create the function. */
804 si_llvm_create_func(ctx, "vs_prolog", returns, num_returns, 0);
805 func = ctx->main_fn;
806
807 for (i = 0; i < num_input_vgprs; i++) {
808 input_vgprs[i] = ac_get_arg(&ctx->ac, input_vgpr_param[i]);
809 }
810
811 if (key->vs_prolog.num_merged_next_stage_vgprs) {
812 if (!key->vs_prolog.is_monolithic)
813 si_init_exec_from_input(ctx, merged_wave_info, 0);
814
815 if (key->vs_prolog.as_ls && ctx->screen->info.has_ls_vgpr_init_bug) {
816 /* If there are no HS threads, SPI loads the LS VGPRs
817 * starting at VGPR 0. Shift them back to where they
818 * belong.
819 */
820 LLVMValueRef has_hs_threads =
821 LLVMBuildICmp(ctx->ac.builder, LLVMIntNE,
822 si_unpack_param(ctx, input_sgpr_param[3], 8, 8), ctx->ac.i32_0, "");
823
824 for (i = 4; i > 0; --i) {
825 input_vgprs[i + 1] = LLVMBuildSelect(ctx->ac.builder, has_hs_threads,
826 input_vgprs[i + 1], input_vgprs[i - 1], "");
827 }
828 }
829 }
830
831 if (key->vs_prolog.gs_fast_launch_tri_list || key->vs_prolog.gs_fast_launch_tri_strip) {
832 LLVMValueRef wave_id, thread_id_in_tg;
833
834 wave_id = si_unpack_param(ctx, input_sgpr_param[3], 24, 4);
835 thread_id_in_tg =
836 ac_build_imad(&ctx->ac, wave_id, LLVMConstInt(ctx->ac.i32, ctx->ac.wave_size, false),
837 ac_get_thread_id(&ctx->ac));
838
839 /* The GS fast launch initializes all VGPRs to the value of
840 * the first thread, so we have to add the thread ID.
841 *
842 * Only these are initialized by the hw:
843 * VGPR2: Base Primitive ID
844 * VGPR5: Base Vertex ID
845 * VGPR6: Instance ID
846 */
847
848 /* Put the vertex thread IDs into VGPRs as-is instead of packing them.
849 * The NGG cull shader will read them from there.
850 */
851 if (key->vs_prolog.gs_fast_launch_tri_list) {
852 input_vgprs[0] = ac_build_imad(&ctx->ac, thread_id_in_tg, /* gs_vtx01_offset */
853 LLVMConstInt(ctx->ac.i32, 3, 0), /* Vertex 0 */
854 LLVMConstInt(ctx->ac.i32, 0, 0));
855 input_vgprs[1] = ac_build_imad(&ctx->ac, thread_id_in_tg, /* gs_vtx23_offset */
856 LLVMConstInt(ctx->ac.i32, 3, 0), /* Vertex 1 */
857 LLVMConstInt(ctx->ac.i32, 1, 0));
858 input_vgprs[4] = ac_build_imad(&ctx->ac, thread_id_in_tg, /* gs_vtx45_offset */
859 LLVMConstInt(ctx->ac.i32, 3, 0), /* Vertex 2 */
860 LLVMConstInt(ctx->ac.i32, 2, 0));
861 } else {
862 assert(key->vs_prolog.gs_fast_launch_tri_strip);
863 LLVMBuilderRef builder = ctx->ac.builder;
864 /* Triangle indices: */
865 LLVMValueRef index[3] = {
866 thread_id_in_tg,
867 LLVMBuildAdd(builder, thread_id_in_tg, LLVMConstInt(ctx->ac.i32, 1, 0), ""),
868 LLVMBuildAdd(builder, thread_id_in_tg, LLVMConstInt(ctx->ac.i32, 2, 0), ""),
869 };
870 LLVMValueRef is_odd = LLVMBuildTrunc(ctx->ac.builder, thread_id_in_tg, ctx->ac.i1, "");
871 LLVMValueRef flatshade_first = LLVMBuildICmp(
872 builder, LLVMIntEQ, si_unpack_param(ctx, ctx->vs_state_bits, 4, 2), ctx->ac.i32_0, "");
873
874 ac_build_triangle_strip_indices_to_triangle(&ctx->ac, is_odd, flatshade_first, index);
875 input_vgprs[0] = index[0];
876 input_vgprs[1] = index[1];
877 input_vgprs[4] = index[2];
878 }
879
880 /* Triangles always have all edge flags set initially. */
881 input_vgprs[3] = LLVMConstInt(ctx->ac.i32, 0x7 << 8, 0);
882
883 input_vgprs[2] =
884 LLVMBuildAdd(ctx->ac.builder, input_vgprs[2], thread_id_in_tg, ""); /* PrimID */
885 input_vgprs[5] =
886 LLVMBuildAdd(ctx->ac.builder, input_vgprs[5], thread_id_in_tg, ""); /* VertexID */
887 input_vgprs[8] = input_vgprs[6]; /* InstanceID */
888 }
889
890 unsigned vertex_id_vgpr = first_vs_vgpr;
891 unsigned instance_id_vgpr = ctx->screen->info.chip_class >= GFX10
892 ? first_vs_vgpr + 3
893 : first_vs_vgpr + (key->vs_prolog.as_ls ? 2 : 1);
894
895 ctx->abi.vertex_id = input_vgprs[vertex_id_vgpr];
896 ctx->abi.instance_id = input_vgprs[instance_id_vgpr];
897
898 /* InstanceID = VertexID >> 16;
899 * VertexID = VertexID & 0xffff;
900 */
901 if (key->vs_prolog.states.unpack_instance_id_from_vertex_id) {
902 ctx->abi.instance_id =
903 LLVMBuildLShr(ctx->ac.builder, ctx->abi.vertex_id, LLVMConstInt(ctx->ac.i32, 16, 0), "");
904 ctx->abi.vertex_id = LLVMBuildAnd(ctx->ac.builder, ctx->abi.vertex_id,
905 LLVMConstInt(ctx->ac.i32, 0xffff, 0), "");
906 }
907
908 /* Copy inputs to outputs. This should be no-op, as the registers match,
909 * but it will prevent the compiler from overwriting them unintentionally.
910 */
911 ret = ctx->return_value;
912 for (i = 0; i < key->vs_prolog.num_input_sgprs; i++) {
913 LLVMValueRef p = LLVMGetParam(func, i);
914 ret = LLVMBuildInsertValue(ctx->ac.builder, ret, p, i, "");
915 }
916 for (i = 0; i < num_input_vgprs; i++) {
917 LLVMValueRef p = input_vgprs[i];
918
919 if (i == vertex_id_vgpr)
920 p = ctx->abi.vertex_id;
921 else if (i == instance_id_vgpr)
922 p = ctx->abi.instance_id;
923
924 p = ac_to_float(&ctx->ac, p);
925 ret = LLVMBuildInsertValue(ctx->ac.builder, ret, p, key->vs_prolog.num_input_sgprs + i, "");
926 }
927
928 /* Compute vertex load indices from instance divisors. */
929 LLVMValueRef instance_divisor_constbuf = NULL;
930
931 if (key->vs_prolog.states.instance_divisor_is_fetched) {
932 LLVMValueRef list = si_prolog_get_rw_buffers(ctx);
933 LLVMValueRef buf_index = LLVMConstInt(ctx->ac.i32, SI_VS_CONST_INSTANCE_DIVISORS, 0);
934 instance_divisor_constbuf = ac_build_load_to_sgpr(&ctx->ac, list, buf_index);
935 }
936
937 for (i = 0; i < key->vs_prolog.num_inputs; i++) {
938 bool divisor_is_one = key->vs_prolog.states.instance_divisor_is_one & (1u << i);
939 bool divisor_is_fetched = key->vs_prolog.states.instance_divisor_is_fetched & (1u << i);
940 LLVMValueRef index = NULL;
941
942 if (divisor_is_one) {
943 index = ctx->abi.instance_id;
944 } else if (divisor_is_fetched) {
945 LLVMValueRef udiv_factors[4];
946
947 for (unsigned j = 0; j < 4; j++) {
948 udiv_factors[j] = si_buffer_load_const(ctx, instance_divisor_constbuf,
949 LLVMConstInt(ctx->ac.i32, i * 16 + j * 4, 0));
950 udiv_factors[j] = ac_to_integer(&ctx->ac, udiv_factors[j]);
951 }
952 /* The faster NUW version doesn't work when InstanceID == UINT_MAX.
953 * Such InstanceID might not be achievable in a reasonable time though.
954 */
955 index = ac_build_fast_udiv_nuw(&ctx->ac, ctx->abi.instance_id, udiv_factors[0],
956 udiv_factors[1], udiv_factors[2], udiv_factors[3]);
957 }
958
959 if (divisor_is_one || divisor_is_fetched) {
960 /* Add StartInstance. */
961 index =
962 LLVMBuildAdd(ctx->ac.builder, index,
963 LLVMGetParam(ctx->main_fn, user_sgpr_base + SI_SGPR_START_INSTANCE), "");
964 } else {
965 /* VertexID + BaseVertex */
966 index = LLVMBuildAdd(ctx->ac.builder, ctx->abi.vertex_id,
967 LLVMGetParam(func, user_sgpr_base + SI_SGPR_BASE_VERTEX), "");
968 }
969
970 index = ac_to_float(&ctx->ac, index);
971 ret = LLVMBuildInsertValue(ctx->ac.builder, ret, index, ctx->args.arg_count + i, "");
972 }
973
974 si_llvm_build_ret(ctx, ret);
975 }
976
977 static LLVMValueRef get_base_vertex(struct ac_shader_abi *abi)
978 {
979 struct si_shader_context *ctx = si_shader_context_from_abi(abi);
980
981 /* For non-indexed draws, the base vertex set by the driver
982 * (for direct draws) or the CP (for indirect draws) is the
983 * first vertex ID, but GLSL expects 0 to be returned.
984 */
985 LLVMValueRef vs_state = ac_get_arg(&ctx->ac, ctx->vs_state_bits);
986 LLVMValueRef indexed;
987
988 indexed = LLVMBuildLShr(ctx->ac.builder, vs_state, ctx->ac.i32_1, "");
989 indexed = LLVMBuildTrunc(ctx->ac.builder, indexed, ctx->ac.i1, "");
990
991 return LLVMBuildSelect(ctx->ac.builder, indexed, ac_get_arg(&ctx->ac, ctx->args.base_vertex),
992 ctx->ac.i32_0, "");
993 }
994
995 void si_llvm_init_vs_callbacks(struct si_shader_context *ctx, bool ngg_cull_shader)
996 {
997 struct si_shader *shader = ctx->shader;
998
999 if (shader->key.as_ls)
1000 ctx->abi.emit_outputs = si_llvm_emit_ls_epilogue;
1001 else if (shader->key.as_es)
1002 ctx->abi.emit_outputs = si_llvm_emit_es_epilogue;
1003 else if (shader->key.opt.vs_as_prim_discard_cs)
1004 ctx->abi.emit_outputs = si_llvm_emit_prim_discard_cs_epilogue;
1005 else if (ngg_cull_shader)
1006 ctx->abi.emit_outputs = gfx10_emit_ngg_culling_epilogue;
1007 else if (shader->key.as_ngg)
1008 ctx->abi.emit_outputs = gfx10_emit_ngg_epilogue;
1009 else
1010 ctx->abi.emit_outputs = si_llvm_emit_vs_epilogue;
1011
1012 ctx->abi.load_base_vertex = get_base_vertex;
1013 }