radeonsi: turn an assertion into return in si_nir_store_output_tcs
[mesa.git] / src / gallium / drivers / radeonsi / si_shader_llvm_tess.c
1 /*
2 * Copyright 2020 Advanced Micro Devices, Inc.
3 * All Rights Reserved.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * on the rights to use, copy, modify, merge, publish, distribute, sub
9 * license, and/or sell copies of the Software, and to permit persons to whom
10 * the Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22 * USE OR OTHER DEALINGS IN THE SOFTWARE.
23 */
24
25 #include "si_shader_internal.h"
26 #include "si_pipe.h"
27 #include "sid.h"
28
29 static LLVMValueRef get_rel_patch_id(struct si_shader_context *ctx)
30 {
31 switch (ctx->type) {
32 case PIPE_SHADER_TESS_CTRL:
33 return si_unpack_param(ctx, ctx->args.tcs_rel_ids, 0, 8);
34
35 case PIPE_SHADER_TESS_EVAL:
36 return ac_get_arg(&ctx->ac, ctx->tes_rel_patch_id);
37
38 default:
39 assert(0);
40 return NULL;
41 }
42 }
43
44 /* Tessellation shaders pass outputs to the next shader using LDS.
45 *
46 * LS outputs = TCS inputs
47 * TCS outputs = TES inputs
48 *
49 * The LDS layout is:
50 * - TCS inputs for patch 0
51 * - TCS inputs for patch 1
52 * - TCS inputs for patch 2 = get_tcs_in_current_patch_offset (if RelPatchID==2)
53 * - ...
54 * - TCS outputs for patch 0 = get_tcs_out_patch0_offset
55 * - Per-patch TCS outputs for patch 0 = get_tcs_out_patch0_patch_data_offset
56 * - TCS outputs for patch 1
57 * - Per-patch TCS outputs for patch 1
58 * - TCS outputs for patch 2 = get_tcs_out_current_patch_offset (if RelPatchID==2)
59 * - Per-patch TCS outputs for patch 2 = get_tcs_out_current_patch_data_offset (if RelPatchID==2)
60 * - ...
61 *
62 * All three shaders VS(LS), TCS, TES share the same LDS space.
63 */
64
65 static LLVMValueRef
66 get_tcs_in_patch_stride(struct si_shader_context *ctx)
67 {
68 return si_unpack_param(ctx, ctx->vs_state_bits, 11, 13);
69 }
70
71 static unsigned get_tcs_out_vertex_dw_stride_constant(struct si_shader_context *ctx)
72 {
73 assert(ctx->type == PIPE_SHADER_TESS_CTRL);
74
75 if (ctx->shader->key.mono.u.ff_tcs_inputs_to_copy)
76 return util_last_bit64(ctx->shader->key.mono.u.ff_tcs_inputs_to_copy) * 4;
77
78 return util_last_bit64(ctx->shader->selector->outputs_written) * 4;
79 }
80
81 static LLVMValueRef get_tcs_out_vertex_dw_stride(struct si_shader_context *ctx)
82 {
83 unsigned stride = get_tcs_out_vertex_dw_stride_constant(ctx);
84
85 return LLVMConstInt(ctx->i32, stride, 0);
86 }
87
88 static LLVMValueRef get_tcs_out_patch_stride(struct si_shader_context *ctx)
89 {
90 if (ctx->shader->key.mono.u.ff_tcs_inputs_to_copy)
91 return si_unpack_param(ctx, ctx->tcs_out_lds_layout, 0, 13);
92
93 const struct si_shader_info *info = &ctx->shader->selector->info;
94 unsigned tcs_out_vertices = info->properties[TGSI_PROPERTY_TCS_VERTICES_OUT];
95 unsigned vertex_dw_stride = get_tcs_out_vertex_dw_stride_constant(ctx);
96 unsigned num_patch_outputs = util_last_bit64(ctx->shader->selector->patch_outputs_written);
97 unsigned patch_dw_stride = tcs_out_vertices * vertex_dw_stride +
98 num_patch_outputs * 4;
99 return LLVMConstInt(ctx->i32, patch_dw_stride, 0);
100 }
101
102 static LLVMValueRef
103 get_tcs_out_patch0_offset(struct si_shader_context *ctx)
104 {
105 return LLVMBuildMul(ctx->ac.builder,
106 si_unpack_param(ctx, ctx->tcs_out_lds_offsets, 0, 16),
107 LLVMConstInt(ctx->i32, 4, 0), "");
108 }
109
110 static LLVMValueRef
111 get_tcs_out_patch0_patch_data_offset(struct si_shader_context *ctx)
112 {
113 return LLVMBuildMul(ctx->ac.builder,
114 si_unpack_param(ctx, ctx->tcs_out_lds_offsets, 16, 16),
115 LLVMConstInt(ctx->i32, 4, 0), "");
116 }
117
118 static LLVMValueRef
119 get_tcs_in_current_patch_offset(struct si_shader_context *ctx)
120 {
121 LLVMValueRef patch_stride = get_tcs_in_patch_stride(ctx);
122 LLVMValueRef rel_patch_id = get_rel_patch_id(ctx);
123
124 return LLVMBuildMul(ctx->ac.builder, patch_stride, rel_patch_id, "");
125 }
126
127 static LLVMValueRef
128 get_tcs_out_current_patch_offset(struct si_shader_context *ctx)
129 {
130 LLVMValueRef patch0_offset = get_tcs_out_patch0_offset(ctx);
131 LLVMValueRef patch_stride = get_tcs_out_patch_stride(ctx);
132 LLVMValueRef rel_patch_id = get_rel_patch_id(ctx);
133
134 return ac_build_imad(&ctx->ac, patch_stride, rel_patch_id, patch0_offset);
135 }
136
137 static LLVMValueRef
138 get_tcs_out_current_patch_data_offset(struct si_shader_context *ctx)
139 {
140 LLVMValueRef patch0_patch_data_offset =
141 get_tcs_out_patch0_patch_data_offset(ctx);
142 LLVMValueRef patch_stride = get_tcs_out_patch_stride(ctx);
143 LLVMValueRef rel_patch_id = get_rel_patch_id(ctx);
144
145 return ac_build_imad(&ctx->ac, patch_stride, rel_patch_id, patch0_patch_data_offset);
146 }
147
148 static LLVMValueRef get_num_tcs_out_vertices(struct si_shader_context *ctx)
149 {
150 unsigned tcs_out_vertices =
151 ctx->shader->selector ?
152 ctx->shader->selector->info.properties[TGSI_PROPERTY_TCS_VERTICES_OUT] : 0;
153
154 /* If !tcs_out_vertices, it's either the fixed-func TCS or the TCS epilog. */
155 if (ctx->type == PIPE_SHADER_TESS_CTRL && tcs_out_vertices)
156 return LLVMConstInt(ctx->i32, tcs_out_vertices, 0);
157
158 return si_unpack_param(ctx, ctx->tcs_offchip_layout, 6, 6);
159 }
160
161 static LLVMValueRef get_tcs_in_vertex_dw_stride(struct si_shader_context *ctx)
162 {
163 unsigned stride;
164
165 switch (ctx->type) {
166 case PIPE_SHADER_VERTEX:
167 stride = ctx->shader->selector->lshs_vertex_stride / 4;
168 return LLVMConstInt(ctx->i32, stride, 0);
169
170 case PIPE_SHADER_TESS_CTRL:
171 if (ctx->screen->info.chip_class >= GFX9 &&
172 ctx->shader->is_monolithic) {
173 stride = ctx->shader->key.part.tcs.ls->lshs_vertex_stride / 4;
174 return LLVMConstInt(ctx->i32, stride, 0);
175 }
176 return si_unpack_param(ctx, ctx->vs_state_bits, 24, 8);
177
178 default:
179 assert(0);
180 return NULL;
181 }
182 }
183
184 static LLVMValueRef get_dw_address_from_generic_indices(struct si_shader_context *ctx,
185 LLVMValueRef vertex_dw_stride,
186 LLVMValueRef base_addr,
187 LLVMValueRef vertex_index,
188 LLVMValueRef param_index,
189 ubyte name, ubyte index)
190 {
191 if (vertex_dw_stride) {
192 base_addr = ac_build_imad(&ctx->ac, vertex_index,
193 vertex_dw_stride, base_addr);
194 }
195
196 if (param_index) {
197 base_addr = ac_build_imad(&ctx->ac, param_index,
198 LLVMConstInt(ctx->i32, 4, 0), base_addr);
199 }
200
201 int param = name == TGSI_SEMANTIC_PATCH ||
202 name == TGSI_SEMANTIC_TESSINNER ||
203 name == TGSI_SEMANTIC_TESSOUTER ?
204 si_shader_io_get_unique_index_patch(name, index) :
205 si_shader_io_get_unique_index(name, index, false);
206
207 /* Add the base address of the element. */
208 return LLVMBuildAdd(ctx->ac.builder, base_addr,
209 LLVMConstInt(ctx->i32, param * 4, 0), "");
210 }
211
212 /* The offchip buffer layout for TCS->TES is
213 *
214 * - attribute 0 of patch 0 vertex 0
215 * - attribute 0 of patch 0 vertex 1
216 * - attribute 0 of patch 0 vertex 2
217 * ...
218 * - attribute 0 of patch 1 vertex 0
219 * - attribute 0 of patch 1 vertex 1
220 * ...
221 * - attribute 1 of patch 0 vertex 0
222 * - attribute 1 of patch 0 vertex 1
223 * ...
224 * - per patch attribute 0 of patch 0
225 * - per patch attribute 0 of patch 1
226 * ...
227 *
228 * Note that every attribute has 4 components.
229 */
230 static LLVMValueRef get_tcs_tes_buffer_address(struct si_shader_context *ctx,
231 LLVMValueRef rel_patch_id,
232 LLVMValueRef vertex_index,
233 LLVMValueRef param_index)
234 {
235 LLVMValueRef base_addr, vertices_per_patch, num_patches, total_vertices;
236 LLVMValueRef param_stride, constant16;
237
238 vertices_per_patch = get_num_tcs_out_vertices(ctx);
239 num_patches = si_unpack_param(ctx, ctx->tcs_offchip_layout, 0, 6);
240 total_vertices = LLVMBuildMul(ctx->ac.builder, vertices_per_patch,
241 num_patches, "");
242
243 constant16 = LLVMConstInt(ctx->i32, 16, 0);
244 if (vertex_index) {
245 base_addr = ac_build_imad(&ctx->ac, rel_patch_id,
246 vertices_per_patch, vertex_index);
247 param_stride = total_vertices;
248 } else {
249 base_addr = rel_patch_id;
250 param_stride = num_patches;
251 }
252
253 base_addr = ac_build_imad(&ctx->ac, param_index, param_stride, base_addr);
254 base_addr = LLVMBuildMul(ctx->ac.builder, base_addr, constant16, "");
255
256 if (!vertex_index) {
257 LLVMValueRef patch_data_offset =
258 si_unpack_param(ctx, ctx->tcs_offchip_layout, 12, 20);
259
260 base_addr = LLVMBuildAdd(ctx->ac.builder, base_addr,
261 patch_data_offset, "");
262 }
263 return base_addr;
264 }
265
266 static LLVMValueRef get_tcs_tes_buffer_address_from_generic_indices(
267 struct si_shader_context *ctx,
268 LLVMValueRef vertex_index,
269 LLVMValueRef param_index,
270 ubyte name, ubyte index)
271 {
272 unsigned param_index_base;
273
274 param_index_base = name == TGSI_SEMANTIC_PATCH ||
275 name == TGSI_SEMANTIC_TESSINNER ||
276 name == TGSI_SEMANTIC_TESSOUTER ?
277 si_shader_io_get_unique_index_patch(name, index) :
278 si_shader_io_get_unique_index(name, index, false);
279
280 if (param_index) {
281 param_index = LLVMBuildAdd(ctx->ac.builder, param_index,
282 LLVMConstInt(ctx->i32, param_index_base, 0),
283 "");
284 } else {
285 param_index = LLVMConstInt(ctx->i32, param_index_base, 0);
286 }
287
288 return get_tcs_tes_buffer_address(ctx, get_rel_patch_id(ctx),
289 vertex_index, param_index);
290 }
291
292 static LLVMValueRef buffer_load(struct si_shader_context *ctx,
293 LLVMTypeRef type, unsigned swizzle,
294 LLVMValueRef buffer, LLVMValueRef offset,
295 LLVMValueRef base, bool can_speculate)
296 {
297 LLVMValueRef value, value2;
298 LLVMTypeRef vec_type = LLVMVectorType(type, 4);
299
300 if (swizzle == ~0) {
301 value = ac_build_buffer_load(&ctx->ac, buffer, 4, NULL, base, offset,
302 0, ac_glc, can_speculate, false);
303
304 return LLVMBuildBitCast(ctx->ac.builder, value, vec_type, "");
305 }
306
307 if (ac_get_type_size(type) != 8) {
308 value = ac_build_buffer_load(&ctx->ac, buffer, 4, NULL, base, offset,
309 0, ac_glc, can_speculate, false);
310
311 value = LLVMBuildBitCast(ctx->ac.builder, value, vec_type, "");
312 return LLVMBuildExtractElement(ctx->ac.builder, value,
313 LLVMConstInt(ctx->i32, swizzle, 0), "");
314 }
315
316 value = ac_build_buffer_load(&ctx->ac, buffer, 1, NULL, base, offset,
317 swizzle * 4, ac_glc, can_speculate, false);
318
319 value2 = ac_build_buffer_load(&ctx->ac, buffer, 1, NULL, base, offset,
320 swizzle * 4 + 4, ac_glc, can_speculate, false);
321
322 return si_build_gather_64bit(ctx, type, value, value2);
323 }
324
325 /**
326 * Load from LSHS LDS storage.
327 *
328 * \param type output value type
329 * \param swizzle offset (typically 0..3); it can be ~0, which loads a vec4
330 * \param dw_addr address in dwords
331 */
332 static LLVMValueRef lshs_lds_load(struct si_shader_context *ctx,
333 LLVMTypeRef type, unsigned swizzle,
334 LLVMValueRef dw_addr)
335 {
336 LLVMValueRef value;
337
338 if (swizzle == ~0) {
339 LLVMValueRef values[4];
340
341 for (unsigned chan = 0; chan < 4; chan++)
342 values[chan] = lshs_lds_load(ctx, type, chan, dw_addr);
343
344 return ac_build_gather_values(&ctx->ac, values, 4);
345 }
346
347 /* Split 64-bit loads. */
348 if (ac_get_type_size(type) == 8) {
349 LLVMValueRef lo, hi;
350
351 lo = lshs_lds_load(ctx, ctx->i32, swizzle, dw_addr);
352 hi = lshs_lds_load(ctx, ctx->i32, swizzle + 1, dw_addr);
353 return si_build_gather_64bit(ctx, type, lo, hi);
354 }
355
356 dw_addr = LLVMBuildAdd(ctx->ac.builder, dw_addr,
357 LLVMConstInt(ctx->i32, swizzle, 0), "");
358
359 value = ac_lds_load(&ctx->ac, dw_addr);
360
361 return LLVMBuildBitCast(ctx->ac.builder, value, type, "");
362 }
363
364 /**
365 * Store to LSHS LDS storage.
366 *
367 * \param swizzle offset (typically 0..3)
368 * \param dw_addr address in dwords
369 * \param value value to store
370 */
371 static void lshs_lds_store(struct si_shader_context *ctx,
372 unsigned dw_offset_imm, LLVMValueRef dw_addr,
373 LLVMValueRef value)
374 {
375 dw_addr = LLVMBuildAdd(ctx->ac.builder, dw_addr,
376 LLVMConstInt(ctx->i32, dw_offset_imm, 0), "");
377
378 ac_lds_store(&ctx->ac, dw_addr, value);
379 }
380
/* Selects which tessellation ring get_tess_ring_descriptor() describes. */
enum si_tess_ring {
	/* Ring that si_write_tess_factors() stores tess factors into; its
	 * address is the TCS address plus screen->tess_offchip_ring_size. */
	TCS_FACTOR_RING,
	/* Offchip ring as addressed from the TCS (address taken from the
	 * masked tcs_out_lds_layout argument). */
	TESS_OFFCHIP_RING_TCS,
	/* Offchip ring as addressed from the TES (address taken from the
	 * tes_offchip_addr argument). */
	TESS_OFFCHIP_RING_TES,
};
386
387 static LLVMValueRef get_tess_ring_descriptor(struct si_shader_context *ctx,
388 enum si_tess_ring ring)
389 {
390 LLVMBuilderRef builder = ctx->ac.builder;
391 LLVMValueRef addr = ac_get_arg(&ctx->ac,
392 ring == TESS_OFFCHIP_RING_TES ?
393 ctx->tes_offchip_addr :
394 ctx->tcs_out_lds_layout);
395
396 /* TCS only receives high 13 bits of the address. */
397 if (ring == TESS_OFFCHIP_RING_TCS || ring == TCS_FACTOR_RING) {
398 addr = LLVMBuildAnd(builder, addr,
399 LLVMConstInt(ctx->i32, 0xfff80000, 0), "");
400 }
401
402 if (ring == TCS_FACTOR_RING) {
403 unsigned tf_offset = ctx->screen->tess_offchip_ring_size;
404 addr = LLVMBuildAdd(builder, addr,
405 LLVMConstInt(ctx->i32, tf_offset, 0), "");
406 }
407
408 uint32_t rsrc3 = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
409 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
410 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
411 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
412
413 if (ctx->screen->info.chip_class >= GFX10)
414 rsrc3 |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) |
415 S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) |
416 S_008F0C_RESOURCE_LEVEL(1);
417 else
418 rsrc3 |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
419 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
420
421 LLVMValueRef desc[4];
422 desc[0] = addr;
423 desc[1] = LLVMConstInt(ctx->i32,
424 S_008F04_BASE_ADDRESS_HI(ctx->screen->info.address32_hi), 0);
425 desc[2] = LLVMConstInt(ctx->i32, 0xffffffff, 0);
426 desc[3] = LLVMConstInt(ctx->i32, rsrc3, false);
427
428 return ac_build_gather_values(&ctx->ac, desc, 4);
429 }
430
431 void si_llvm_preload_tes_rings(struct si_shader_context *ctx)
432 {
433 ctx->tess_offchip_ring = get_tess_ring_descriptor(ctx, TESS_OFFCHIP_RING_TES);
434 }
435
436 static LLVMValueRef si_nir_load_tcs_varyings(struct ac_shader_abi *abi,
437 LLVMTypeRef type,
438 LLVMValueRef vertex_index,
439 LLVMValueRef param_index,
440 unsigned const_index,
441 unsigned location,
442 unsigned driver_location,
443 unsigned component,
444 unsigned num_components,
445 bool is_patch,
446 bool is_compact,
447 bool load_input)
448 {
449 struct si_shader_context *ctx = si_shader_context_from_abi(abi);
450 struct si_shader_info *info = &ctx->shader->selector->info;
451 LLVMValueRef dw_addr, stride;
452 ubyte name, index;
453
454 driver_location = driver_location / 4;
455
456 if (load_input) {
457 name = info->input_semantic_name[driver_location];
458 index = info->input_semantic_index[driver_location];
459 } else {
460 name = info->output_semantic_name[driver_location];
461 index = info->output_semantic_index[driver_location];
462 }
463
464 assert((name == TGSI_SEMANTIC_PATCH ||
465 name == TGSI_SEMANTIC_TESSINNER ||
466 name == TGSI_SEMANTIC_TESSOUTER) == is_patch);
467
468 if (load_input) {
469 stride = get_tcs_in_vertex_dw_stride(ctx);
470 dw_addr = get_tcs_in_current_patch_offset(ctx);
471 } else {
472 if (is_patch) {
473 stride = NULL;
474 dw_addr = get_tcs_out_current_patch_data_offset(ctx);
475 } else {
476 stride = get_tcs_out_vertex_dw_stride(ctx);
477 dw_addr = get_tcs_out_current_patch_offset(ctx);
478 }
479 }
480
481 if (!param_index) {
482 param_index = LLVMConstInt(ctx->i32, const_index, 0);
483 }
484
485 dw_addr = get_dw_address_from_generic_indices(ctx, stride, dw_addr,
486 vertex_index, param_index,
487 name, index);
488
489 LLVMValueRef value[4];
490 for (unsigned i = 0; i < num_components; i++) {
491 unsigned offset = i;
492 if (ac_get_type_size(type) == 8)
493 offset *= 2;
494
495 offset += component;
496 value[i + component] = lshs_lds_load(ctx, type, offset, dw_addr);
497 }
498
499 return ac_build_varying_gather_values(&ctx->ac, value, num_components, component);
500 }
501
502 LLVMValueRef si_nir_load_input_tes(struct ac_shader_abi *abi,
503 LLVMTypeRef type,
504 LLVMValueRef vertex_index,
505 LLVMValueRef param_index,
506 unsigned const_index,
507 unsigned location,
508 unsigned driver_location,
509 unsigned component,
510 unsigned num_components,
511 bool is_patch,
512 bool is_compact,
513 bool load_input)
514 {
515 struct si_shader_context *ctx = si_shader_context_from_abi(abi);
516 struct si_shader_info *info = &ctx->shader->selector->info;
517 LLVMValueRef base, addr;
518
519 driver_location = driver_location / 4;
520 ubyte name = info->input_semantic_name[driver_location];
521 ubyte index = info->input_semantic_index[driver_location];
522
523 assert((name == TGSI_SEMANTIC_PATCH ||
524 name == TGSI_SEMANTIC_TESSINNER ||
525 name == TGSI_SEMANTIC_TESSOUTER) == is_patch);
526
527 base = ac_get_arg(&ctx->ac, ctx->tcs_offchip_offset);
528
529 if (!param_index) {
530 param_index = LLVMConstInt(ctx->i32, const_index, 0);
531 }
532
533 addr = get_tcs_tes_buffer_address_from_generic_indices(ctx, vertex_index,
534 param_index,
535 name, index);
536
537 /* TODO: This will generate rather ordinary llvm code, although it
538 * should be easy for the optimiser to fix up. In future we might want
539 * to refactor buffer_load().
540 */
541 LLVMValueRef value[4];
542 for (unsigned i = 0; i < num_components; i++) {
543 unsigned offset = i;
544 if (ac_get_type_size(type) == 8) {
545 offset *= 2;
546 if (offset == 4) {
547 ubyte name = info->input_semantic_name[driver_location + 1];
548 ubyte index = info->input_semantic_index[driver_location + 1];
549 addr = get_tcs_tes_buffer_address_from_generic_indices(ctx,
550 vertex_index,
551 param_index,
552 name, index);
553 }
554
555 offset = offset % 4;
556 }
557
558 offset += component;
559 value[i + component] = buffer_load(ctx, type, offset,
560 ctx->tess_offchip_ring, base, addr, true);
561 }
562
563 return ac_build_varying_gather_values(&ctx->ac, value, num_components, component);
564 }
565
566 static void si_nir_store_output_tcs(struct ac_shader_abi *abi,
567 const struct nir_variable *var,
568 LLVMValueRef vertex_index,
569 LLVMValueRef param_index,
570 unsigned const_index,
571 LLVMValueRef src,
572 unsigned writemask)
573 {
574 struct si_shader_context *ctx = si_shader_context_from_abi(abi);
575 struct si_shader_info *info = &ctx->shader->selector->info;
576 const unsigned component = var->data.location_frac;
577 unsigned driver_location = var->data.driver_location;
578 LLVMValueRef dw_addr, stride;
579 LLVMValueRef buffer, base, addr;
580 LLVMValueRef values[8];
581 bool skip_lds_store;
582 bool is_tess_factor = false, is_tess_inner = false;
583
584 driver_location = driver_location / 4;
585 ubyte name = info->output_semantic_name[driver_location];
586 ubyte index = info->output_semantic_index[driver_location];
587
588 bool is_const = !param_index;
589 if (!param_index)
590 param_index = LLVMConstInt(ctx->i32, const_index, 0);
591
592 const bool is_patch = var->data.patch ||
593 var->data.location == VARYING_SLOT_TESS_LEVEL_INNER ||
594 var->data.location == VARYING_SLOT_TESS_LEVEL_OUTER;
595
596 /* Invalid SPIR-V can cause this. */
597 if ((name == TGSI_SEMANTIC_PATCH ||
598 name == TGSI_SEMANTIC_TESSINNER ||
599 name == TGSI_SEMANTIC_TESSOUTER) != is_patch)
600 return;
601
602 if (!is_patch) {
603 stride = get_tcs_out_vertex_dw_stride(ctx);
604 dw_addr = get_tcs_out_current_patch_offset(ctx);
605 dw_addr = get_dw_address_from_generic_indices(ctx, stride, dw_addr,
606 vertex_index, param_index,
607 name, index);
608
609 skip_lds_store = !info->reads_pervertex_outputs;
610 } else {
611 dw_addr = get_tcs_out_current_patch_data_offset(ctx);
612 dw_addr = get_dw_address_from_generic_indices(ctx, NULL, dw_addr,
613 vertex_index, param_index,
614 name, index);
615
616 skip_lds_store = !info->reads_perpatch_outputs;
617
618 if (is_const && const_index == 0) {
619 int name = info->output_semantic_name[driver_location];
620
621 /* Always write tess factors into LDS for the TCS epilog. */
622 if (name == TGSI_SEMANTIC_TESSINNER ||
623 name == TGSI_SEMANTIC_TESSOUTER) {
624 /* The epilog doesn't read LDS if invocation 0 defines tess factors. */
625 skip_lds_store = !info->reads_tessfactor_outputs &&
626 ctx->shader->selector->info.tessfactors_are_def_in_all_invocs;
627 is_tess_factor = true;
628 is_tess_inner = name == TGSI_SEMANTIC_TESSINNER;
629 }
630 }
631 }
632
633 buffer = get_tess_ring_descriptor(ctx, TESS_OFFCHIP_RING_TCS);
634
635 base = ac_get_arg(&ctx->ac, ctx->tcs_offchip_offset);
636
637 addr = get_tcs_tes_buffer_address_from_generic_indices(ctx, vertex_index,
638 param_index, name, index);
639
640 for (unsigned chan = component; chan < 8; chan++) {
641 if (!(writemask & (1 << chan)))
642 continue;
643 LLVMValueRef value = ac_llvm_extract_elem(&ctx->ac, src, chan - component);
644
645 unsigned buffer_store_offset = chan % 4;
646 if (chan == 4) {
647 ubyte name = info->output_semantic_name[driver_location + 1];
648 ubyte index = info->output_semantic_index[driver_location + 1];
649 addr = get_tcs_tes_buffer_address_from_generic_indices(ctx,
650 vertex_index,
651 param_index,
652 name, index);
653 }
654
655 /* Skip LDS stores if there is no LDS read of this output. */
656 if (!skip_lds_store)
657 lshs_lds_store(ctx, chan, dw_addr, value);
658
659 value = ac_to_integer(&ctx->ac, value);
660 values[chan] = value;
661
662 if (writemask != 0xF && !is_tess_factor) {
663 ac_build_buffer_store_dword(&ctx->ac, buffer, value, 1,
664 addr, base,
665 4 * buffer_store_offset,
666 ac_glc);
667 }
668
669 /* Write tess factors into VGPRs for the epilog. */
670 if (is_tess_factor &&
671 ctx->shader->selector->info.tessfactors_are_def_in_all_invocs) {
672 if (!is_tess_inner) {
673 LLVMBuildStore(ctx->ac.builder, value, /* outer */
674 ctx->invoc0_tess_factors[chan]);
675 } else if (chan < 2) {
676 LLVMBuildStore(ctx->ac.builder, value, /* inner */
677 ctx->invoc0_tess_factors[4 + chan]);
678 }
679 }
680 }
681
682 if (writemask == 0xF && !is_tess_factor) {
683 LLVMValueRef value = ac_build_gather_values(&ctx->ac,
684 values, 4);
685 ac_build_buffer_store_dword(&ctx->ac, buffer, value, 4, addr,
686 base, 0, ac_glc);
687 }
688 }
689
690 static LLVMValueRef si_load_tess_coord(struct ac_shader_abi *abi)
691 {
692 struct si_shader_context *ctx = si_shader_context_from_abi(abi);
693 LLVMValueRef coord[4] = {
694 ac_get_arg(&ctx->ac, ctx->tes_u),
695 ac_get_arg(&ctx->ac, ctx->tes_v),
696 ctx->ac.f32_0,
697 ctx->ac.f32_0
698 };
699
700 /* For triangles, the vector should be (u, v, 1-u-v). */
701 if (ctx->shader->selector->info.properties[TGSI_PROPERTY_TES_PRIM_MODE] ==
702 PIPE_PRIM_TRIANGLES) {
703 coord[2] = LLVMBuildFSub(ctx->ac.builder, ctx->ac.f32_1,
704 LLVMBuildFAdd(ctx->ac.builder,
705 coord[0], coord[1], ""), "");
706 }
707 return ac_build_gather_values(&ctx->ac, coord, 4);
708 }
709
710 static LLVMValueRef load_tess_level(struct si_shader_context *ctx,
711 unsigned semantic_name)
712 {
713 LLVMValueRef base, addr;
714
715 int param = si_shader_io_get_unique_index_patch(semantic_name, 0);
716
717 base = ac_get_arg(&ctx->ac, ctx->tcs_offchip_offset);
718 addr = get_tcs_tes_buffer_address(ctx, get_rel_patch_id(ctx), NULL,
719 LLVMConstInt(ctx->i32, param, 0));
720
721 return buffer_load(ctx, ctx->f32,
722 ~0, ctx->tess_offchip_ring, base, addr, true);
723
724 }
725
726 static LLVMValueRef load_tess_level_default(struct si_shader_context *ctx,
727 unsigned semantic_name)
728 {
729 LLVMValueRef buf, slot, val[4];
730 int i, offset;
731
732 slot = LLVMConstInt(ctx->i32, SI_HS_CONST_DEFAULT_TESS_LEVELS, 0);
733 buf = ac_get_arg(&ctx->ac, ctx->rw_buffers);
734 buf = ac_build_load_to_sgpr(&ctx->ac, buf, slot);
735 offset = semantic_name == TGSI_SEMANTIC_TESS_DEFAULT_INNER_LEVEL ? 4 : 0;
736
737 for (i = 0; i < 4; i++)
738 val[i] = si_buffer_load_const(ctx, buf,
739 LLVMConstInt(ctx->i32, (offset + i) * 4, 0));
740 return ac_build_gather_values(&ctx->ac, val, 4);
741 }
742
743 static LLVMValueRef si_load_tess_level(struct ac_shader_abi *abi,
744 unsigned varying_id,
745 bool load_default_state)
746 {
747 struct si_shader_context *ctx = si_shader_context_from_abi(abi);
748 unsigned semantic_name;
749
750 if (load_default_state) {
751 switch (varying_id) {
752 case VARYING_SLOT_TESS_LEVEL_INNER:
753 semantic_name = TGSI_SEMANTIC_TESS_DEFAULT_INNER_LEVEL;
754 break;
755 case VARYING_SLOT_TESS_LEVEL_OUTER:
756 semantic_name = TGSI_SEMANTIC_TESS_DEFAULT_OUTER_LEVEL;
757 break;
758 default:
759 unreachable("unknown tess level");
760 }
761 return load_tess_level_default(ctx, semantic_name);
762 }
763
764 switch (varying_id) {
765 case VARYING_SLOT_TESS_LEVEL_INNER:
766 semantic_name = TGSI_SEMANTIC_TESSINNER;
767 break;
768 case VARYING_SLOT_TESS_LEVEL_OUTER:
769 semantic_name = TGSI_SEMANTIC_TESSOUTER;
770 break;
771 default:
772 unreachable("unknown tess level");
773 }
774
775 return load_tess_level(ctx, semantic_name);
776
777 }
778
779 static LLVMValueRef si_load_patch_vertices_in(struct ac_shader_abi *abi)
780 {
781 struct si_shader_context *ctx = si_shader_context_from_abi(abi);
782 if (ctx->type == PIPE_SHADER_TESS_CTRL)
783 return si_unpack_param(ctx, ctx->tcs_out_lds_layout, 13, 6);
784 else if (ctx->type == PIPE_SHADER_TESS_EVAL)
785 return get_num_tcs_out_vertices(ctx);
786 else
787 unreachable("invalid shader stage for TGSI_SEMANTIC_VERTICESIN");
788 }
789
790 /**
791 * Forward all outputs from the vertex shader to the TES. This is only used
792 * for the fixed function TCS.
793 */
794 static void si_copy_tcs_inputs(struct si_shader_context *ctx)
795 {
796 LLVMValueRef invocation_id, buffer, buffer_offset;
797 LLVMValueRef lds_vertex_stride, lds_base;
798 uint64_t inputs;
799
800 invocation_id = si_unpack_param(ctx, ctx->args.tcs_rel_ids, 8, 5);
801 buffer = get_tess_ring_descriptor(ctx, TESS_OFFCHIP_RING_TCS);
802 buffer_offset = ac_get_arg(&ctx->ac, ctx->tcs_offchip_offset);
803
804 lds_vertex_stride = get_tcs_in_vertex_dw_stride(ctx);
805 lds_base = get_tcs_in_current_patch_offset(ctx);
806 lds_base = ac_build_imad(&ctx->ac, invocation_id, lds_vertex_stride,
807 lds_base);
808
809 inputs = ctx->shader->key.mono.u.ff_tcs_inputs_to_copy;
810 while (inputs) {
811 unsigned i = u_bit_scan64(&inputs);
812
813 LLVMValueRef lds_ptr = LLVMBuildAdd(ctx->ac.builder, lds_base,
814 LLVMConstInt(ctx->i32, 4 * i, 0),
815 "");
816
817 LLVMValueRef buffer_addr = get_tcs_tes_buffer_address(ctx,
818 get_rel_patch_id(ctx),
819 invocation_id,
820 LLVMConstInt(ctx->i32, i, 0));
821
822 LLVMValueRef value = lshs_lds_load(ctx, ctx->ac.i32, ~0, lds_ptr);
823
824 ac_build_buffer_store_dword(&ctx->ac, buffer, value, 4, buffer_addr,
825 buffer_offset, 0, ac_glc);
826 }
827 }
828
829 static void si_write_tess_factors(struct si_shader_context *ctx,
830 LLVMValueRef rel_patch_id,
831 LLVMValueRef invocation_id,
832 LLVMValueRef tcs_out_current_patch_data_offset,
833 LLVMValueRef invoc0_tf_outer[4],
834 LLVMValueRef invoc0_tf_inner[2])
835 {
836 struct si_shader *shader = ctx->shader;
837 unsigned tess_inner_index, tess_outer_index;
838 LLVMValueRef lds_base, lds_inner, lds_outer, byteoffset, buffer;
839 LLVMValueRef out[6], vec0, vec1, tf_base, inner[4], outer[4];
840 unsigned stride, outer_comps, inner_comps, i, offset;
841
842 /* Add a barrier before loading tess factors from LDS. */
843 if (!shader->key.part.tcs.epilog.invoc0_tess_factors_are_def)
844 si_llvm_emit_barrier(ctx);
845
846 /* Do this only for invocation 0, because the tess levels are per-patch,
847 * not per-vertex.
848 *
849 * This can't jump, because invocation 0 executes this. It should
850 * at least mask out the loads and stores for other invocations.
851 */
852 ac_build_ifcc(&ctx->ac,
853 LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ,
854 invocation_id, ctx->i32_0, ""), 6503);
855
856 /* Determine the layout of one tess factor element in the buffer. */
857 switch (shader->key.part.tcs.epilog.prim_mode) {
858 case PIPE_PRIM_LINES:
859 stride = 2; /* 2 dwords, 1 vec2 store */
860 outer_comps = 2;
861 inner_comps = 0;
862 break;
863 case PIPE_PRIM_TRIANGLES:
864 stride = 4; /* 4 dwords, 1 vec4 store */
865 outer_comps = 3;
866 inner_comps = 1;
867 break;
868 case PIPE_PRIM_QUADS:
869 stride = 6; /* 6 dwords, 2 stores (vec4 + vec2) */
870 outer_comps = 4;
871 inner_comps = 2;
872 break;
873 default:
874 assert(0);
875 return;
876 }
877
878 for (i = 0; i < 4; i++) {
879 inner[i] = LLVMGetUndef(ctx->i32);
880 outer[i] = LLVMGetUndef(ctx->i32);
881 }
882
883 if (shader->key.part.tcs.epilog.invoc0_tess_factors_are_def) {
884 /* Tess factors are in VGPRs. */
885 for (i = 0; i < outer_comps; i++)
886 outer[i] = out[i] = invoc0_tf_outer[i];
887 for (i = 0; i < inner_comps; i++)
888 inner[i] = out[outer_comps+i] = invoc0_tf_inner[i];
889 } else {
890 /* Load tess_inner and tess_outer from LDS.
891 * Any invocation can write them, so we can't get them from a temporary.
892 */
893 tess_inner_index = si_shader_io_get_unique_index_patch(TGSI_SEMANTIC_TESSINNER, 0);
894 tess_outer_index = si_shader_io_get_unique_index_patch(TGSI_SEMANTIC_TESSOUTER, 0);
895
896 lds_base = tcs_out_current_patch_data_offset;
897 lds_inner = LLVMBuildAdd(ctx->ac.builder, lds_base,
898 LLVMConstInt(ctx->i32,
899 tess_inner_index * 4, 0), "");
900 lds_outer = LLVMBuildAdd(ctx->ac.builder, lds_base,
901 LLVMConstInt(ctx->i32,
902 tess_outer_index * 4, 0), "");
903
904 for (i = 0; i < outer_comps; i++) {
905 outer[i] = out[i] =
906 lshs_lds_load(ctx, ctx->ac.i32, i, lds_outer);
907 }
908 for (i = 0; i < inner_comps; i++) {
909 inner[i] = out[outer_comps+i] =
910 lshs_lds_load(ctx, ctx->ac.i32, i, lds_inner);
911 }
912 }
913
914 if (shader->key.part.tcs.epilog.prim_mode == PIPE_PRIM_LINES) {
915 /* For isolines, the hardware expects tess factors in the
916 * reverse order from what NIR specifies.
917 */
918 LLVMValueRef tmp = out[0];
919 out[0] = out[1];
920 out[1] = tmp;
921 }
922
923 /* Convert the outputs to vectors for stores. */
924 vec0 = ac_build_gather_values(&ctx->ac, out, MIN2(stride, 4));
925 vec1 = NULL;
926
927 if (stride > 4)
928 vec1 = ac_build_gather_values(&ctx->ac, out+4, stride - 4);
929
930 /* Get the buffer. */
931 buffer = get_tess_ring_descriptor(ctx, TCS_FACTOR_RING);
932
933 /* Get the offset. */
934 tf_base = ac_get_arg(&ctx->ac,
935 ctx->tcs_factor_offset);
936 byteoffset = LLVMBuildMul(ctx->ac.builder, rel_patch_id,
937 LLVMConstInt(ctx->i32, 4 * stride, 0), "");
938
939 ac_build_ifcc(&ctx->ac,
940 LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ,
941 rel_patch_id, ctx->i32_0, ""), 6504);
942
943 /* Store the dynamic HS control word. */
944 offset = 0;
945 if (ctx->screen->info.chip_class <= GFX8) {
946 ac_build_buffer_store_dword(&ctx->ac, buffer,
947 LLVMConstInt(ctx->i32, 0x80000000, 0),
948 1, ctx->i32_0, tf_base,
949 offset, ac_glc);
950 offset += 4;
951 }
952
953 ac_build_endif(&ctx->ac, 6504);
954
955 /* Store the tessellation factors. */
956 ac_build_buffer_store_dword(&ctx->ac, buffer, vec0,
957 MIN2(stride, 4), byteoffset, tf_base,
958 offset, ac_glc);
959 offset += 16;
960 if (vec1)
961 ac_build_buffer_store_dword(&ctx->ac, buffer, vec1,
962 stride - 4, byteoffset, tf_base,
963 offset, ac_glc);
964
965 /* Store the tess factors into the offchip buffer if TES reads them. */
966 if (shader->key.part.tcs.epilog.tes_reads_tess_factors) {
967 LLVMValueRef buf, base, inner_vec, outer_vec, tf_outer_offset;
968 LLVMValueRef tf_inner_offset;
969 unsigned param_outer, param_inner;
970
971 buf = get_tess_ring_descriptor(ctx, TESS_OFFCHIP_RING_TCS);
972 base = ac_get_arg(&ctx->ac, ctx->tcs_offchip_offset);
973
974 param_outer = si_shader_io_get_unique_index_patch(
975 TGSI_SEMANTIC_TESSOUTER, 0);
976 tf_outer_offset = get_tcs_tes_buffer_address(ctx, rel_patch_id, NULL,
977 LLVMConstInt(ctx->i32, param_outer, 0));
978
979 unsigned outer_vec_size =
980 ac_has_vec3_support(ctx->screen->info.chip_class, false) ?
981 outer_comps : util_next_power_of_two(outer_comps);
982 outer_vec = ac_build_gather_values(&ctx->ac, outer, outer_vec_size);
983
984 ac_build_buffer_store_dword(&ctx->ac, buf, outer_vec,
985 outer_comps, tf_outer_offset,
986 base, 0, ac_glc);
987 if (inner_comps) {
988 param_inner = si_shader_io_get_unique_index_patch(
989 TGSI_SEMANTIC_TESSINNER, 0);
990 tf_inner_offset = get_tcs_tes_buffer_address(ctx, rel_patch_id, NULL,
991 LLVMConstInt(ctx->i32, param_inner, 0));
992
993 inner_vec = inner_comps == 1 ? inner[0] :
994 ac_build_gather_values(&ctx->ac, inner, inner_comps);
995 ac_build_buffer_store_dword(&ctx->ac, buf, inner_vec,
996 inner_comps, tf_inner_offset,
997 base, 0, ac_glc);
998 }
999 }
1000
1001 ac_build_endif(&ctx->ac, 6503);
1002 }
1003
1004 /* This only writes the tessellation factor levels. */
1005 static void si_llvm_emit_tcs_epilogue(struct ac_shader_abi *abi,
1006 unsigned max_outputs,
1007 LLVMValueRef *addrs)
1008 {
1009 struct si_shader_context *ctx = si_shader_context_from_abi(abi);
1010 LLVMBuilderRef builder = ctx->ac.builder;
1011 LLVMValueRef rel_patch_id, invocation_id, tf_lds_offset;
1012
1013 si_copy_tcs_inputs(ctx);
1014
1015 rel_patch_id = get_rel_patch_id(ctx);
1016 invocation_id = si_unpack_param(ctx, ctx->args.tcs_rel_ids, 8, 5);
1017 tf_lds_offset = get_tcs_out_current_patch_data_offset(ctx);
1018
1019 if (ctx->screen->info.chip_class >= GFX9) {
1020 LLVMBasicBlockRef blocks[2] = {
1021 LLVMGetInsertBlock(builder),
1022 ctx->merged_wrap_if_entry_block
1023 };
1024 LLVMValueRef values[2];
1025
1026 ac_build_endif(&ctx->ac, ctx->merged_wrap_if_label);
1027
1028 values[0] = rel_patch_id;
1029 values[1] = LLVMGetUndef(ctx->i32);
1030 rel_patch_id = ac_build_phi(&ctx->ac, ctx->i32, 2, values, blocks);
1031
1032 values[0] = tf_lds_offset;
1033 values[1] = LLVMGetUndef(ctx->i32);
1034 tf_lds_offset = ac_build_phi(&ctx->ac, ctx->i32, 2, values, blocks);
1035
1036 values[0] = invocation_id;
1037 values[1] = ctx->i32_1; /* cause the epilog to skip threads */
1038 invocation_id = ac_build_phi(&ctx->ac, ctx->i32, 2, values, blocks);
1039 }
1040
1041 /* Return epilog parameters from this function. */
1042 LLVMValueRef ret = ctx->return_value;
1043 unsigned vgpr;
1044
1045 if (ctx->screen->info.chip_class >= GFX9) {
1046 ret = si_insert_input_ret(ctx, ret, ctx->tcs_offchip_layout,
1047 8 + GFX9_SGPR_TCS_OFFCHIP_LAYOUT);
1048 ret = si_insert_input_ret(ctx, ret, ctx->tcs_out_lds_layout,
1049 8 + GFX9_SGPR_TCS_OUT_LAYOUT);
1050 /* Tess offchip and tess factor offsets are at the beginning. */
1051 ret = si_insert_input_ret(ctx, ret, ctx->tcs_offchip_offset, 2);
1052 ret = si_insert_input_ret(ctx, ret, ctx->tcs_factor_offset, 4);
1053 vgpr = 8 + GFX9_SGPR_TCS_OUT_LAYOUT + 1;
1054 } else {
1055 ret = si_insert_input_ret(ctx, ret, ctx->tcs_offchip_layout,
1056 GFX6_SGPR_TCS_OFFCHIP_LAYOUT);
1057 ret = si_insert_input_ret(ctx, ret, ctx->tcs_out_lds_layout,
1058 GFX6_SGPR_TCS_OUT_LAYOUT);
1059 /* Tess offchip and tess factor offsets are after user SGPRs. */
1060 ret = si_insert_input_ret(ctx, ret, ctx->tcs_offchip_offset,
1061 GFX6_TCS_NUM_USER_SGPR);
1062 ret = si_insert_input_ret(ctx, ret, ctx->tcs_factor_offset,
1063 GFX6_TCS_NUM_USER_SGPR + 1);
1064 vgpr = GFX6_TCS_NUM_USER_SGPR + 2;
1065 }
1066
1067 /* VGPRs */
1068 rel_patch_id = ac_to_float(&ctx->ac, rel_patch_id);
1069 invocation_id = ac_to_float(&ctx->ac, invocation_id);
1070 tf_lds_offset = ac_to_float(&ctx->ac, tf_lds_offset);
1071
1072 /* Leave a hole corresponding to the two input VGPRs. This ensures that
1073 * the invocation_id output does not alias the tcs_rel_ids input,
1074 * which saves a V_MOV on gfx9.
1075 */
1076 vgpr += 2;
1077
1078 ret = LLVMBuildInsertValue(builder, ret, rel_patch_id, vgpr++, "");
1079 ret = LLVMBuildInsertValue(builder, ret, invocation_id, vgpr++, "");
1080
1081 if (ctx->shader->selector->info.tessfactors_are_def_in_all_invocs) {
1082 vgpr++; /* skip the tess factor LDS offset */
1083 for (unsigned i = 0; i < 6; i++) {
1084 LLVMValueRef value =
1085 LLVMBuildLoad(builder, ctx->invoc0_tess_factors[i], "");
1086 value = ac_to_float(&ctx->ac, value);
1087 ret = LLVMBuildInsertValue(builder, ret, value, vgpr++, "");
1088 }
1089 } else {
1090 ret = LLVMBuildInsertValue(builder, ret, tf_lds_offset, vgpr++, "");
1091 }
1092 ctx->return_value = ret;
1093 }
1094
1095 /* Pass TCS inputs from LS to TCS on GFX9. */
1096 static void si_set_ls_return_value_for_tcs(struct si_shader_context *ctx)
1097 {
1098 LLVMValueRef ret = ctx->return_value;
1099
1100 ret = si_insert_input_ptr(ctx, ret, ctx->other_const_and_shader_buffers, 0);
1101 ret = si_insert_input_ptr(ctx, ret, ctx->other_samplers_and_images, 1);
1102 ret = si_insert_input_ret(ctx, ret, ctx->tcs_offchip_offset, 2);
1103 ret = si_insert_input_ret(ctx, ret, ctx->merged_wave_info, 3);
1104 ret = si_insert_input_ret(ctx, ret, ctx->tcs_factor_offset, 4);
1105 ret = si_insert_input_ret(ctx, ret, ctx->merged_scratch_offset, 5);
1106
1107 ret = si_insert_input_ptr(ctx, ret, ctx->rw_buffers,
1108 8 + SI_SGPR_RW_BUFFERS);
1109 ret = si_insert_input_ptr(ctx, ret,
1110 ctx->bindless_samplers_and_images,
1111 8 + SI_SGPR_BINDLESS_SAMPLERS_AND_IMAGES);
1112
1113 ret = si_insert_input_ret(ctx, ret, ctx->vs_state_bits,
1114 8 + SI_SGPR_VS_STATE_BITS);
1115
1116 ret = si_insert_input_ret(ctx, ret, ctx->tcs_offchip_layout,
1117 8 + GFX9_SGPR_TCS_OFFCHIP_LAYOUT);
1118 ret = si_insert_input_ret(ctx, ret, ctx->tcs_out_lds_offsets,
1119 8 + GFX9_SGPR_TCS_OUT_OFFSETS);
1120 ret = si_insert_input_ret(ctx, ret, ctx->tcs_out_lds_layout,
1121 8 + GFX9_SGPR_TCS_OUT_LAYOUT);
1122
1123 unsigned vgpr = 8 + GFX9_TCS_NUM_USER_SGPR;
1124 ret = LLVMBuildInsertValue(ctx->ac.builder, ret,
1125 ac_to_float(&ctx->ac,
1126 ac_get_arg(&ctx->ac, ctx->args.tcs_patch_id)),
1127 vgpr++, "");
1128 ret = LLVMBuildInsertValue(ctx->ac.builder, ret,
1129 ac_to_float(&ctx->ac,
1130 ac_get_arg(&ctx->ac, ctx->args.tcs_rel_ids)),
1131 vgpr++, "");
1132 ctx->return_value = ret;
1133 }
1134
1135 void si_llvm_emit_ls_epilogue(struct ac_shader_abi *abi, unsigned max_outputs,
1136 LLVMValueRef *addrs)
1137 {
1138 struct si_shader_context *ctx = si_shader_context_from_abi(abi);
1139 struct si_shader *shader = ctx->shader;
1140 struct si_shader_info *info = &shader->selector->info;
1141 unsigned i, chan;
1142 LLVMValueRef vertex_id = ac_get_arg(&ctx->ac, ctx->rel_auto_id);
1143 LLVMValueRef vertex_dw_stride = get_tcs_in_vertex_dw_stride(ctx);
1144 LLVMValueRef base_dw_addr = LLVMBuildMul(ctx->ac.builder, vertex_id,
1145 vertex_dw_stride, "");
1146
1147 /* Write outputs to LDS. The next shader (TCS aka HS) will read
1148 * its inputs from it. */
1149 for (i = 0; i < info->num_outputs; i++) {
1150 unsigned name = info->output_semantic_name[i];
1151 unsigned index = info->output_semantic_index[i];
1152
1153 /* The ARB_shader_viewport_layer_array spec contains the
1154 * following issue:
1155 *
1156 * 2) What happens if gl_ViewportIndex or gl_Layer is
1157 * written in the vertex shader and a geometry shader is
1158 * present?
1159 *
1160 * RESOLVED: The value written by the last vertex processing
1161 * stage is used. If the last vertex processing stage
1162 * (vertex, tessellation evaluation or geometry) does not
1163 * statically assign to gl_ViewportIndex or gl_Layer, index
1164 * or layer zero is assumed.
1165 *
1166 * So writes to those outputs in VS-as-LS are simply ignored.
1167 */
1168 if (name == TGSI_SEMANTIC_LAYER ||
1169 name == TGSI_SEMANTIC_VIEWPORT_INDEX)
1170 continue;
1171
1172 int param = si_shader_io_get_unique_index(name, index, false);
1173 LLVMValueRef dw_addr = LLVMBuildAdd(ctx->ac.builder, base_dw_addr,
1174 LLVMConstInt(ctx->i32, param * 4, 0), "");
1175
1176 for (chan = 0; chan < 4; chan++) {
1177 if (!(info->output_usagemask[i] & (1 << chan)))
1178 continue;
1179
1180 lshs_lds_store(ctx, chan, dw_addr,
1181 LLVMBuildLoad(ctx->ac.builder, addrs[4 * i + chan], ""));
1182 }
1183 }
1184
1185 if (ctx->screen->info.chip_class >= GFX9)
1186 si_set_ls_return_value_for_tcs(ctx);
1187 }
1188
1189 /**
1190 * Compile the TCS epilog function. This writes tesselation factors to memory
1191 * based on the output primitive type of the tesselator (determined by TES).
1192 */
1193 void si_llvm_build_tcs_epilog(struct si_shader_context *ctx,
1194 union si_shader_part_key *key)
1195 {
1196 memset(&ctx->args, 0, sizeof(ctx->args));
1197
1198 if (ctx->screen->info.chip_class >= GFX9) {
1199 ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
1200 ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
1201 ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT,
1202 &ctx->tcs_offchip_offset);
1203 ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); /* wave info */
1204 ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT,
1205 &ctx->tcs_factor_offset);
1206 ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
1207 ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
1208 ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
1209 ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
1210 ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
1211 ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
1212 ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
1213 ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
1214 ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
1215 ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
1216 ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
1217 ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT,
1218 &ctx->tcs_offchip_layout);
1219 ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
1220 ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT,
1221 &ctx->tcs_out_lds_layout);
1222 } else {
1223 ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
1224 ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
1225 ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
1226 ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
1227 ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT,
1228 &ctx->tcs_offchip_layout);
1229 ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
1230 ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT,
1231 &ctx->tcs_out_lds_layout);
1232 ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
1233 ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT,
1234 &ctx->tcs_offchip_offset);
1235 ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT,
1236 &ctx->tcs_factor_offset);
1237 }
1238
1239 ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* VGPR gap */
1240 ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* VGPR gap */
1241 struct ac_arg rel_patch_id; /* patch index within the wave (REL_PATCH_ID) */
1242 ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &rel_patch_id);
1243 struct ac_arg invocation_id; /* invocation ID within the patch */
1244 ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &invocation_id);
1245 struct ac_arg tcs_out_current_patch_data_offset; /* LDS offset where tess factors should be loaded from */
1246 ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT,
1247 &tcs_out_current_patch_data_offset);
1248
1249 struct ac_arg tess_factors[6];
1250 for (unsigned i = 0; i < 6; i++)
1251 ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &tess_factors[i]);
1252
1253 /* Create the function. */
1254 si_llvm_create_func(ctx, "tcs_epilog", NULL, 0,
1255 ctx->screen->info.chip_class >= GFX7 ? 128 : 0);
1256 ac_declare_lds_as_pointer(&ctx->ac);
1257
1258 LLVMValueRef invoc0_tess_factors[6];
1259 for (unsigned i = 0; i < 6; i++)
1260 invoc0_tess_factors[i] = ac_get_arg(&ctx->ac, tess_factors[i]);
1261
1262 si_write_tess_factors(ctx,
1263 ac_get_arg(&ctx->ac, rel_patch_id),
1264 ac_get_arg(&ctx->ac, invocation_id),
1265 ac_get_arg(&ctx->ac, tcs_out_current_patch_data_offset),
1266 invoc0_tess_factors, invoc0_tess_factors + 4);
1267
1268 LLVMBuildRetVoid(ctx->ac.builder);
1269 }
1270
1271 void si_llvm_init_tcs_callbacks(struct si_shader_context *ctx)
1272 {
1273 ctx->abi.load_tess_varyings = si_nir_load_tcs_varyings;
1274 ctx->abi.load_tess_level = si_load_tess_level;
1275 ctx->abi.store_tcs_outputs = si_nir_store_output_tcs;
1276 ctx->abi.emit_outputs = si_llvm_emit_tcs_epilogue;
1277 ctx->abi.load_patch_vertices_in = si_load_patch_vertices_in;
1278 }
1279
1280 void si_llvm_init_tes_callbacks(struct si_shader_context *ctx)
1281 {
1282 ctx->abi.load_tess_varyings = si_nir_load_input_tes;
1283 ctx->abi.load_tess_coord = si_load_tess_coord;
1284 ctx->abi.load_tess_level = si_load_tess_level;
1285 ctx->abi.load_patch_vertices_in = si_load_patch_vertices_in;
1286 }