amd/registers: switch to new generated register definitions
[mesa.git] / src / gallium / drivers / radeonsi / si_shader_llvm_tess.c
1 /*
2 * Copyright 2020 Advanced Micro Devices, Inc.
3 * All Rights Reserved.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * on the rights to use, copy, modify, merge, publish, distribute, sub
9 * license, and/or sell copies of the Software, and to permit persons to whom
10 * the Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22 * USE OR OTHER DEALINGS IN THE SOFTWARE.
23 */
24
25 #include "si_pipe.h"
26 #include "si_shader_internal.h"
27 #include "sid.h"
28
29 static LLVMValueRef get_rel_patch_id(struct si_shader_context *ctx)
30 {
31 switch (ctx->type) {
32 case PIPE_SHADER_TESS_CTRL:
33 return si_unpack_param(ctx, ctx->args.tcs_rel_ids, 0, 8);
34
35 case PIPE_SHADER_TESS_EVAL:
36 return ac_get_arg(&ctx->ac, ctx->tes_rel_patch_id);
37
38 default:
39 assert(0);
40 return NULL;
41 }
42 }
43
44 /* Tessellation shaders pass outputs to the next shader using LDS.
45 *
46 * LS outputs = TCS inputs
47 * TCS outputs = TES inputs
48 *
49 * The LDS layout is:
50 * - TCS inputs for patch 0
51 * - TCS inputs for patch 1
52 * - TCS inputs for patch 2 = get_tcs_in_current_patch_offset (if RelPatchID==2)
53 * - ...
54 * - TCS outputs for patch 0 = get_tcs_out_patch0_offset
55 * - Per-patch TCS outputs for patch 0 = get_tcs_out_patch0_patch_data_offset
56 * - TCS outputs for patch 1
57 * - Per-patch TCS outputs for patch 1
58 * - TCS outputs for patch 2 = get_tcs_out_current_patch_offset (if RelPatchID==2)
59 * - Per-patch TCS outputs for patch 2 = get_tcs_out_current_patch_data_offset (if RelPatchID==2)
60 * - ...
61 *
62 * All three shaders VS(LS), TCS, TES share the same LDS space.
63 */
64
65 static LLVMValueRef get_tcs_in_patch_stride(struct si_shader_context *ctx)
66 {
67 return si_unpack_param(ctx, ctx->vs_state_bits, 11, 13);
68 }
69
70 static unsigned get_tcs_out_vertex_dw_stride_constant(struct si_shader_context *ctx)
71 {
72 assert(ctx->type == PIPE_SHADER_TESS_CTRL);
73
74 if (ctx->shader->key.mono.u.ff_tcs_inputs_to_copy)
75 return util_last_bit64(ctx->shader->key.mono.u.ff_tcs_inputs_to_copy) * 4;
76
77 return util_last_bit64(ctx->shader->selector->outputs_written) * 4;
78 }
79
80 static LLVMValueRef get_tcs_out_vertex_dw_stride(struct si_shader_context *ctx)
81 {
82 unsigned stride = get_tcs_out_vertex_dw_stride_constant(ctx);
83
84 return LLVMConstInt(ctx->ac.i32, stride, 0);
85 }
86
87 static LLVMValueRef get_tcs_out_patch_stride(struct si_shader_context *ctx)
88 {
89 if (ctx->shader->key.mono.u.ff_tcs_inputs_to_copy)
90 return si_unpack_param(ctx, ctx->tcs_out_lds_layout, 0, 13);
91
92 const struct si_shader_info *info = &ctx->shader->selector->info;
93 unsigned tcs_out_vertices = info->properties[TGSI_PROPERTY_TCS_VERTICES_OUT];
94 unsigned vertex_dw_stride = get_tcs_out_vertex_dw_stride_constant(ctx);
95 unsigned num_patch_outputs = util_last_bit64(ctx->shader->selector->patch_outputs_written);
96 unsigned patch_dw_stride = tcs_out_vertices * vertex_dw_stride + num_patch_outputs * 4;
97 return LLVMConstInt(ctx->ac.i32, patch_dw_stride, 0);
98 }
99
100 static LLVMValueRef get_tcs_out_patch0_offset(struct si_shader_context *ctx)
101 {
102 return LLVMBuildMul(ctx->ac.builder, si_unpack_param(ctx, ctx->tcs_out_lds_offsets, 0, 16),
103 LLVMConstInt(ctx->ac.i32, 4, 0), "");
104 }
105
106 static LLVMValueRef get_tcs_out_patch0_patch_data_offset(struct si_shader_context *ctx)
107 {
108 return LLVMBuildMul(ctx->ac.builder, si_unpack_param(ctx, ctx->tcs_out_lds_offsets, 16, 16),
109 LLVMConstInt(ctx->ac.i32, 4, 0), "");
110 }
111
112 static LLVMValueRef get_tcs_in_current_patch_offset(struct si_shader_context *ctx)
113 {
114 LLVMValueRef patch_stride = get_tcs_in_patch_stride(ctx);
115 LLVMValueRef rel_patch_id = get_rel_patch_id(ctx);
116
117 return LLVMBuildMul(ctx->ac.builder, patch_stride, rel_patch_id, "");
118 }
119
120 static LLVMValueRef get_tcs_out_current_patch_offset(struct si_shader_context *ctx)
121 {
122 LLVMValueRef patch0_offset = get_tcs_out_patch0_offset(ctx);
123 LLVMValueRef patch_stride = get_tcs_out_patch_stride(ctx);
124 LLVMValueRef rel_patch_id = get_rel_patch_id(ctx);
125
126 return ac_build_imad(&ctx->ac, patch_stride, rel_patch_id, patch0_offset);
127 }
128
129 static LLVMValueRef get_tcs_out_current_patch_data_offset(struct si_shader_context *ctx)
130 {
131 LLVMValueRef patch0_patch_data_offset = get_tcs_out_patch0_patch_data_offset(ctx);
132 LLVMValueRef patch_stride = get_tcs_out_patch_stride(ctx);
133 LLVMValueRef rel_patch_id = get_rel_patch_id(ctx);
134
135 return ac_build_imad(&ctx->ac, patch_stride, rel_patch_id, patch0_patch_data_offset);
136 }
137
138 static LLVMValueRef get_num_tcs_out_vertices(struct si_shader_context *ctx)
139 {
140 unsigned tcs_out_vertices =
141 ctx->shader->selector ? ctx->shader->selector->info.properties[TGSI_PROPERTY_TCS_VERTICES_OUT]
142 : 0;
143
144 /* If !tcs_out_vertices, it's either the fixed-func TCS or the TCS epilog. */
145 if (ctx->type == PIPE_SHADER_TESS_CTRL && tcs_out_vertices)
146 return LLVMConstInt(ctx->ac.i32, tcs_out_vertices, 0);
147
148 return si_unpack_param(ctx, ctx->tcs_offchip_layout, 6, 6);
149 }
150
151 static LLVMValueRef get_tcs_in_vertex_dw_stride(struct si_shader_context *ctx)
152 {
153 unsigned stride;
154
155 switch (ctx->type) {
156 case PIPE_SHADER_VERTEX:
157 stride = ctx->shader->selector->lshs_vertex_stride / 4;
158 return LLVMConstInt(ctx->ac.i32, stride, 0);
159
160 case PIPE_SHADER_TESS_CTRL:
161 if (ctx->screen->info.chip_class >= GFX9 && ctx->shader->is_monolithic) {
162 stride = ctx->shader->key.part.tcs.ls->lshs_vertex_stride / 4;
163 return LLVMConstInt(ctx->ac.i32, stride, 0);
164 }
165 return si_unpack_param(ctx, ctx->vs_state_bits, 24, 8);
166
167 default:
168 assert(0);
169 return NULL;
170 }
171 }
172
173 static LLVMValueRef
174 get_dw_address_from_generic_indices(struct si_shader_context *ctx, LLVMValueRef vertex_dw_stride,
175 LLVMValueRef base_addr, LLVMValueRef vertex_index,
176 LLVMValueRef param_index, ubyte name, ubyte index)
177 {
178 if (vertex_dw_stride) {
179 base_addr = ac_build_imad(&ctx->ac, vertex_index, vertex_dw_stride, base_addr);
180 }
181
182 if (param_index) {
183 base_addr = ac_build_imad(&ctx->ac, param_index, LLVMConstInt(ctx->ac.i32, 4, 0), base_addr);
184 }
185
186 int param = name == TGSI_SEMANTIC_PATCH || name == TGSI_SEMANTIC_TESSINNER ||
187 name == TGSI_SEMANTIC_TESSOUTER
188 ? si_shader_io_get_unique_index_patch(name, index)
189 : si_shader_io_get_unique_index(name, index, false);
190
191 /* Add the base address of the element. */
192 return LLVMBuildAdd(ctx->ac.builder, base_addr, LLVMConstInt(ctx->ac.i32, param * 4, 0), "");
193 }
194
195 /* The offchip buffer layout for TCS->TES is
196 *
197 * - attribute 0 of patch 0 vertex 0
198 * - attribute 0 of patch 0 vertex 1
199 * - attribute 0 of patch 0 vertex 2
200 * ...
201 * - attribute 0 of patch 1 vertex 0
202 * - attribute 0 of patch 1 vertex 1
203 * ...
204 * - attribute 1 of patch 0 vertex 0
205 * - attribute 1 of patch 0 vertex 1
206 * ...
207 * - per patch attribute 0 of patch 0
208 * - per patch attribute 0 of patch 1
209 * ...
210 *
211 * Note that every attribute has 4 components.
212 */
213 static LLVMValueRef get_tcs_tes_buffer_address(struct si_shader_context *ctx,
214 LLVMValueRef rel_patch_id, LLVMValueRef vertex_index,
215 LLVMValueRef param_index)
216 {
217 LLVMValueRef base_addr, vertices_per_patch, num_patches, total_vertices;
218 LLVMValueRef param_stride, constant16;
219
220 vertices_per_patch = get_num_tcs_out_vertices(ctx);
221 num_patches = si_unpack_param(ctx, ctx->tcs_offchip_layout, 0, 6);
222 total_vertices = LLVMBuildMul(ctx->ac.builder, vertices_per_patch, num_patches, "");
223
224 constant16 = LLVMConstInt(ctx->ac.i32, 16, 0);
225 if (vertex_index) {
226 base_addr = ac_build_imad(&ctx->ac, rel_patch_id, vertices_per_patch, vertex_index);
227 param_stride = total_vertices;
228 } else {
229 base_addr = rel_patch_id;
230 param_stride = num_patches;
231 }
232
233 base_addr = ac_build_imad(&ctx->ac, param_index, param_stride, base_addr);
234 base_addr = LLVMBuildMul(ctx->ac.builder, base_addr, constant16, "");
235
236 if (!vertex_index) {
237 LLVMValueRef patch_data_offset = si_unpack_param(ctx, ctx->tcs_offchip_layout, 12, 20);
238
239 base_addr = LLVMBuildAdd(ctx->ac.builder, base_addr, patch_data_offset, "");
240 }
241 return base_addr;
242 }
243
244 static LLVMValueRef get_tcs_tes_buffer_address_from_generic_indices(struct si_shader_context *ctx,
245 LLVMValueRef vertex_index,
246 LLVMValueRef param_index,
247 ubyte name, ubyte index)
248 {
249 unsigned param_index_base;
250
251 param_index_base = name == TGSI_SEMANTIC_PATCH || name == TGSI_SEMANTIC_TESSINNER ||
252 name == TGSI_SEMANTIC_TESSOUTER
253 ? si_shader_io_get_unique_index_patch(name, index)
254 : si_shader_io_get_unique_index(name, index, false);
255
256 if (param_index) {
257 param_index = LLVMBuildAdd(ctx->ac.builder, param_index,
258 LLVMConstInt(ctx->ac.i32, param_index_base, 0), "");
259 } else {
260 param_index = LLVMConstInt(ctx->ac.i32, param_index_base, 0);
261 }
262
263 return get_tcs_tes_buffer_address(ctx, get_rel_patch_id(ctx), vertex_index, param_index);
264 }
265
266 static LLVMValueRef buffer_load(struct si_shader_context *ctx, LLVMTypeRef type, unsigned swizzle,
267 LLVMValueRef buffer, LLVMValueRef offset, LLVMValueRef base,
268 bool can_speculate)
269 {
270 LLVMValueRef value, value2;
271 LLVMTypeRef vec_type = LLVMVectorType(type, 4);
272
273 if (swizzle == ~0) {
274 value = ac_build_buffer_load(&ctx->ac, buffer, 4, NULL, base, offset, 0, ac_glc,
275 can_speculate, false);
276
277 return LLVMBuildBitCast(ctx->ac.builder, value, vec_type, "");
278 }
279
280 if (ac_get_type_size(type) != 8) {
281 value = ac_build_buffer_load(&ctx->ac, buffer, 4, NULL, base, offset, 0, ac_glc,
282 can_speculate, false);
283
284 value = LLVMBuildBitCast(ctx->ac.builder, value, vec_type, "");
285 return LLVMBuildExtractElement(ctx->ac.builder, value, LLVMConstInt(ctx->ac.i32, swizzle, 0),
286 "");
287 }
288
289 value = ac_build_buffer_load(&ctx->ac, buffer, 1, NULL, base, offset, swizzle * 4, ac_glc,
290 can_speculate, false);
291
292 value2 = ac_build_buffer_load(&ctx->ac, buffer, 1, NULL, base, offset, swizzle * 4 + 4, ac_glc,
293 can_speculate, false);
294
295 return si_build_gather_64bit(ctx, type, value, value2);
296 }
297
298 /**
299 * Load from LSHS LDS storage.
300 *
301 * \param type output value type
302 * \param swizzle offset (typically 0..3); it can be ~0, which loads a vec4
303 * \param dw_addr address in dwords
304 */
305 static LLVMValueRef lshs_lds_load(struct si_shader_context *ctx, LLVMTypeRef type, unsigned swizzle,
306 LLVMValueRef dw_addr)
307 {
308 LLVMValueRef value;
309
310 if (swizzle == ~0) {
311 LLVMValueRef values[4];
312
313 for (unsigned chan = 0; chan < 4; chan++)
314 values[chan] = lshs_lds_load(ctx, type, chan, dw_addr);
315
316 return ac_build_gather_values(&ctx->ac, values, 4);
317 }
318
319 /* Split 64-bit loads. */
320 if (ac_get_type_size(type) == 8) {
321 LLVMValueRef lo, hi;
322
323 lo = lshs_lds_load(ctx, ctx->ac.i32, swizzle, dw_addr);
324 hi = lshs_lds_load(ctx, ctx->ac.i32, swizzle + 1, dw_addr);
325 return si_build_gather_64bit(ctx, type, lo, hi);
326 }
327
328 dw_addr = LLVMBuildAdd(ctx->ac.builder, dw_addr, LLVMConstInt(ctx->ac.i32, swizzle, 0), "");
329
330 value = ac_lds_load(&ctx->ac, dw_addr);
331
332 return LLVMBuildBitCast(ctx->ac.builder, value, type, "");
333 }
334
335 /**
336 * Store to LSHS LDS storage.
337 *
338 * \param swizzle offset (typically 0..3)
339 * \param dw_addr address in dwords
340 * \param value value to store
341 */
342 static void lshs_lds_store(struct si_shader_context *ctx, unsigned dw_offset_imm,
343 LLVMValueRef dw_addr, LLVMValueRef value)
344 {
345 dw_addr =
346 LLVMBuildAdd(ctx->ac.builder, dw_addr, LLVMConstInt(ctx->ac.i32, dw_offset_imm, 0), "");
347
348 ac_lds_store(&ctx->ac, dw_addr, value);
349 }
350
/* Selects which tessellation ring get_tess_ring_descriptor() describes. */
enum si_tess_ring {
   TCS_FACTOR_RING = 0,       /* tess factor ring, addressed from the TCS */
   TESS_OFFCHIP_RING_TCS = 1, /* off-chip ring, addressed from the TCS */
   TESS_OFFCHIP_RING_TES = 2, /* off-chip ring, addressed from the TES */
};
357
358 static LLVMValueRef get_tess_ring_descriptor(struct si_shader_context *ctx, enum si_tess_ring ring)
359 {
360 LLVMBuilderRef builder = ctx->ac.builder;
361 LLVMValueRef addr = ac_get_arg(
362 &ctx->ac, ring == TESS_OFFCHIP_RING_TES ? ctx->tes_offchip_addr : ctx->tcs_out_lds_layout);
363
364 /* TCS only receives high 13 bits of the address. */
365 if (ring == TESS_OFFCHIP_RING_TCS || ring == TCS_FACTOR_RING) {
366 addr = LLVMBuildAnd(builder, addr, LLVMConstInt(ctx->ac.i32, 0xfff80000, 0), "");
367 }
368
369 if (ring == TCS_FACTOR_RING) {
370 unsigned tf_offset = ctx->screen->tess_offchip_ring_size;
371 addr = LLVMBuildAdd(builder, addr, LLVMConstInt(ctx->ac.i32, tf_offset, 0), "");
372 }
373
374 uint32_t rsrc3 = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
375 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
376
377 if (ctx->screen->info.chip_class >= GFX10)
378 rsrc3 |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) |
379 S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) | S_008F0C_RESOURCE_LEVEL(1);
380 else
381 rsrc3 |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
382 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
383
384 LLVMValueRef desc[4];
385 desc[0] = addr;
386 desc[1] = LLVMConstInt(ctx->ac.i32, S_008F04_BASE_ADDRESS_HI(ctx->screen->info.address32_hi), 0);
387 desc[2] = LLVMConstInt(ctx->ac.i32, 0xffffffff, 0);
388 desc[3] = LLVMConstInt(ctx->ac.i32, rsrc3, false);
389
390 return ac_build_gather_values(&ctx->ac, desc, 4);
391 }
392
393 void si_llvm_preload_tes_rings(struct si_shader_context *ctx)
394 {
395 ctx->tess_offchip_ring = get_tess_ring_descriptor(ctx, TESS_OFFCHIP_RING_TES);
396 }
397
398 static LLVMValueRef si_nir_load_tcs_varyings(struct ac_shader_abi *abi, LLVMTypeRef type,
399 LLVMValueRef vertex_index, LLVMValueRef param_index,
400 unsigned const_index, unsigned location,
401 unsigned driver_location, unsigned component,
402 unsigned num_components, bool is_patch,
403 bool is_compact, bool load_input)
404 {
405 struct si_shader_context *ctx = si_shader_context_from_abi(abi);
406 struct si_shader_info *info = &ctx->shader->selector->info;
407 LLVMValueRef dw_addr, stride;
408 ubyte name, index;
409
410 driver_location = driver_location / 4;
411
412 if (load_input) {
413 name = info->input_semantic_name[driver_location];
414 index = info->input_semantic_index[driver_location];
415 } else {
416 name = info->output_semantic_name[driver_location];
417 index = info->output_semantic_index[driver_location];
418 }
419
420 assert((name == TGSI_SEMANTIC_PATCH || name == TGSI_SEMANTIC_TESSINNER ||
421 name == TGSI_SEMANTIC_TESSOUTER) == is_patch);
422
423 if (load_input) {
424 stride = get_tcs_in_vertex_dw_stride(ctx);
425 dw_addr = get_tcs_in_current_patch_offset(ctx);
426 } else {
427 if (is_patch) {
428 stride = NULL;
429 dw_addr = get_tcs_out_current_patch_data_offset(ctx);
430 } else {
431 stride = get_tcs_out_vertex_dw_stride(ctx);
432 dw_addr = get_tcs_out_current_patch_offset(ctx);
433 }
434 }
435
436 if (!param_index) {
437 param_index = LLVMConstInt(ctx->ac.i32, const_index, 0);
438 }
439
440 dw_addr = get_dw_address_from_generic_indices(ctx, stride, dw_addr, vertex_index, param_index,
441 name, index);
442
443 LLVMValueRef value[4];
444 for (unsigned i = 0; i < num_components; i++) {
445 unsigned offset = i;
446 if (ac_get_type_size(type) == 8)
447 offset *= 2;
448
449 offset += component;
450 value[i + component] = lshs_lds_load(ctx, type, offset, dw_addr);
451 }
452
453 return ac_build_varying_gather_values(&ctx->ac, value, num_components, component);
454 }
455
456 static LLVMValueRef si_nir_load_input_tes(struct ac_shader_abi *abi, LLVMTypeRef type,
457 LLVMValueRef vertex_index, LLVMValueRef param_index,
458 unsigned const_index, unsigned location,
459 unsigned driver_location, unsigned component,
460 unsigned num_components, bool is_patch, bool is_compact,
461 bool load_input)
462 {
463 struct si_shader_context *ctx = si_shader_context_from_abi(abi);
464 struct si_shader_info *info = &ctx->shader->selector->info;
465 LLVMValueRef base, addr;
466
467 driver_location = driver_location / 4;
468 ubyte name = info->input_semantic_name[driver_location];
469 ubyte index = info->input_semantic_index[driver_location];
470
471 assert((name == TGSI_SEMANTIC_PATCH || name == TGSI_SEMANTIC_TESSINNER ||
472 name == TGSI_SEMANTIC_TESSOUTER) == is_patch);
473
474 base = ac_get_arg(&ctx->ac, ctx->tcs_offchip_offset);
475
476 if (!param_index) {
477 param_index = LLVMConstInt(ctx->ac.i32, const_index, 0);
478 }
479
480 addr =
481 get_tcs_tes_buffer_address_from_generic_indices(ctx, vertex_index, param_index, name, index);
482
483 /* TODO: This will generate rather ordinary llvm code, although it
484 * should be easy for the optimiser to fix up. In future we might want
485 * to refactor buffer_load().
486 */
487 LLVMValueRef value[4];
488 for (unsigned i = 0; i < num_components; i++) {
489 unsigned offset = i;
490 if (ac_get_type_size(type) == 8) {
491 offset *= 2;
492 if (offset == 4) {
493 ubyte name = info->input_semantic_name[driver_location + 1];
494 ubyte index = info->input_semantic_index[driver_location + 1];
495 addr = get_tcs_tes_buffer_address_from_generic_indices(ctx, vertex_index, param_index,
496 name, index);
497 }
498
499 offset = offset % 4;
500 }
501
502 offset += component;
503 value[i + component] =
504 buffer_load(ctx, type, offset, ctx->tess_offchip_ring, base, addr, true);
505 }
506
507 return ac_build_varying_gather_values(&ctx->ac, value, num_components, component);
508 }
509
/* Store a TCS output value.
 *
 * Compact variables fold const_index into the component and the writemask.
 * The semantic is looked up from driver_location / 4 in the output tables.
 * If the semantic and the variable disagree on whether the output is
 * per-patch, nothing is emitted.
 *
 * For every channel enabled in writemask (scanning from the variable's
 * component up to channel 7):
 *   - the value is stored to LDS unless skip_lds_store is set, i.e. unless
 *     the shader info reports that this class of outputs is never read;
 *   - non-tess-factor outputs are stored to the off-chip ring, one dword
 *     per channel, unless writemask == 0xF, in which case a single
 *     4-dword store is emitted after the loop;
 *   - tess factors (only recognized for a constant index of 0) are also
 *     stored into ctx->invoc0_tess_factors when the shader info has
 *     tessfactors_are_def_in_all_invocs set.
 *
 * When channel 4 is reached, the off-chip address is recomputed from the
 * semantic of the following driver location.
 */
510 static void si_nir_store_output_tcs(struct ac_shader_abi *abi, const struct nir_variable *var,
511 LLVMValueRef vertex_index, LLVMValueRef param_index,
512 unsigned const_index, LLVMValueRef src, unsigned writemask)
513 {
514 struct si_shader_context *ctx = si_shader_context_from_abi(abi);
515 struct si_shader_info *info = &ctx->shader->selector->info;
516 unsigned component = var->data.location_frac;
517 unsigned driver_location = var->data.driver_location;
518 LLVMValueRef dw_addr, stride;
519 LLVMValueRef buffer, base, addr;
520 LLVMValueRef values[8];
521 bool skip_lds_store;
522 bool is_tess_factor = false, is_tess_inner = false;
523
524 if (var->data.compact) {
525 component += const_index;
526 writemask <<= const_index;
527 const_index = 0;
528 }
529
530 driver_location = driver_location / 4;
531 ubyte name = info->output_semantic_name[driver_location];
532 ubyte index = info->output_semantic_index[driver_location];
533
534 bool is_const = !param_index;
535 if (!param_index)
536 param_index = LLVMConstInt(ctx->ac.i32, const_index, 0);
537
538 const bool is_patch = var->data.patch || var->data.location == VARYING_SLOT_TESS_LEVEL_INNER ||
539 var->data.location == VARYING_SLOT_TESS_LEVEL_OUTER;
540
541 /* Invalid SPIR-V can cause this. */
542 if ((name == TGSI_SEMANTIC_PATCH || name == TGSI_SEMANTIC_TESSINNER ||
543 name == TGSI_SEMANTIC_TESSOUTER) != is_patch)
544 return;
545
546 if (!is_patch) {
547 stride = get_tcs_out_vertex_dw_stride(ctx);
548 dw_addr = get_tcs_out_current_patch_offset(ctx);
549 dw_addr = get_dw_address_from_generic_indices(ctx, stride, dw_addr, vertex_index, param_index,
550 name, index);
551
552 skip_lds_store = !info->reads_pervertex_outputs;
553 } else {
554 dw_addr = get_tcs_out_current_patch_data_offset(ctx);
555 dw_addr = get_dw_address_from_generic_indices(ctx, NULL, dw_addr, vertex_index, param_index,
556 name, index);
557
558 skip_lds_store = !info->reads_perpatch_outputs;
559
560 if (is_const && const_index == 0) {
561 int name = info->output_semantic_name[driver_location];
562
563 /* Always write tess factors into LDS for the TCS epilog. */
564 if (name == TGSI_SEMANTIC_TESSINNER || name == TGSI_SEMANTIC_TESSOUTER) {
565 /* The epilog doesn't read LDS if invocation 0 defines tess factors. */
566 skip_lds_store = !info->reads_tessfactor_outputs &&
567 ctx->shader->selector->info.tessfactors_are_def_in_all_invocs;
568 is_tess_factor = true;
569 is_tess_inner = name == TGSI_SEMANTIC_TESSINNER;
570 }
571 }
572 }
573
574 buffer = get_tess_ring_descriptor(ctx, TESS_OFFCHIP_RING_TCS);
575
576 base = ac_get_arg(&ctx->ac, ctx->tcs_offchip_offset);
577
578 addr =
579 get_tcs_tes_buffer_address_from_generic_indices(ctx, vertex_index, param_index, name, index);
580
581 for (unsigned chan = component; chan < 8; chan++) {
582 if (!(writemask & (1 << chan)))
583 continue;
584 LLVMValueRef value = ac_llvm_extract_elem(&ctx->ac, src, chan - component);
585
586 unsigned buffer_store_offset = chan % 4;
587 if (chan == 4) {
588 ubyte name = info->output_semantic_name[driver_location + 1];
589 ubyte index = info->output_semantic_index[driver_location + 1];
590 addr = get_tcs_tes_buffer_address_from_generic_indices(ctx, vertex_index, param_index,
591 name, index);
592 }
593
594 /* Skip LDS stores if there is no LDS read of this output. */
595 if (!skip_lds_store)
596 lshs_lds_store(ctx, chan, dw_addr, value);
597
598 value = ac_to_integer(&ctx->ac, value);
599 values[chan] = value;
600
601 if (writemask != 0xF && !is_tess_factor) {
602 ac_build_buffer_store_dword(&ctx->ac, buffer, value, 1, addr, base,
603 4 * buffer_store_offset, ac_glc);
604 }
605
606 /* Write tess factors into VGPRs for the epilog. */
607 if (is_tess_factor && ctx->shader->selector->info.tessfactors_are_def_in_all_invocs) {
608 if (!is_tess_inner) {
609 LLVMBuildStore(ctx->ac.builder, value, /* outer */
610 ctx->invoc0_tess_factors[chan]);
611 } else if (chan < 2) {
612 LLVMBuildStore(ctx->ac.builder, value, /* inner */
613 ctx->invoc0_tess_factors[4 + chan]);
614 }
615 }
616 }
617
618 if (writemask == 0xF && !is_tess_factor) {
619 LLVMValueRef value = ac_build_gather_values(&ctx->ac, values, 4);
620 ac_build_buffer_store_dword(&ctx->ac, buffer, value, 4, addr, base, 0, ac_glc);
621 }
622 }
623
624 static LLVMValueRef si_load_tess_coord(struct ac_shader_abi *abi)
625 {
626 struct si_shader_context *ctx = si_shader_context_from_abi(abi);
627 LLVMValueRef coord[4] = {ac_get_arg(&ctx->ac, ctx->tes_u), ac_get_arg(&ctx->ac, ctx->tes_v),
628 ctx->ac.f32_0, ctx->ac.f32_0};
629
630 /* For triangles, the vector should be (u, v, 1-u-v). */
631 if (ctx->shader->selector->info.properties[TGSI_PROPERTY_TES_PRIM_MODE] == PIPE_PRIM_TRIANGLES) {
632 coord[2] = LLVMBuildFSub(ctx->ac.builder, ctx->ac.f32_1,
633 LLVMBuildFAdd(ctx->ac.builder, coord[0], coord[1], ""), "");
634 }
635 return ac_build_gather_values(&ctx->ac, coord, 4);
636 }
637
638 static LLVMValueRef load_tess_level(struct si_shader_context *ctx, unsigned semantic_name)
639 {
640 LLVMValueRef base, addr;
641
642 int param = si_shader_io_get_unique_index_patch(semantic_name, 0);
643
644 base = ac_get_arg(&ctx->ac, ctx->tcs_offchip_offset);
645 addr = get_tcs_tes_buffer_address(ctx, get_rel_patch_id(ctx), NULL,
646 LLVMConstInt(ctx->ac.i32, param, 0));
647
648 return buffer_load(ctx, ctx->ac.f32, ~0, ctx->tess_offchip_ring, base, addr, true);
649 }
650
651 static LLVMValueRef load_tess_level_default(struct si_shader_context *ctx, unsigned semantic_name)
652 {
653 LLVMValueRef buf, slot, val[4];
654 int i, offset;
655
656 slot = LLVMConstInt(ctx->ac.i32, SI_HS_CONST_DEFAULT_TESS_LEVELS, 0);
657 buf = ac_get_arg(&ctx->ac, ctx->rw_buffers);
658 buf = ac_build_load_to_sgpr(&ctx->ac, buf, slot);
659 offset = semantic_name == TGSI_SEMANTIC_TESS_DEFAULT_INNER_LEVEL ? 4 : 0;
660
661 for (i = 0; i < 4; i++)
662 val[i] = si_buffer_load_const(ctx, buf, LLVMConstInt(ctx->ac.i32, (offset + i) * 4, 0));
663 return ac_build_gather_values(&ctx->ac, val, 4);
664 }
665
666 static LLVMValueRef si_load_tess_level(struct ac_shader_abi *abi, unsigned varying_id,
667 bool load_default_state)
668 {
669 struct si_shader_context *ctx = si_shader_context_from_abi(abi);
670 unsigned semantic_name;
671
672 if (load_default_state) {
673 switch (varying_id) {
674 case VARYING_SLOT_TESS_LEVEL_INNER:
675 semantic_name = TGSI_SEMANTIC_TESS_DEFAULT_INNER_LEVEL;
676 break;
677 case VARYING_SLOT_TESS_LEVEL_OUTER:
678 semantic_name = TGSI_SEMANTIC_TESS_DEFAULT_OUTER_LEVEL;
679 break;
680 default:
681 unreachable("unknown tess level");
682 }
683 return load_tess_level_default(ctx, semantic_name);
684 }
685
686 switch (varying_id) {
687 case VARYING_SLOT_TESS_LEVEL_INNER:
688 semantic_name = TGSI_SEMANTIC_TESSINNER;
689 break;
690 case VARYING_SLOT_TESS_LEVEL_OUTER:
691 semantic_name = TGSI_SEMANTIC_TESSOUTER;
692 break;
693 default:
694 unreachable("unknown tess level");
695 }
696
697 return load_tess_level(ctx, semantic_name);
698 }
699
700 static LLVMValueRef si_load_patch_vertices_in(struct ac_shader_abi *abi)
701 {
702 struct si_shader_context *ctx = si_shader_context_from_abi(abi);
703 if (ctx->type == PIPE_SHADER_TESS_CTRL)
704 return si_unpack_param(ctx, ctx->tcs_out_lds_layout, 13, 6);
705 else if (ctx->type == PIPE_SHADER_TESS_EVAL)
706 return get_num_tcs_out_vertices(ctx);
707 else
708 unreachable("invalid shader stage for TGSI_SEMANTIC_VERTICESIN");
709 }
710
711 /**
712 * Forward all outputs from the vertex shader to the TES. This is only used
713 * for the fixed function TCS.
714 */
715 static void si_copy_tcs_inputs(struct si_shader_context *ctx)
716 {
717 LLVMValueRef invocation_id, buffer, buffer_offset;
718 LLVMValueRef lds_vertex_stride, lds_base;
719 uint64_t inputs;
720
721 invocation_id = si_unpack_param(ctx, ctx->args.tcs_rel_ids, 8, 5);
722 buffer = get_tess_ring_descriptor(ctx, TESS_OFFCHIP_RING_TCS);
723 buffer_offset = ac_get_arg(&ctx->ac, ctx->tcs_offchip_offset);
724
725 lds_vertex_stride = get_tcs_in_vertex_dw_stride(ctx);
726 lds_base = get_tcs_in_current_patch_offset(ctx);
727 lds_base = ac_build_imad(&ctx->ac, invocation_id, lds_vertex_stride, lds_base);
728
729 inputs = ctx->shader->key.mono.u.ff_tcs_inputs_to_copy;
730 while (inputs) {
731 unsigned i = u_bit_scan64(&inputs);
732
733 LLVMValueRef lds_ptr =
734 LLVMBuildAdd(ctx->ac.builder, lds_base, LLVMConstInt(ctx->ac.i32, 4 * i, 0), "");
735
736 LLVMValueRef buffer_addr = get_tcs_tes_buffer_address(
737 ctx, get_rel_patch_id(ctx), invocation_id, LLVMConstInt(ctx->ac.i32, i, 0));
738
739 LLVMValueRef value = lshs_lds_load(ctx, ctx->ac.i32, ~0, lds_ptr);
740
741 ac_build_buffer_store_dword(&ctx->ac, buffer, value, 4, buffer_addr, buffer_offset, 0,
742 ac_glc);
743 }
744 }
745
/* Emit the code that writes the tessellation factors of the current patch.
 *
 * If the epilog key does not have invoc0_tess_factors_are_def set, a
 * barrier is emitted first. Everything else is emitted inside an
 * "invocation_id == 0" conditional:
 *   - the layout of one tess factor element (dword stride and the number of
 *     outer/inner components) is chosen from the epilog key's prim_mode;
 *     an unknown prim_mode asserts and returns early;
 *   - the factors come from invoc0_tf_outer / invoc0_tf_inner when
 *     invoc0_tess_factors_are_def is set, otherwise they are loaded from
 *     LDS relative to tcs_out_current_patch_data_offset;
 *   - for PIPE_PRIM_LINES the two outer factors are swapped;
 *   - inside a nested "rel_patch_id == 0" conditional, GFX8 and older store
 *     the control word 0x80000000 at offset 0 of the tess factor ring, and
 *     the factor stores that follow are shifted by 4 bytes;
 *   - the factors are stored to the tess factor ring at byte offset
 *     rel_patch_id * 4 * stride (a second store covers dwords 4..5 when
 *     stride > 4);
 *   - if the epilog key has tes_reads_tess_factors set, the outer factors
 *     (and the inner ones, if any) are also stored to the off-chip ring.
 */
746 static void si_write_tess_factors(struct si_shader_context *ctx, LLVMValueRef rel_patch_id,
747 LLVMValueRef invocation_id,
748 LLVMValueRef tcs_out_current_patch_data_offset,
749 LLVMValueRef invoc0_tf_outer[4], LLVMValueRef invoc0_tf_inner[2])
750 {
751 struct si_shader *shader = ctx->shader;
752 unsigned tess_inner_index, tess_outer_index;
753 LLVMValueRef lds_base, lds_inner, lds_outer, byteoffset, buffer;
754 LLVMValueRef out[6], vec0, vec1, tf_base, inner[4], outer[4];
755 unsigned stride, outer_comps, inner_comps, i, offset;
756
757 /* Add a barrier before loading tess factors from LDS. */
758 if (!shader->key.part.tcs.epilog.invoc0_tess_factors_are_def)
759 si_llvm_emit_barrier(ctx);
760
761 /* Do this only for invocation 0, because the tess levels are per-patch,
762 * not per-vertex.
763 *
764 * This can't jump, because invocation 0 executes this. It should
765 * at least mask out the loads and stores for other invocations.
766 */
767 ac_build_ifcc(&ctx->ac,
768 LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, invocation_id, ctx->ac.i32_0, ""), 6503);
769
770 /* Determine the layout of one tess factor element in the buffer. */
771 switch (shader->key.part.tcs.epilog.prim_mode) {
772 case PIPE_PRIM_LINES:
773 stride = 2; /* 2 dwords, 1 vec2 store */
774 outer_comps = 2;
775 inner_comps = 0;
776 break;
777 case PIPE_PRIM_TRIANGLES:
778 stride = 4; /* 4 dwords, 1 vec4 store */
779 outer_comps = 3;
780 inner_comps = 1;
781 break;
782 case PIPE_PRIM_QUADS:
783 stride = 6; /* 6 dwords, 2 stores (vec4 + vec2) */
784 outer_comps = 4;
785 inner_comps = 2;
786 break;
787 default:
788 assert(0);
789 return;
790 }
791
792 for (i = 0; i < 4; i++) {
793 inner[i] = LLVMGetUndef(ctx->ac.i32);
794 outer[i] = LLVMGetUndef(ctx->ac.i32);
795 }
796
797 if (shader->key.part.tcs.epilog.invoc0_tess_factors_are_def) {
798 /* Tess factors are in VGPRs. */
799 for (i = 0; i < outer_comps; i++)
800 outer[i] = out[i] = invoc0_tf_outer[i];
801 for (i = 0; i < inner_comps; i++)
802 inner[i] = out[outer_comps + i] = invoc0_tf_inner[i];
803 } else {
804 /* Load tess_inner and tess_outer from LDS.
805 * Any invocation can write them, so we can't get them from a temporary.
806 */
807 tess_inner_index = si_shader_io_get_unique_index_patch(TGSI_SEMANTIC_TESSINNER, 0);
808 tess_outer_index = si_shader_io_get_unique_index_patch(TGSI_SEMANTIC_TESSOUTER, 0);
809
810 lds_base = tcs_out_current_patch_data_offset;
811 lds_inner = LLVMBuildAdd(ctx->ac.builder, lds_base,
812 LLVMConstInt(ctx->ac.i32, tess_inner_index * 4, 0), "");
813 lds_outer = LLVMBuildAdd(ctx->ac.builder, lds_base,
814 LLVMConstInt(ctx->ac.i32, tess_outer_index * 4, 0), "");
815
816 for (i = 0; i < outer_comps; i++) {
817 outer[i] = out[i] = lshs_lds_load(ctx, ctx->ac.i32, i, lds_outer);
818 }
819 for (i = 0; i < inner_comps; i++) {
820 inner[i] = out[outer_comps + i] = lshs_lds_load(ctx, ctx->ac.i32, i, lds_inner);
821 }
822 }
823
824 if (shader->key.part.tcs.epilog.prim_mode == PIPE_PRIM_LINES) {
825 /* For isolines, the hardware expects tess factors in the
826 * reverse order from what NIR specifies.
827 */
828 LLVMValueRef tmp = out[0];
829 out[0] = out[1];
830 out[1] = tmp;
831 }
832
833 /* Convert the outputs to vectors for stores. */
834 vec0 = ac_build_gather_values(&ctx->ac, out, MIN2(stride, 4));
835 vec1 = NULL;
836
837 if (stride > 4)
838 vec1 = ac_build_gather_values(&ctx->ac, out + 4, stride - 4);
839
840 /* Get the buffer. */
841 buffer = get_tess_ring_descriptor(ctx, TCS_FACTOR_RING);
842
843 /* Get the offset. */
844 tf_base = ac_get_arg(&ctx->ac, ctx->tcs_factor_offset);
845 byteoffset =
846 LLVMBuildMul(ctx->ac.builder, rel_patch_id, LLVMConstInt(ctx->ac.i32, 4 * stride, 0), "");
847
848 ac_build_ifcc(&ctx->ac,
849 LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, rel_patch_id, ctx->ac.i32_0, ""), 6504);
850
851 /* Store the dynamic HS control word. */
852 offset = 0;
853 if (ctx->screen->info.chip_class <= GFX8) {
854 ac_build_buffer_store_dword(&ctx->ac, buffer, LLVMConstInt(ctx->ac.i32, 0x80000000, 0), 1,
855 ctx->ac.i32_0, tf_base, offset, ac_glc);
856 offset += 4;
857 }
858
859 ac_build_endif(&ctx->ac, 6504);
860
861 /* Store the tessellation factors. */
862 ac_build_buffer_store_dword(&ctx->ac, buffer, vec0, MIN2(stride, 4), byteoffset, tf_base, offset,
863 ac_glc);
864 offset += 16;
865 if (vec1)
866 ac_build_buffer_store_dword(&ctx->ac, buffer, vec1, stride - 4, byteoffset, tf_base, offset,
867 ac_glc);
868
869 /* Store the tess factors into the offchip buffer if TES reads them. */
870 if (shader->key.part.tcs.epilog.tes_reads_tess_factors) {
871 LLVMValueRef buf, base, inner_vec, outer_vec, tf_outer_offset;
872 LLVMValueRef tf_inner_offset;
873 unsigned param_outer, param_inner;
874
875 buf = get_tess_ring_descriptor(ctx, TESS_OFFCHIP_RING_TCS);
876 base = ac_get_arg(&ctx->ac, ctx->tcs_offchip_offset);
877
878 param_outer = si_shader_io_get_unique_index_patch(TGSI_SEMANTIC_TESSOUTER, 0);
879 tf_outer_offset = get_tcs_tes_buffer_address(ctx, rel_patch_id, NULL,
880 LLVMConstInt(ctx->ac.i32, param_outer, 0));
881
882 unsigned outer_vec_size = ac_has_vec3_support(ctx->screen->info.chip_class, false)
883 ? outer_comps
884 : util_next_power_of_two(outer_comps);
885 outer_vec = ac_build_gather_values(&ctx->ac, outer, outer_vec_size);
886
887 ac_build_buffer_store_dword(&ctx->ac, buf, outer_vec, outer_comps, tf_outer_offset, base, 0,
888 ac_glc);
889 if (inner_comps) {
890 param_inner = si_shader_io_get_unique_index_patch(TGSI_SEMANTIC_TESSINNER, 0);
891 tf_inner_offset = get_tcs_tes_buffer_address(ctx, rel_patch_id, NULL,
892 LLVMConstInt(ctx->ac.i32, param_inner, 0));
893
894 inner_vec =
895 inner_comps == 1 ? inner[0] : ac_build_gather_values(&ctx->ac, inner, inner_comps);
896 ac_build_buffer_store_dword(&ctx->ac, buf, inner_vec, inner_comps, tf_inner_offset, base,
897 0, ac_glc);
898 }
899 }
900
901 ac_build_endif(&ctx->ac, 6503);
902 }
903
904 /* This only writes the tessellation factor levels. */
905 static void si_llvm_emit_tcs_epilogue(struct ac_shader_abi *abi, unsigned max_outputs,
906 LLVMValueRef *addrs)
907 {
908 struct si_shader_context *ctx = si_shader_context_from_abi(abi);
909 LLVMBuilderRef builder = ctx->ac.builder;
910 LLVMValueRef rel_patch_id, invocation_id, tf_lds_offset;
911
912 si_copy_tcs_inputs(ctx);
913
914 rel_patch_id = get_rel_patch_id(ctx);
915 invocation_id = si_unpack_param(ctx, ctx->args.tcs_rel_ids, 8, 5);
916 tf_lds_offset = get_tcs_out_current_patch_data_offset(ctx);
917
918 if (ctx->screen->info.chip_class >= GFX9) {
919 LLVMBasicBlockRef blocks[2] = {LLVMGetInsertBlock(builder), ctx->merged_wrap_if_entry_block};
920 LLVMValueRef values[2];
921
922 ac_build_endif(&ctx->ac, ctx->merged_wrap_if_label);
923
924 values[0] = rel_patch_id;
925 values[1] = LLVMGetUndef(ctx->ac.i32);
926 rel_patch_id = ac_build_phi(&ctx->ac, ctx->ac.i32, 2, values, blocks);
927
928 values[0] = tf_lds_offset;
929 values[1] = LLVMGetUndef(ctx->ac.i32);
930 tf_lds_offset = ac_build_phi(&ctx->ac, ctx->ac.i32, 2, values, blocks);
931
932 values[0] = invocation_id;
933 values[1] = ctx->ac.i32_1; /* cause the epilog to skip threads */
934 invocation_id = ac_build_phi(&ctx->ac, ctx->ac.i32, 2, values, blocks);
935 }
936
937 /* Return epilog parameters from this function. */
938 LLVMValueRef ret = ctx->return_value;
939 unsigned vgpr;
940
941 if (ctx->screen->info.chip_class >= GFX9) {
942 ret =
943 si_insert_input_ret(ctx, ret, ctx->tcs_offchip_layout, 8 + GFX9_SGPR_TCS_OFFCHIP_LAYOUT);
944 ret = si_insert_input_ret(ctx, ret, ctx->tcs_out_lds_layout, 8 + GFX9_SGPR_TCS_OUT_LAYOUT);
945 /* Tess offchip and tess factor offsets are at the beginning. */
946 ret = si_insert_input_ret(ctx, ret, ctx->tcs_offchip_offset, 2);
947 ret = si_insert_input_ret(ctx, ret, ctx->tcs_factor_offset, 4);
948 vgpr = 8 + GFX9_SGPR_TCS_OUT_LAYOUT + 1;
949 } else {
950 ret = si_insert_input_ret(ctx, ret, ctx->tcs_offchip_layout, GFX6_SGPR_TCS_OFFCHIP_LAYOUT);
951 ret = si_insert_input_ret(ctx, ret, ctx->tcs_out_lds_layout, GFX6_SGPR_TCS_OUT_LAYOUT);
952 /* Tess offchip and tess factor offsets are after user SGPRs. */
953 ret = si_insert_input_ret(ctx, ret, ctx->tcs_offchip_offset, GFX6_TCS_NUM_USER_SGPR);
954 ret = si_insert_input_ret(ctx, ret, ctx->tcs_factor_offset, GFX6_TCS_NUM_USER_SGPR + 1);
955 vgpr = GFX6_TCS_NUM_USER_SGPR + 2;
956 }
957
958 /* VGPRs */
959 rel_patch_id = ac_to_float(&ctx->ac, rel_patch_id);
960 invocation_id = ac_to_float(&ctx->ac, invocation_id);
961 tf_lds_offset = ac_to_float(&ctx->ac, tf_lds_offset);
962
963 /* Leave a hole corresponding to the two input VGPRs. This ensures that
964 * the invocation_id output does not alias the tcs_rel_ids input,
965 * which saves a V_MOV on gfx9.
966 */
967 vgpr += 2;
968
969 ret = LLVMBuildInsertValue(builder, ret, rel_patch_id, vgpr++, "");
970 ret = LLVMBuildInsertValue(builder, ret, invocation_id, vgpr++, "");
971
972 if (ctx->shader->selector->info.tessfactors_are_def_in_all_invocs) {
973 vgpr++; /* skip the tess factor LDS offset */
974 for (unsigned i = 0; i < 6; i++) {
975 LLVMValueRef value = LLVMBuildLoad(builder, ctx->invoc0_tess_factors[i], "");
976 value = ac_to_float(&ctx->ac, value);
977 ret = LLVMBuildInsertValue(builder, ret, value, vgpr++, "");
978 }
979 } else {
980 ret = LLVMBuildInsertValue(builder, ret, tf_lds_offset, vgpr++, "");
981 }
982 ctx->return_value = ret;
983 }
984
985 /* Pass TCS inputs from LS to TCS on GFX9. */
986 static void si_set_ls_return_value_for_tcs(struct si_shader_context *ctx)
987 {
988 LLVMValueRef ret = ctx->return_value;
989
990 ret = si_insert_input_ptr(ctx, ret, ctx->other_const_and_shader_buffers, 0);
991 ret = si_insert_input_ptr(ctx, ret, ctx->other_samplers_and_images, 1);
992 ret = si_insert_input_ret(ctx, ret, ctx->tcs_offchip_offset, 2);
993 ret = si_insert_input_ret(ctx, ret, ctx->merged_wave_info, 3);
994 ret = si_insert_input_ret(ctx, ret, ctx->tcs_factor_offset, 4);
995 ret = si_insert_input_ret(ctx, ret, ctx->merged_scratch_offset, 5);
996
997 ret = si_insert_input_ptr(ctx, ret, ctx->rw_buffers, 8 + SI_SGPR_RW_BUFFERS);
998 ret = si_insert_input_ptr(ctx, ret, ctx->bindless_samplers_and_images,
999 8 + SI_SGPR_BINDLESS_SAMPLERS_AND_IMAGES);
1000
1001 ret = si_insert_input_ret(ctx, ret, ctx->vs_state_bits, 8 + SI_SGPR_VS_STATE_BITS);
1002
1003 ret = si_insert_input_ret(ctx, ret, ctx->tcs_offchip_layout, 8 + GFX9_SGPR_TCS_OFFCHIP_LAYOUT);
1004 ret = si_insert_input_ret(ctx, ret, ctx->tcs_out_lds_offsets, 8 + GFX9_SGPR_TCS_OUT_OFFSETS);
1005 ret = si_insert_input_ret(ctx, ret, ctx->tcs_out_lds_layout, 8 + GFX9_SGPR_TCS_OUT_LAYOUT);
1006
1007 unsigned vgpr = 8 + GFX9_TCS_NUM_USER_SGPR;
1008 ret = LLVMBuildInsertValue(ctx->ac.builder, ret,
1009 ac_to_float(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args.tcs_patch_id)),
1010 vgpr++, "");
1011 ret = LLVMBuildInsertValue(ctx->ac.builder, ret,
1012 ac_to_float(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args.tcs_rel_ids)),
1013 vgpr++, "");
1014 ctx->return_value = ret;
1015 }
1016
1017 void si_llvm_emit_ls_epilogue(struct ac_shader_abi *abi, unsigned max_outputs, LLVMValueRef *addrs)
1018 {
1019 struct si_shader_context *ctx = si_shader_context_from_abi(abi);
1020 struct si_shader *shader = ctx->shader;
1021 struct si_shader_info *info = &shader->selector->info;
1022 unsigned i, chan;
1023 LLVMValueRef vertex_id = ac_get_arg(&ctx->ac, ctx->rel_auto_id);
1024 LLVMValueRef vertex_dw_stride = get_tcs_in_vertex_dw_stride(ctx);
1025 LLVMValueRef base_dw_addr = LLVMBuildMul(ctx->ac.builder, vertex_id, vertex_dw_stride, "");
1026
1027 /* Write outputs to LDS. The next shader (TCS aka HS) will read
1028 * its inputs from it. */
1029 for (i = 0; i < info->num_outputs; i++) {
1030 unsigned name = info->output_semantic_name[i];
1031 unsigned index = info->output_semantic_index[i];
1032
1033 /* The ARB_shader_viewport_layer_array spec contains the
1034 * following issue:
1035 *
1036 * 2) What happens if gl_ViewportIndex or gl_Layer is
1037 * written in the vertex shader and a geometry shader is
1038 * present?
1039 *
1040 * RESOLVED: The value written by the last vertex processing
1041 * stage is used. If the last vertex processing stage
1042 * (vertex, tessellation evaluation or geometry) does not
1043 * statically assign to gl_ViewportIndex or gl_Layer, index
1044 * or layer zero is assumed.
1045 *
1046 * So writes to those outputs in VS-as-LS are simply ignored.
1047 */
1048 if (name == TGSI_SEMANTIC_LAYER || name == TGSI_SEMANTIC_VIEWPORT_INDEX)
1049 continue;
1050
1051 int param = si_shader_io_get_unique_index(name, index, false);
1052 LLVMValueRef dw_addr =
1053 LLVMBuildAdd(ctx->ac.builder, base_dw_addr, LLVMConstInt(ctx->ac.i32, param * 4, 0), "");
1054
1055 for (chan = 0; chan < 4; chan++) {
1056 if (!(info->output_usagemask[i] & (1 << chan)))
1057 continue;
1058
1059 lshs_lds_store(ctx, chan, dw_addr,
1060 LLVMBuildLoad(ctx->ac.builder, addrs[4 * i + chan], ""));
1061 }
1062 }
1063
1064 if (ctx->screen->info.chip_class >= GFX9)
1065 si_set_ls_return_value_for_tcs(ctx);
1066 }
1067
1068 /**
1069 * Compile the TCS epilog function. This writes tesselation factors to memory
1070 * based on the output primitive type of the tesselator (determined by TES).
1071 */
1072 void si_llvm_build_tcs_epilog(struct si_shader_context *ctx, union si_shader_part_key *key)
1073 {
1074 memset(&ctx->args, 0, sizeof(ctx->args));
1075
1076 if (ctx->screen->info.chip_class >= GFX9) {
1077 ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
1078 ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
1079 ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->tcs_offchip_offset);
1080 ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); /* wave info */
1081 ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->tcs_factor_offset);
1082 ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
1083 ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
1084 ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
1085 ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
1086 ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
1087 ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
1088 ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
1089 ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
1090 ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
1091 ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
1092 ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
1093 ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->tcs_offchip_layout);
1094 ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
1095 ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->tcs_out_lds_layout);
1096 } else {
1097 ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
1098 ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
1099 ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
1100 ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
1101 ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->tcs_offchip_layout);
1102 ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
1103 ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->tcs_out_lds_layout);
1104 ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
1105 ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->tcs_offchip_offset);
1106 ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->tcs_factor_offset);
1107 }
1108
1109 ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* VGPR gap */
1110 ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* VGPR gap */
1111 struct ac_arg rel_patch_id; /* patch index within the wave (REL_PATCH_ID) */
1112 ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &rel_patch_id);
1113 struct ac_arg invocation_id; /* invocation ID within the patch */
1114 ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &invocation_id);
1115 struct ac_arg
1116 tcs_out_current_patch_data_offset; /* LDS offset where tess factors should be loaded from */
1117 ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &tcs_out_current_patch_data_offset);
1118
1119 struct ac_arg tess_factors[6];
1120 for (unsigned i = 0; i < 6; i++)
1121 ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &tess_factors[i]);
1122
1123 /* Create the function. */
1124 si_llvm_create_func(ctx, "tcs_epilog", NULL, 0, ctx->screen->info.chip_class >= GFX7 ? 128 : 0);
1125 ac_declare_lds_as_pointer(&ctx->ac);
1126
1127 LLVMValueRef invoc0_tess_factors[6];
1128 for (unsigned i = 0; i < 6; i++)
1129 invoc0_tess_factors[i] = ac_get_arg(&ctx->ac, tess_factors[i]);
1130
1131 si_write_tess_factors(ctx, ac_get_arg(&ctx->ac, rel_patch_id),
1132 ac_get_arg(&ctx->ac, invocation_id),
1133 ac_get_arg(&ctx->ac, tcs_out_current_patch_data_offset),
1134 invoc0_tess_factors, invoc0_tess_factors + 4);
1135
1136 LLVMBuildRetVoid(ctx->ac.builder);
1137 }
1138
1139 void si_llvm_init_tcs_callbacks(struct si_shader_context *ctx)
1140 {
1141 ctx->abi.load_tess_varyings = si_nir_load_tcs_varyings;
1142 ctx->abi.load_tess_level = si_load_tess_level;
1143 ctx->abi.store_tcs_outputs = si_nir_store_output_tcs;
1144 ctx->abi.emit_outputs = si_llvm_emit_tcs_epilogue;
1145 ctx->abi.load_patch_vertices_in = si_load_patch_vertices_in;
1146 }
1147
1148 void si_llvm_init_tes_callbacks(struct si_shader_context *ctx, bool ngg_cull_shader)
1149 {
1150 ctx->abi.load_tess_varyings = si_nir_load_input_tes;
1151 ctx->abi.load_tess_coord = si_load_tess_coord;
1152 ctx->abi.load_tess_level = si_load_tess_level;
1153 ctx->abi.load_patch_vertices_in = si_load_patch_vertices_in;
1154
1155 if (ctx->shader->key.as_es)
1156 ctx->abi.emit_outputs = si_llvm_emit_es_epilogue;
1157 else if (ngg_cull_shader)
1158 ctx->abi.emit_outputs = gfx10_emit_ngg_culling_epilogue;
1159 else if (ctx->shader->key.as_ngg)
1160 ctx->abi.emit_outputs = gfx10_emit_ngg_epilogue;
1161 else
1162 ctx->abi.emit_outputs = si_llvm_emit_vs_epilogue;
1163 }