ir3: use empirical size for params as used by the shader
src/freedreno/ir3/ir3_nir_lower_tess.c
/*
 * Copyright © 2019 Google, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include "ir3_nir.h"
#include "ir3_compiler.h"
#include "compiler/nir/nir_builder.h"

struct state {
   uint32_t topology;

   struct primitive_map {
      unsigned loc[32];
      unsigned size[32];
      unsigned stride;
   } map;

   nir_ssa_def *header;

   nir_variable *vertex_count_var;
   nir_variable *emitted_vertex_var;
   nir_variable *vertex_flags_out;

   struct exec_list old_outputs;
   struct exec_list emit_outputs;
};

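/* The tcs_header/gs_header sysvals are packed bitfields identifying what
 * a thread is working on.  From the shift/mask values used by the decode
 * helpers below, the packing is:
 *
 *   bits  0..5  : local primitive id  (mask 0x3f)
 *   bits  6..10 : vertex id           (mask 0x1f)
 *   bits 11..15 : invocation id       (mask 0x1f)
 *   bits 16..25 : local thread id     (mask 0x3ff, GS header only;
 *                                      see local_thread_id() below)
 */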
static nir_ssa_def *
bitfield_extract(nir_builder *b, nir_ssa_def *v, uint32_t start, uint32_t mask)
{
   return nir_iand(b, nir_ushr(b, v, nir_imm_int(b, start)),
                   nir_imm_int(b, mask));
}

static nir_ssa_def *
build_invocation_id(nir_builder *b, struct state *state)
{
   return bitfield_extract(b, state->header, 11, 31);
}

static nir_ssa_def *
build_vertex_id(nir_builder *b, struct state *state)
{
   return bitfield_extract(b, state->header, 6, 31);
}

static nir_ssa_def *
build_local_primitive_id(nir_builder *b, struct state *state)
{
   return bitfield_extract(b, state->header, 0, 63);
}

static nir_variable *
get_var(struct exec_list *list, int driver_location)
{
   nir_foreach_variable (v, list) {
      if (v->data.driver_location == driver_location) {
         return v;
      }
   }

   return NULL;
}

static bool
is_tess_levels(nir_variable *var)
{
   return (var->data.location == VARYING_SLOT_TESS_LEVEL_OUTER ||
           var->data.location == VARYING_SLOT_TESS_LEVEL_INNER);
}

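/* Address math for attributes passed through local (shared) memory, as
 * encoded below; the layout is primitive-major:
 *
 *   offset = local_primitive_id * primitive_stride   // select primitive
 *          + vertex * vertex_stride                  // select vertex
 *          + attr_offset + offset                    // select attribute
 *
 * Offsets are in bytes (ldlw/stlw take byte offsets, see
 * ir3_link_geometry_stages()), hence the * 4 applied to the dword-based
 * primitive map values.  A stage storing its own outputs (VS, DS) takes
 * the strides from the compile-time primitive map; a stage loading its
 * inputs (HS, GS) takes them from driver params, since they depend on
 * the previous stage's layout.
 */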
static nir_ssa_def *
build_local_offset(nir_builder *b, struct state *state,
      nir_ssa_def *vertex, uint32_t base, nir_ssa_def *offset)
{
   nir_ssa_def *primitive_stride = nir_load_vs_primitive_stride_ir3(b);
   nir_ssa_def *primitive_offset =
      nir_imul24(b, build_local_primitive_id(b, state), primitive_stride);
   nir_ssa_def *attr_offset;
   nir_ssa_def *vertex_stride;

   switch (b->shader->info.stage) {
   case MESA_SHADER_VERTEX:
   case MESA_SHADER_TESS_EVAL:
      vertex_stride = nir_imm_int(b, state->map.stride * 4);
      attr_offset = nir_imm_int(b, state->map.loc[base] * 4);
      break;
   case MESA_SHADER_TESS_CTRL:
   case MESA_SHADER_GEOMETRY:
      vertex_stride = nir_load_vs_vertex_stride_ir3(b);
      attr_offset = nir_load_primitive_location_ir3(b, base);
      break;
   default:
      unreachable("bad shader stage");
   }

   nir_ssa_def *vertex_offset = nir_imul24(b, vertex, vertex_stride);

   return nir_iadd(b, nir_iadd(b, primitive_offset, vertex_offset),
         nir_iadd(b, attr_offset, offset));
}

static nir_intrinsic_instr *
replace_intrinsic(nir_builder *b, nir_intrinsic_instr *intr,
      nir_intrinsic_op op, nir_ssa_def *src0, nir_ssa_def *src1, nir_ssa_def *src2)
{
   nir_intrinsic_instr *new_intr =
      nir_intrinsic_instr_create(b->shader, op);

   new_intr->src[0] = nir_src_for_ssa(src0);
   if (src1)
      new_intr->src[1] = nir_src_for_ssa(src1);
   if (src2)
      new_intr->src[2] = nir_src_for_ssa(src2);

   new_intr->num_components = intr->num_components;

   if (nir_intrinsic_infos[op].has_dest)
      nir_ssa_dest_init(&new_intr->instr, &new_intr->dest,
            intr->num_components, 32, NULL);

   nir_builder_instr_insert(b, &new_intr->instr);

   if (nir_intrinsic_infos[op].has_dest)
      nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_src_for_ssa(&new_intr->dest.ssa));

   nir_instr_remove(&intr->instr);

   return new_intr;
}

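/* Build the primitive map for the given variable list: loc[] is each
 * variable's dword offset within one vertex, and stride is the total
 * dwords per vertex.  size[] starts as the variable's total size in
 * dwords (vec4 slots * 4) and, for arrayed per-vertex variables, is
 * divided down to the per-vertex size; per-patch variables end up with
 * size 0, since they are stored separately from the per-vertex data.
 */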
static void
build_primitive_map(nir_shader *shader, struct primitive_map *map, struct exec_list *list)
{
   nir_foreach_variable (var, list) {
      switch (var->data.location) {
      case VARYING_SLOT_TESS_LEVEL_OUTER:
      case VARYING_SLOT_TESS_LEVEL_INNER:
         continue;
      }

      unsigned size = glsl_count_attribute_slots(var->type, false) * 4;

      assert(var->data.driver_location < ARRAY_SIZE(map->size));
      map->size[var->data.driver_location] =
         MAX2(map->size[var->data.driver_location], size);
   }

   unsigned loc = 0;
   for (uint32_t i = 0; i < ARRAY_SIZE(map->size); i++) {
      if (map->size[i] == 0)
         continue;
      nir_variable *var = get_var(list, i);
      map->loc[i] = loc;
      loc += map->size[i];

      if (var->data.patch)
         map->size[i] = 0;
      else
         map->size[i] = map->size[i] / glsl_get_length(var->type);
   }

   map->stride = loc;
}

static void
lower_block_to_explicit_output(nir_block *block, nir_builder *b, struct state *state)
{
   nir_foreach_instr_safe (instr, block) {
      if (instr->type != nir_instr_type_intrinsic)
         continue;

      nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);

      switch (intr->intrinsic) {
      case nir_intrinsic_store_output: {
         // src[] = { value, offset }.

         /* nir_lower_io_to_temporaries replaces all access to output
          * variables with temp variables and then emits a nir_copy_var at
          * the end of the shader.  Thus, we should always get a full wrmask
          * here.
          */
         assert(util_is_power_of_two_nonzero(nir_intrinsic_write_mask(intr) + 1));

         b->cursor = nir_instr_remove(&intr->instr);

         nir_ssa_def *vertex_id = build_vertex_id(b, state);
         nir_ssa_def *offset = build_local_offset(b, state, vertex_id, nir_intrinsic_base(intr),
               intr->src[1].ssa);
         nir_intrinsic_instr *store =
            nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_shared_ir3);

         store->src[0] = nir_src_for_ssa(intr->src[0].ssa);
         store->src[1] = nir_src_for_ssa(offset);
         store->num_components = intr->num_components;

         nir_builder_instr_insert(b, &store->instr);
         break;
      }

      default:
         break;
      }
   }
}

static nir_ssa_def *
local_thread_id(nir_builder *b)
{
   return bitfield_extract(b, nir_load_gs_header_ir3(b), 16, 1023);
}

void
ir3_nir_lower_to_explicit_output(nir_shader *shader, struct ir3_shader_variant *v,
      unsigned topology)
{
   struct state state = { };

   build_primitive_map(shader, &state.map, &shader->outputs);
   memcpy(v->output_loc, state.map.loc, sizeof(v->output_loc));

   nir_function_impl *impl = nir_shader_get_entrypoint(shader);
   assert(impl);

   nir_builder b;
   nir_builder_init(&b, impl);
   b.cursor = nir_before_cf_list(&impl->body);

   if (v->type == MESA_SHADER_VERTEX && topology != IR3_TESS_NONE)
      state.header = nir_load_tcs_header_ir3(&b);
   else
      state.header = nir_load_gs_header_ir3(&b);

   nir_foreach_block_safe (block, impl)
      lower_block_to_explicit_output(block, &b, &state);

   nir_metadata_preserve(impl, nir_metadata_block_index |
         nir_metadata_dominance);

   v->output_size = state.map.stride;
}


static void
lower_block_to_explicit_input(nir_block *block, nir_builder *b, struct state *state)
{
   nir_foreach_instr_safe (instr, block) {
      if (instr->type != nir_instr_type_intrinsic)
         continue;

      nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);

      switch (intr->intrinsic) {
      case nir_intrinsic_load_per_vertex_input: {
         // src[] = { vertex, offset }.

         b->cursor = nir_before_instr(&intr->instr);

         nir_ssa_def *offset = build_local_offset(b, state,
               intr->src[0].ssa, // this is typically gl_InvocationID
               nir_intrinsic_base(intr),
               intr->src[1].ssa);

         replace_intrinsic(b, intr, nir_intrinsic_load_shared_ir3, offset, NULL, NULL);
         break;
      }

      case nir_intrinsic_load_invocation_id: {
         b->cursor = nir_before_instr(&intr->instr);

         nir_ssa_def *iid = build_invocation_id(b, state);
         nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_src_for_ssa(iid));
         nir_instr_remove(&intr->instr);
         break;
      }

      default:
         break;
      }
   }
}

void
ir3_nir_lower_to_explicit_input(nir_shader *shader)
{
   struct state state = { };

   nir_function_impl *impl = nir_shader_get_entrypoint(shader);
   assert(impl);

   nir_builder b;
   nir_builder_init(&b, impl);
   b.cursor = nir_before_cf_list(&impl->body);

   if (shader->info.stage == MESA_SHADER_GEOMETRY)
      state.header = nir_load_gs_header_ir3(&b);
   else
      state.header = nir_load_tcs_header_ir3(&b);

   nir_foreach_block_safe (block, impl)
      lower_block_to_explicit_input(block, &b, &state);
}


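/* Dword offset of a per-vertex attribute in the tess param BO, which
 * (per the address math below) is laid out patch-major, and within a
 * patch attribute-major:
 *
 *   offset = primitive_id * patch_stride   // select patch
 *          + attr_offset                   // select attribute
 *          + vertex * attr_stride          // select vertex within attr
 *          + (offset << 2)                 // vec4 slots -> dwords
 *
 * Offsets are in dwords, matching the ldg/stg-style global intrinsics
 * used to access the BO (see also ir3_link_geometry_stages()).
 */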
static nir_ssa_def *
build_per_vertex_offset(nir_builder *b, struct state *state,
      nir_ssa_def *vertex, nir_ssa_def *offset, nir_variable *var)
{
   nir_ssa_def *primitive_id = nir_load_primitive_id(b);
   nir_ssa_def *patch_stride = nir_load_hs_patch_stride_ir3(b);
   nir_ssa_def *patch_offset = nir_imul24(b, primitive_id, patch_stride);
   nir_ssa_def *attr_offset;
   int loc = var->data.driver_location;

   switch (b->shader->info.stage) {
   case MESA_SHADER_TESS_CTRL:
      attr_offset = nir_imm_int(b, state->map.loc[loc]);
      break;
   case MESA_SHADER_TESS_EVAL:
      attr_offset = nir_load_primitive_location_ir3(b, loc);
      break;
   default:
      unreachable("bad shader stage");
   }

   nir_ssa_def *attr_stride = nir_imm_int(b, state->map.size[loc]);
   nir_ssa_def *vertex_offset = nir_imul24(b, vertex, attr_stride);

   return nir_iadd(b, nir_iadd(b, patch_offset, attr_offset),
         nir_iadd(b, vertex_offset, nir_ishl(b, offset, nir_imm_int(b, 2))));
}

static nir_ssa_def *
build_patch_offset(nir_builder *b, struct state *state, nir_ssa_def *offset, nir_variable *var)
{
   debug_assert(var && var->data.patch);

   return build_per_vertex_offset(b, state, nir_imm_int(b, 0), offset, var);
}

static void
tess_level_components(struct state *state, uint32_t *inner, uint32_t *outer)
{
   switch (state->topology) {
   case IR3_TESS_TRIANGLES:
      *inner = 1;
      *outer = 3;
      break;
   case IR3_TESS_QUADS:
      *inner = 2;
      *outer = 4;
      break;
   case IR3_TESS_ISOLINES:
      *inner = 0;
      *outer = 2;
      break;
   default:
      unreachable("bad");
   }
}

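/* Base dword offset of a patch's tess levels in the tess factor BO.
 * Per the math below, each patch gets a record of
 *
 *   1 header dword | outer levels | inner levels
 *
 * sized by the topology, e.g. 1 + 3 + 1 dwords for triangles and
 * 1 + 4 + 2 for quads.
 */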
static nir_ssa_def *
build_tessfactor_base(nir_builder *b, gl_varying_slot slot, struct state *state)
{
   uint32_t inner_levels, outer_levels;
   tess_level_components(state, &inner_levels, &outer_levels);

   const uint32_t patch_stride = 1 + inner_levels + outer_levels;

   nir_ssa_def *primitive_id = nir_load_primitive_id(b);

   nir_ssa_def *patch_offset = nir_imul24(b, primitive_id, nir_imm_int(b, patch_stride));

   uint32_t offset;
   switch (slot) {
   case VARYING_SLOT_TESS_LEVEL_OUTER:
      /* There's some kind of header dword; tess levels start at index 1. */
      offset = 1;
      break;
   case VARYING_SLOT_TESS_LEVEL_INNER:
      offset = 1 + outer_levels;
      break;
   default:
      unreachable("bad");
   }

   return nir_iadd(b, patch_offset, nir_imm_int(b, offset));
}

static void
lower_tess_ctrl_block(nir_block *block, nir_builder *b, struct state *state)
{
   nir_foreach_instr_safe (instr, block) {
      if (instr->type != nir_instr_type_intrinsic)
         continue;

      nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);

      switch (intr->intrinsic) {
      case nir_intrinsic_control_barrier:
      case nir_intrinsic_memory_barrier_tcs_patch:
         /* Hull shaders dispatch 32 wide so an entire patch will always
          * fit in a single warp and execute in lock-step.  Consequently,
          * we don't need to do anything for TCS barriers, so just remove
          * the intrinsic.  Otherwise we'd emit actual barrier
          * instructions, which would deadlock.
          */
         nir_instr_remove(&intr->instr);
         break;

      case nir_intrinsic_load_per_vertex_output: {
         // src[] = { vertex, offset }.

         b->cursor = nir_before_instr(&intr->instr);

         nir_ssa_def *address = nir_load_tess_param_base_ir3(b);
         nir_variable *var = get_var(&b->shader->outputs, nir_intrinsic_base(intr));
         nir_ssa_def *offset = build_per_vertex_offset(b, state,
               intr->src[0].ssa, intr->src[1].ssa, var);

         replace_intrinsic(b, intr, nir_intrinsic_load_global_ir3, address, offset, NULL);
         break;
      }

      case nir_intrinsic_store_per_vertex_output: {
         // src[] = { value, vertex, offset }.

         b->cursor = nir_before_instr(&intr->instr);

         /* sparse writemask not supported */
         assert(util_is_power_of_two_nonzero(nir_intrinsic_write_mask(intr) + 1));

         nir_ssa_def *value = intr->src[0].ssa;
         nir_ssa_def *address = nir_load_tess_param_base_ir3(b);
         nir_variable *var = get_var(&b->shader->outputs, nir_intrinsic_base(intr));
         nir_ssa_def *offset = build_per_vertex_offset(b, state,
               intr->src[1].ssa, intr->src[2].ssa, var);

         replace_intrinsic(b, intr, nir_intrinsic_store_global_ir3, value, address,
               nir_iadd(b, offset, nir_imm_int(b, nir_intrinsic_component(intr))));

         break;
      }

      case nir_intrinsic_load_output: {
         // src[] = { offset }.

         nir_variable *var = get_var(&b->shader->outputs, nir_intrinsic_base(intr));

         b->cursor = nir_before_instr(&intr->instr);

         nir_ssa_def *address, *offset;

         /* note if vectorization of the tess level loads ever happens:
          * "ldg" across 16-byte boundaries can behave incorrectly if results
          * are never used. most likely some issue with (sy) not properly
          * syncing with values coming from a second memory transaction.
          */
         if (is_tess_levels(var)) {
            assert(intr->dest.ssa.num_components == 1);
            address = nir_load_tess_factor_base_ir3(b);
            offset = build_tessfactor_base(b, var->data.location, state);
         } else {
            address = nir_load_tess_param_base_ir3(b);
            offset = build_patch_offset(b, state, intr->src[0].ssa, var);
         }

         replace_intrinsic(b, intr, nir_intrinsic_load_global_ir3, address, offset, NULL);
         break;
      }

      case nir_intrinsic_store_output: {
         // src[] = { value, offset }.

         /* write patch output to bo */

         nir_variable *var = get_var(&b->shader->outputs, nir_intrinsic_base(intr));

         b->cursor = nir_before_instr(&intr->instr);

         /* sparse writemask not supported */
         assert(util_is_power_of_two_nonzero(nir_intrinsic_write_mask(intr) + 1));

         if (is_tess_levels(var)) {
            /* The tess levels are defined as float[4] and float[2],
             * but the tess factor BO has smaller sizes for tris and
             * isolines, so we have to discard any writes beyond the
             * number of components for the inner/outer levels.
             */
            uint32_t inner_levels, outer_levels, levels;
            tess_level_components(state, &inner_levels, &outer_levels);

            if (var->data.location == VARYING_SLOT_TESS_LEVEL_OUTER)
               levels = outer_levels;
            else
               levels = inner_levels;

            assert(intr->src[0].ssa->num_components == 1);

            nir_ssa_def *offset =
               nir_iadd_imm(b, intr->src[1].ssa, nir_intrinsic_component(intr));

            nir_if *nif = nir_push_if(b, nir_ult(b, offset, nir_imm_int(b, levels)));

            replace_intrinsic(b, intr, nir_intrinsic_store_global_ir3,
                  intr->src[0].ssa,
                  nir_load_tess_factor_base_ir3(b),
                  nir_iadd(b, offset, build_tessfactor_base(b, var->data.location, state)));

            nir_pop_if(b, nif);
         } else {
            nir_ssa_def *address = nir_load_tess_param_base_ir3(b);
            nir_ssa_def *offset = build_patch_offset(b, state, intr->src[1].ssa, var);

            debug_assert(nir_intrinsic_component(intr) == 0);

            replace_intrinsic(b, intr, nir_intrinsic_store_global_ir3,
                  intr->src[0].ssa, address, offset);
         }
         break;
      }

      default:
         break;
      }
   }
}

static void
emit_tess_epilogue(nir_builder *b, struct state *state)
{
   /* Insert endpatch instruction:
    *
    * TODO we should re-work this to use normal flow control.
    */

   nir_intrinsic_instr *end_patch =
      nir_intrinsic_instr_create(b->shader, nir_intrinsic_end_patch_ir3);
   nir_builder_instr_insert(b, &end_patch->instr);
}

void
ir3_nir_lower_tess_ctrl(nir_shader *shader, struct ir3_shader_variant *v,
      unsigned topology)
{
   struct state state = { .topology = topology };

   if (shader_debug_enabled(shader->info.stage)) {
      fprintf(stderr, "NIR (before tess lowering) for %s shader:\n",
            _mesa_shader_stage_to_string(shader->info.stage));
      nir_print_shader(shader, stderr);
   }

   build_primitive_map(shader, &state.map, &shader->outputs);
   memcpy(v->output_loc, state.map.loc, sizeof(v->output_loc));
   v->output_size = state.map.stride;

   nir_function_impl *impl = nir_shader_get_entrypoint(shader);
   assert(impl);

   nir_builder b;
   nir_builder_init(&b, impl);
   b.cursor = nir_before_cf_list(&impl->body);

   state.header = nir_load_tcs_header_ir3(&b);

   nir_foreach_block_safe (block, impl)
      lower_tess_ctrl_block(block, &b, &state);

   /* Now move the body of the TCS into a conditional:
    *
    * if (gl_InvocationID < num_vertices)
    *     // body
    *
    */

   nir_cf_list body;
   nir_cf_extract(&body, nir_before_cf_list(&impl->body),
         nir_after_cf_list(&impl->body));

   b.cursor = nir_after_cf_list(&impl->body);

   /* Re-emit the header, since the old one got moved into the if branch */
   state.header = nir_load_tcs_header_ir3(&b);
   nir_ssa_def *iid = build_invocation_id(&b, &state);

   const uint32_t nvertices = shader->info.tess.tcs_vertices_out;
   nir_ssa_def *cond = nir_ult(&b, iid, nir_imm_int(&b, nvertices));

   nir_if *nif = nir_push_if(&b, cond);

   nir_cf_reinsert(&body, b.cursor);

   b.cursor = nir_after_cf_list(&nif->then_list);

   /* Insert conditional exit for threads with invocation id != 0 */
   nir_ssa_def *iid0_cond = nir_ieq(&b, iid, nir_imm_int(&b, 0));
   nir_intrinsic_instr *cond_end =
      nir_intrinsic_instr_create(shader, nir_intrinsic_cond_end_ir3);
   cond_end->src[0] = nir_src_for_ssa(iid0_cond);
   nir_builder_instr_insert(&b, &cond_end->instr);

   emit_tess_epilogue(&b, &state);

   nir_pop_if(&b, nif);

   nir_metadata_preserve(impl, 0);
}


static void
lower_tess_eval_block(nir_block *block, nir_builder *b, struct state *state)
{
   nir_foreach_instr_safe (instr, block) {
      if (instr->type != nir_instr_type_intrinsic)
         continue;

      nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);

      switch (intr->intrinsic) {
      case nir_intrinsic_load_tess_coord: {
         b->cursor = nir_after_instr(&intr->instr);
         nir_ssa_def *x = nir_channel(b, &intr->dest.ssa, 0);
         nir_ssa_def *y = nir_channel(b, &intr->dest.ssa, 1);
         nir_ssa_def *z;

         if (state->topology == IR3_TESS_TRIANGLES)
            z = nir_fsub(b, nir_fsub(b, nir_imm_float(b, 1.0f), y), x);
         else
            z = nir_imm_float(b, 0.0f);

         nir_ssa_def *coord = nir_vec3(b, x, y, z);

         nir_ssa_def_rewrite_uses_after(&intr->dest.ssa,
               nir_src_for_ssa(coord),
               b->cursor.instr);
         break;
      }

      case nir_intrinsic_load_per_vertex_input: {
         // src[] = { vertex, offset }.

         b->cursor = nir_before_instr(&intr->instr);

         nir_ssa_def *address = nir_load_tess_param_base_ir3(b);
         nir_variable *var = get_var(&b->shader->inputs, nir_intrinsic_base(intr));
         nir_ssa_def *offset = build_per_vertex_offset(b, state,
               intr->src[0].ssa, intr->src[1].ssa, var);

         replace_intrinsic(b, intr, nir_intrinsic_load_global_ir3, address, offset, NULL);
         break;
      }

      case nir_intrinsic_load_input: {
         // src[] = { offset }.

         nir_variable *var = get_var(&b->shader->inputs, nir_intrinsic_base(intr));

         debug_assert(var->data.patch);

         b->cursor = nir_before_instr(&intr->instr);

         nir_ssa_def *address, *offset;

         /* note if vectorization of the tess level loads ever happens:
          * "ldg" across 16-byte boundaries can behave incorrectly if results
          * are never used. most likely some issue with (sy) not properly
          * syncing with values coming from a second memory transaction.
          */
         if (is_tess_levels(var)) {
            assert(intr->dest.ssa.num_components == 1);
            address = nir_load_tess_factor_base_ir3(b);
            offset = build_tessfactor_base(b, var->data.location, state);
         } else {
            address = nir_load_tess_param_base_ir3(b);
            offset = build_patch_offset(b, state, intr->src[0].ssa, var);
         }

         offset = nir_iadd(b, offset, nir_imm_int(b, nir_intrinsic_component(intr)));

         replace_intrinsic(b, intr, nir_intrinsic_load_global_ir3, address, offset, NULL);
         break;
      }

      default:
         break;
      }
   }
}

void
ir3_nir_lower_tess_eval(nir_shader *shader, unsigned topology)
{
   struct state state = { .topology = topology };

   if (shader_debug_enabled(shader->info.stage)) {
      fprintf(stderr, "NIR (before tess lowering) for %s shader:\n",
            _mesa_shader_stage_to_string(shader->info.stage));
      nir_print_shader(shader, stderr);
   }

   /* Build map of inputs so we have the sizes. */
   build_primitive_map(shader, &state.map, &shader->inputs);

   nir_function_impl *impl = nir_shader_get_entrypoint(shader);
   assert(impl);

   nir_builder b;
   nir_builder_init(&b, impl);

   nir_foreach_block_safe (block, impl)
      lower_tess_eval_block(block, &b, &state);

   nir_metadata_preserve(impl, 0);
}

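/* Lower EmitVertex()/EndPrimitive().  Each invocation in the wave is
 * responsible for one emitted vertex, so EmitVertex() becomes "if
 * (vertex_count == local_thread_id) copy the shadowed outputs to the
 * emit shadow vars", plus bookkeeping of vertex_count, emitted_vertex
 * and the vertex_flags output (judging by the values stored below, a
 * flags value of 4 marks a vertex that starts a new primitive).
 */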
static void
lower_gs_block(nir_block *block, nir_builder *b, struct state *state)
{
   nir_foreach_instr_safe (instr, block) {
      if (instr->type != nir_instr_type_intrinsic)
         continue;

      nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);

      switch (intr->intrinsic) {
      case nir_intrinsic_end_primitive: {
         b->cursor = nir_before_instr(&intr->instr);
         nir_store_var(b, state->vertex_flags_out, nir_imm_int(b, 4), 0x1);
         nir_instr_remove(&intr->instr);
         break;
      }

      case nir_intrinsic_emit_vertex: {
         /* Load the vertex count */
         b->cursor = nir_before_instr(&intr->instr);
         nir_ssa_def *count = nir_load_var(b, state->vertex_count_var);

         nir_push_if(b, nir_ieq(b, count, local_thread_id(b)));

         foreach_two_lists(dest_node, &state->emit_outputs, src_node, &state->old_outputs) {
            nir_variable *dest = exec_node_data(nir_variable, dest_node, node);
            nir_variable *src = exec_node_data(nir_variable, src_node, node);
            nir_copy_var(b, dest, src);
         }

         nir_instr_remove(&intr->instr);

         nir_store_var(b, state->emitted_vertex_var,
               nir_iadd(b, nir_load_var(b, state->emitted_vertex_var), nir_imm_int(b, 1)), 0x1);

         nir_pop_if(b, NULL);

         /* Increment the vertex count by 1 */
         nir_store_var(b, state->vertex_count_var,
               nir_iadd(b, count, nir_imm_int(b, 1)), 0x1); /* .x */
         nir_store_var(b, state->vertex_flags_out, nir_imm_int(b, 0), 0x1);

         break;
      }

      default:
         break;
      }
   }
}

void
ir3_nir_lower_gs(nir_shader *shader)
{
   struct state state = { };

   if (shader_debug_enabled(shader->info.stage)) {
      fprintf(stderr, "NIR (before gs lowering):\n");
      nir_print_shader(shader, stderr);
   }

   build_primitive_map(shader, &state.map, &shader->inputs);

   /* Create an output var for vertex_flags.  This will be shadowed below,
    * the same way regular outputs get shadowed, and this variable will
    * become a temporary.
    */
   state.vertex_flags_out = nir_variable_create(shader, nir_var_shader_out,
         glsl_uint_type(), "vertex_flags");
   state.vertex_flags_out->data.driver_location = shader->num_outputs++;
   state.vertex_flags_out->data.location = VARYING_SLOT_GS_VERTEX_FLAGS_IR3;
   state.vertex_flags_out->data.interpolation = INTERP_MODE_NONE;

   nir_function_impl *impl = nir_shader_get_entrypoint(shader);
   assert(impl);

   nir_builder b;
   nir_builder_init(&b, impl);
   b.cursor = nir_before_cf_list(&impl->body);

   state.header = nir_load_gs_header_ir3(&b);

   /* Generate two sets of shadow vars for the output variables.  The first
    * set replaces the real outputs and the second set (emit_outputs) we'll
    * assign in the emit_vertex conditionals.  Then at the end of the shader
    * we copy the emit_outputs to the real outputs, so that we get
    * store_output in uniform control flow.
    */
   exec_list_move_nodes_to(&shader->outputs, &state.old_outputs);
   exec_list_make_empty(&state.emit_outputs);
   nir_foreach_variable(var, &state.old_outputs) {
      /* Create a new output var by cloning the original output var and
       * stealing the name.
       */
      nir_variable *output = nir_variable_clone(var, shader);
      exec_list_push_tail(&shader->outputs, &output->node);

      /* Rewrite the original output to be a shadow variable. */
      var->name = ralloc_asprintf(var, "%s@gs-temp", output->name);
      var->data.mode = nir_var_shader_temp;

      /* Clone the shadow variable to create the emit shadow variable that
       * we'll assign in the emit conditionals.
       */
      nir_variable *emit_output = nir_variable_clone(var, shader);
      emit_output->name = ralloc_asprintf(var, "%s@emit-temp", output->name);
      exec_list_push_tail(&state.emit_outputs, &emit_output->node);
   }

   /* During the shader we'll keep track of which vertex we're currently
    * emitting for the EmitVertex test and how many vertices we emitted, so
    * we know to discard if we didn't emit any.  In most simple shaders,
    * this can all be statically determined and gets optimized away.
    */
   state.vertex_count_var =
      nir_local_variable_create(impl, glsl_uint_type(), "vertex_count");
   state.emitted_vertex_var =
      nir_local_variable_create(impl, glsl_uint_type(), "emitted_vertex");

   /* Initialize to 0. */
   b.cursor = nir_before_cf_list(&impl->body);
   nir_store_var(&b, state.vertex_count_var, nir_imm_int(&b, 0), 0x1);
   nir_store_var(&b, state.emitted_vertex_var, nir_imm_int(&b, 0), 0x1);
   nir_store_var(&b, state.vertex_flags_out, nir_imm_int(&b, 4), 0x1);

   nir_foreach_block_safe (block, impl)
      lower_gs_block(block, &b, &state);

   set_foreach(impl->end_block->predecessors, block_entry) {
      struct nir_block *block = (void *)block_entry->key;
      b.cursor = nir_after_block_before_jump(block);

      nir_intrinsic_instr *discard_if =
         nir_intrinsic_instr_create(b.shader, nir_intrinsic_discard_if);

      nir_ssa_def *cond = nir_ieq(&b, nir_load_var(&b, state.emitted_vertex_var), nir_imm_int(&b, 0));

      discard_if->src[0] = nir_src_for_ssa(cond);

      nir_builder_instr_insert(&b, &discard_if->instr);

      foreach_two_lists(dest_node, &shader->outputs, src_node, &state.emit_outputs) {
         nir_variable *dest = exec_node_data(nir_variable, dest_node, node);
         nir_variable *src = exec_node_data(nir_variable, src_node, node);
         nir_copy_var(&b, dest, src);
      }
   }

   exec_list_append(&shader->globals, &state.old_outputs);
   exec_list_append(&shader->globals, &state.emit_outputs);

   nir_metadata_preserve(impl, 0);

   nir_lower_global_vars_to_local(shader);
   nir_split_var_copies(shader);
   nir_lower_var_copies(shader);

   nir_fixup_deref_modes(shader);

   if (shader_debug_enabled(shader->info.stage)) {
      fprintf(stderr, "NIR (after gs lowering):\n");
      nir_print_shader(shader, stderr);
   }
}

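/* Link producer outputs to consumer inputs for the geometry stages:
 * fill locs[] with each consumer input's offset, scaled to the units
 * the consumer's load instructions expect, and return the number of
 * driver locations used.
 */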
uint32_t
ir3_link_geometry_stages(const struct ir3_shader_variant *producer,
      const struct ir3_shader_variant *consumer,
      uint32_t *locs)
{
   uint32_t num_loc = 0, factor;

   switch (consumer->type) {
   case MESA_SHADER_TESS_CTRL:
   case MESA_SHADER_GEOMETRY:
      /* These stages load with ldlw, which expects byte offsets. */
      factor = 4;
      break;
   case MESA_SHADER_TESS_EVAL:
      /* The tess eval shader uses ldg, which takes dword offsets. */
      factor = 1;
      break;
   default:
      unreachable("bad shader stage");
   }

   nir_foreach_variable(in_var, &consumer->shader->nir->inputs) {
      nir_foreach_variable(out_var, &producer->shader->nir->outputs) {
         if (in_var->data.location == out_var->data.location) {
            locs[in_var->data.driver_location] =
               producer->output_loc[out_var->data.driver_location] * factor;

            debug_assert(num_loc <= in_var->data.driver_location + 1);
            num_loc = in_var->data.driver_location + 1;
         }
      }
   }

   return num_loc;
}