freedreno/ir3: Skip tess epilogue if the program is missing stores.
src/freedreno/ir3/ir3_nir_lower_tess.c
/*
 * Copyright © 2019 Google, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include "ir3_nir.h"
#include "ir3_compiler.h"
#include "compiler/nir/nir_builder.h"

struct state {
	uint32_t topology;

	struct primitive_map {
		unsigned loc[32];
		unsigned size[32];
		unsigned stride;
	} map;

	nir_ssa_def *header;

	nir_variable *vertex_count_var;
	nir_variable *emitted_vertex_var;
	nir_variable *vertex_flags_out;

	struct exec_list old_outputs;
	struct exec_list emit_outputs;

	nir_ssa_def *outer_levels[4];
	nir_ssa_def *inner_levels[2];
};

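/* Notes on the packed TCS/GS header, inferred from the extraction helpers
 * below (a sketch of the layout, not authoritative hardware documentation):
 *
 *   bits  5..0  : local primitive id
 *   bits 10..6  : vertex id (within the primitive/patch)
 *   bits 15..11 : invocation id
 *   bits 25..16 : local thread id (GS header, see local_thread_id())
 */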
static nir_ssa_def *
bitfield_extract(nir_builder *b, nir_ssa_def *v, uint32_t start, uint32_t mask)
{
	return nir_iand(b, nir_ushr(b, v, nir_imm_int(b, start)),
			nir_imm_int(b, mask));
}

static nir_ssa_def *
build_invocation_id(nir_builder *b, struct state *state)
{
	return bitfield_extract(b, state->header, 11, 31);
}

static nir_ssa_def *
build_vertex_id(nir_builder *b, struct state *state)
{
	return bitfield_extract(b, state->header, 6, 31);
}

static nir_ssa_def *
build_local_primitive_id(nir_builder *b, struct state *state)
{
	return bitfield_extract(b, state->header, 0, 63);
}

static nir_variable *
get_var(struct exec_list *list, int driver_location)
{
	nir_foreach_variable (v, list) {
		if (v->data.driver_location == driver_location) {
			return v;
		}
	}

	return NULL;
}

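/* Computes the offset of an attribute in the shared-memory scratch area
 * that paired stages (VS/TCS, TES/GS) use to pass values:
 *
 *   local_primitive_id * primitive_stride    ... this primitive's chunk
 *   + vertex * vertex_stride                 ... the vertex within it
 *   + attr_offset + offset                   ... the attribute itself
 *
 * On the producer side (VS, TES) the strides come from the primitive map
 * built by this pass; on the consumer side (TCS, GS) they arrive as driver
 * params, since the map was computed while compiling the producer.
 */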
static nir_ssa_def *
build_local_offset(nir_builder *b, struct state *state,
		nir_ssa_def *vertex, uint32_t base, nir_ssa_def *offset)
{
	nir_ssa_def *primitive_stride = nir_load_vs_primitive_stride_ir3(b);
	nir_ssa_def *primitive_offset =
		nir_imul24(b, build_local_primitive_id(b, state), primitive_stride);
	nir_ssa_def *attr_offset;
	nir_ssa_def *vertex_stride;

	switch (b->shader->info.stage) {
	case MESA_SHADER_VERTEX:
	case MESA_SHADER_TESS_EVAL:
		vertex_stride = nir_imm_int(b, state->map.stride * 4);
		attr_offset = nir_imm_int(b, state->map.loc[base] * 4);
		break;
	case MESA_SHADER_TESS_CTRL:
	case MESA_SHADER_GEOMETRY:
		vertex_stride = nir_load_vs_vertex_stride_ir3(b);
		attr_offset = nir_load_primitive_location_ir3(b, base);
		break;
	default:
		unreachable("bad shader stage");
	}

	nir_ssa_def *vertex_offset = nir_imul24(b, vertex, vertex_stride);

	return nir_iadd(b, nir_iadd(b, primitive_offset, vertex_offset),
			nir_iadd(b, attr_offset, offset));
}

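/* Replaces intr with a new intrinsic of the given op, carrying over
 * num_components, rewriting any uses of the old destination to the new
 * one, and removing the original instruction.
 */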
static nir_intrinsic_instr *
replace_intrinsic(nir_builder *b, nir_intrinsic_instr *intr,
		nir_intrinsic_op op, nir_ssa_def *src0, nir_ssa_def *src1, nir_ssa_def *src2)
{
	nir_intrinsic_instr *new_intr =
		nir_intrinsic_instr_create(b->shader, op);

	new_intr->src[0] = nir_src_for_ssa(src0);
	if (src1)
		new_intr->src[1] = nir_src_for_ssa(src1);
	if (src2)
		new_intr->src[2] = nir_src_for_ssa(src2);

	new_intr->num_components = intr->num_components;

	if (nir_intrinsic_infos[op].has_dest)
		nir_ssa_dest_init(&new_intr->instr, &new_intr->dest,
				intr->num_components, 32, NULL);

	nir_builder_instr_insert(b, &new_intr->instr);

	if (nir_intrinsic_infos[op].has_dest)
		nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_src_for_ssa(&new_intr->dest.ssa));

	nir_instr_remove(&intr->instr);

	return new_intr;
}

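/* Packs the variables on `list` into a contiguous layout, recording each
 * variable's location and size in the map. Tess levels are skipped since
 * they live in the separate tess factor BO. For per-vertex arrays the
 * recorded size is per vertex (total size divided by the array length);
 * patch variables take up space once per patch, so their per-vertex size
 * is recorded as 0.
 */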
static void
build_primitive_map(nir_shader *shader, struct primitive_map *map, struct exec_list *list)
{
	nir_foreach_variable (var, list) {
		switch (var->data.location) {
		case VARYING_SLOT_TESS_LEVEL_OUTER:
		case VARYING_SLOT_TESS_LEVEL_INNER:
			continue;
		}

		unsigned size = glsl_count_attribute_slots(var->type, false) * 4;

		assert(var->data.driver_location < ARRAY_SIZE(map->size));
		map->size[var->data.driver_location] =
			MAX2(map->size[var->data.driver_location], size);
	}

	unsigned loc = 0;
	for (uint32_t i = 0; i < ARRAY_SIZE(map->size); i++) {
		if (map->size[i] == 0)
			continue;
		nir_variable *var = get_var(list, i);
		map->loc[i] = loc;
		loc += map->size[i];

		if (var->data.patch)
			map->size[i] = 0;
		else
			map->size[i] = map->size[i] / glsl_get_length(var->type);
	}

	map->stride = loc;
}

static void
lower_block_to_explicit_output(nir_block *block, nir_builder *b, struct state *state)
{
	nir_foreach_instr_safe (instr, block) {
		if (instr->type != nir_instr_type_intrinsic)
			continue;

		nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);

		switch (intr->intrinsic) {
		case nir_intrinsic_store_output: {
			// src[] = { value, offset }.

			b->cursor = nir_instr_remove(&intr->instr);

			nir_ssa_def *vertex_id = build_vertex_id(b, state);
			nir_ssa_def *offset = build_local_offset(b, state, vertex_id, nir_intrinsic_base(intr),
					intr->src[1].ssa);
			nir_intrinsic_instr *store =
				nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_shared_ir3);

			nir_intrinsic_set_write_mask(store, MASK(intr->num_components));
			store->src[0] = nir_src_for_ssa(intr->src[0].ssa);
			store->src[1] = nir_src_for_ssa(offset);

			store->num_components = intr->num_components;

			nir_builder_instr_insert(b, &store->instr);
			break;
		}

		default:
			break;
		}
	}
}

static nir_ssa_def *
local_thread_id(nir_builder *b)
{
	return bitfield_extract(b, nir_load_gs_header_ir3(b), 16, 1023);
}

void
ir3_nir_lower_to_explicit_output(nir_shader *shader, struct ir3_shader *s, unsigned topology)
{
	struct state state = { };

	build_primitive_map(shader, &state.map, &shader->outputs);
	memcpy(s->output_loc, state.map.loc, sizeof(s->output_loc));

	nir_function_impl *impl = nir_shader_get_entrypoint(shader);
	assert(impl);

	nir_builder b;
	nir_builder_init(&b, impl);
	b.cursor = nir_before_cf_list(&impl->body);

	if (s->type == MESA_SHADER_VERTEX && topology != IR3_TESS_NONE)
		state.header = nir_load_tcs_header_ir3(&b);
	else
		state.header = nir_load_gs_header_ir3(&b);

	nir_foreach_block_safe (block, impl)
		lower_block_to_explicit_output(block, &b, &state);

	nir_metadata_preserve(impl, nir_metadata_block_index |
			nir_metadata_dominance);

	s->output_size = state.map.stride;
}


static void
lower_block_to_explicit_input(nir_block *block, nir_builder *b, struct state *state)
{
	nir_foreach_instr_safe (instr, block) {
		if (instr->type != nir_instr_type_intrinsic)
			continue;

		nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);

		switch (intr->intrinsic) {
		case nir_intrinsic_load_per_vertex_input: {
			// src[] = { vertex, offset }.

			b->cursor = nir_before_instr(&intr->instr);

			nir_ssa_def *offset = build_local_offset(b, state,
					intr->src[0].ssa, // the vertex index within the patch/primitive
					nir_intrinsic_base(intr),
					intr->src[1].ssa);

			replace_intrinsic(b, intr, nir_intrinsic_load_shared_ir3, offset, NULL, NULL);
			break;
		}

		case nir_intrinsic_load_invocation_id: {
			b->cursor = nir_before_instr(&intr->instr);

			nir_ssa_def *iid = build_invocation_id(b, state);
			nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_src_for_ssa(iid));
			nir_instr_remove(&intr->instr);
			break;
		}

		default:
			break;
		}
	}
}

void
ir3_nir_lower_to_explicit_input(nir_shader *shader)
{
	struct state state = { };

	nir_function_impl *impl = nir_shader_get_entrypoint(shader);
	assert(impl);

	nir_builder b;
	nir_builder_init(&b, impl);
	b.cursor = nir_before_cf_list(&impl->body);

	if (shader->info.stage == MESA_SHADER_GEOMETRY)
		state.header = nir_load_gs_header_ir3(&b);
	else
		state.header = nir_load_tcs_header_ir3(&b);

	nir_foreach_block_safe (block, impl)
		lower_block_to_explicit_input(block, &b, &state);
}


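/* Computes the offset of a per-vertex attribute in the tess param BO:
 * each patch gets hs_patch_stride dwords, and within the patch attributes
 * are laid out by the primitive map (an immediate in the TCS, a driver
 * param in the TES, where the map isn't known at compile time). The
 * incoming intrinsic offset is shifted left by 2, which suggests it is in
 * vec4 units while the BO is addressed in dwords.
 */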
static nir_ssa_def *
build_per_vertex_offset(nir_builder *b, struct state *state,
		nir_ssa_def *vertex, nir_ssa_def *offset, nir_variable *var)
{
	nir_ssa_def *primitive_id = nir_load_primitive_id(b);
	nir_ssa_def *patch_stride = nir_load_hs_patch_stride_ir3(b);
	nir_ssa_def *patch_offset = nir_imul24(b, primitive_id, patch_stride);
	nir_ssa_def *attr_offset;
	int loc = var->data.driver_location;

	switch (b->shader->info.stage) {
	case MESA_SHADER_TESS_CTRL:
		attr_offset = nir_imm_int(b, state->map.loc[loc]);
		break;
	case MESA_SHADER_TESS_EVAL:
		attr_offset = nir_load_primitive_location_ir3(b, loc);
		break;
	default:
		unreachable("bad shader stage");
	}

	nir_ssa_def *attr_stride = nir_imm_int(b, state->map.size[loc]);
	nir_ssa_def *vertex_offset = nir_imul24(b, vertex, attr_stride);

	return nir_iadd(b, nir_iadd(b, patch_offset, attr_offset),
			nir_iadd(b, vertex_offset, nir_ishl(b, offset, nir_imm_int(b, 2))));
}

static nir_ssa_def *
build_patch_offset(nir_builder *b, struct state *state, nir_ssa_def *offset, nir_variable *var)
{
	debug_assert(var && var->data.patch);

	return build_per_vertex_offset(b, state, nir_imm_int(b, 0), offset, var);
}

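/* Per-patch layout of the tess factor BO assumed below:
 *
 *   dword 0                       : header dword (see comment below)
 *   dwords 1 .. outer_levels      : outer tess levels
 *   following inner_levels dwords : inner tess levels
 */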
static nir_ssa_def *
build_tessfactor_base(nir_builder *b, gl_varying_slot slot, struct state *state)
{
	uint32_t inner_levels, outer_levels;
	switch (state->topology) {
	case IR3_TESS_TRIANGLES:
		inner_levels = 1;
		outer_levels = 3;
		break;
	case IR3_TESS_QUADS:
		inner_levels = 2;
		outer_levels = 4;
		break;
	case IR3_TESS_ISOLINES:
		inner_levels = 0;
		outer_levels = 2;
		break;
	default:
		unreachable("bad");
	}

	const uint32_t patch_stride = 1 + inner_levels + outer_levels;

	nir_ssa_def *primitive_id = nir_load_primitive_id(b);

	nir_ssa_def *patch_offset = nir_imul24(b, primitive_id, nir_imm_int(b, patch_stride));

	uint32_t offset;
	switch (slot) {
	case VARYING_SLOT_TESS_LEVEL_OUTER:
		/* There's some kind of header dword; the tess levels start at index 1. */
		offset = 1;
		break;
	case VARYING_SLOT_TESS_LEVEL_INNER:
		offset = 1 + outer_levels;
		break;
	default:
		unreachable("bad");
	}

	return nir_iadd(b, patch_offset, nir_imm_int(b, offset));
}

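/* Rewrites the TCS output/input intrinsics into global loads/stores on
 * the tess param BO. Tess level writes are not written anywhere yet;
 * their values are stashed in state->outer_levels/inner_levels so that
 * emit_tess_epilogue() can write them to the tess factor BO in one go.
 */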
static void
lower_tess_ctrl_block(nir_block *block, nir_builder *b, struct state *state)
{
	nir_foreach_instr_safe (instr, block) {
		if (instr->type != nir_instr_type_intrinsic)
			continue;

		nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);

		switch (intr->intrinsic) {
		case nir_intrinsic_control_barrier:
		case nir_intrinsic_memory_barrier_tcs_patch:
			/* Hull shaders dispatch 32 wide so an entire patch will always
			 * fit in a single warp and execute in lock-step. Consequently,
			 * we don't need to do anything for TCS barriers, so just remove
			 * the intrinsic. Otherwise we'd emit an actual barrier
			 * instruction, which would deadlock.
			 */
			nir_instr_remove(&intr->instr);
			break;

		case nir_intrinsic_load_per_vertex_output: {
			// src[] = { vertex, offset }.

			b->cursor = nir_before_instr(&intr->instr);

			nir_ssa_def *address = nir_load_tess_param_base_ir3(b);
			nir_variable *var = get_var(&b->shader->outputs, nir_intrinsic_base(intr));
			nir_ssa_def *offset = build_per_vertex_offset(b, state,
					intr->src[0].ssa, intr->src[1].ssa, var);

			replace_intrinsic(b, intr, nir_intrinsic_load_global_ir3, address, offset, NULL);
			break;
		}

		case nir_intrinsic_store_per_vertex_output: {
			// src[] = { value, vertex, offset }.

			b->cursor = nir_before_instr(&intr->instr);

			nir_ssa_def *value = intr->src[0].ssa;
			nir_ssa_def *address = nir_load_tess_param_base_ir3(b);
			nir_variable *var = get_var(&b->shader->outputs, nir_intrinsic_base(intr));
			nir_ssa_def *offset = build_per_vertex_offset(b, state,
					intr->src[1].ssa, intr->src[2].ssa, var);

			nir_intrinsic_instr *store =
				replace_intrinsic(b, intr, nir_intrinsic_store_global_ir3, value, address,
						nir_iadd(b, offset, nir_imm_int(b, nir_intrinsic_component(intr))));

			nir_intrinsic_set_write_mask(store, nir_intrinsic_write_mask(intr));

			break;
		}

		case nir_intrinsic_load_tess_level_inner:
		case nir_intrinsic_load_tess_level_outer: {
			b->cursor = nir_before_instr(&intr->instr);

			gl_varying_slot slot;
			if (intr->intrinsic == nir_intrinsic_load_tess_level_inner)
				slot = VARYING_SLOT_TESS_LEVEL_INNER;
			else
				slot = VARYING_SLOT_TESS_LEVEL_OUTER;

			nir_ssa_def *address = nir_load_tess_factor_base_ir3(b);
			nir_ssa_def *offset = build_tessfactor_base(b, slot, state);

			replace_intrinsic(b, intr, nir_intrinsic_load_global_ir3, address, offset, NULL);
			break;
		}

		case nir_intrinsic_load_output: {
			// src[] = { offset }.

			nir_variable *var = get_var(&b->shader->outputs, nir_intrinsic_base(intr));

			b->cursor = nir_before_instr(&intr->instr);

			nir_ssa_def *address = nir_load_tess_param_base_ir3(b);
			nir_ssa_def *offset = build_patch_offset(b, state, intr->src[0].ssa, var);

			replace_intrinsic(b, intr, nir_intrinsic_load_global_ir3, address, offset, NULL);
			break;
		}

		case nir_intrinsic_store_output: {
			// src[] = { value, offset }.

			/* write patch output to bo */

			nir_variable *var = get_var(&b->shader->outputs, nir_intrinsic_base(intr));

			nir_ssa_def **levels = NULL;
			if (var->data.location == VARYING_SLOT_TESS_LEVEL_OUTER)
				levels = state->outer_levels;
			else if (var->data.location == VARYING_SLOT_TESS_LEVEL_INNER)
				levels = state->inner_levels;

			b->cursor = nir_before_instr(&intr->instr);

			if (levels) {
				for (int i = 0; i < 4; i++)
					if (nir_intrinsic_write_mask(intr) & (1 << i))
						levels[i] = nir_channel(b, intr->src[0].ssa, i);
				nir_instr_remove(&intr->instr);
			} else {
				nir_ssa_def *address = nir_load_tess_param_base_ir3(b);
				nir_ssa_def *offset = build_patch_offset(b, state, intr->src[1].ssa, var);

				debug_assert(nir_intrinsic_component(intr) == 0);

				nir_intrinsic_instr *store =
					replace_intrinsic(b, intr, nir_intrinsic_store_global_ir3,
							intr->src[0].ssa, address, offset);

				nir_intrinsic_set_write_mask(store, nir_intrinsic_write_mask(intr));
			}
			break;
		}

		default:
			break;
		}
	}
}

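/* Writes the tess levels gathered by lower_tess_ctrl_block() out to the
 * tess factor BO and ends the patch. If the program never stored the
 * tess levels there is nothing to write, and the epilogue is skipped
 * entirely.
 */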
static void
emit_tess_epilogue(nir_builder *b, struct state *state)
{
	/* Nothing to do if the shader never wrote the tess levels. */
	if (!state->outer_levels[0])
		return;

	nir_ssa_def *tessfactor_address = nir_load_tess_factor_base_ir3(b);
	nir_ssa_def *levels[2];

	/* Emit the epilogue that actually writes out the tessellation levels
	 * to the tess factor BO.
	 */
	switch (state->topology) {
	case IR3_TESS_TRIANGLES:
		levels[0] = nir_vec4(b, state->outer_levels[0], state->outer_levels[1],
				state->outer_levels[2], state->inner_levels[0]);
		levels[1] = NULL;
		break;
	case IR3_TESS_QUADS:
		levels[0] = nir_vec4(b, state->outer_levels[0], state->outer_levels[1],
				state->outer_levels[2], state->outer_levels[3]);
		levels[1] = nir_vec2(b, state->inner_levels[0], state->inner_levels[1]);
		break;
	case IR3_TESS_ISOLINES:
		levels[0] = nir_vec2(b, state->outer_levels[0], state->outer_levels[1]);
		levels[1] = NULL;
		break;
	default:
		unreachable("nope");
	}

	nir_ssa_def *offset = build_tessfactor_base(b, VARYING_SLOT_TESS_LEVEL_OUTER, state);

	nir_intrinsic_instr *store =
		nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_global_ir3);

	store->src[0] = nir_src_for_ssa(levels[0]);
	store->src[1] = nir_src_for_ssa(tessfactor_address);
	store->src[2] = nir_src_for_ssa(offset);
	store->num_components = levels[0]->num_components;
	nir_intrinsic_set_write_mask(store, (1 << levels[0]->num_components) - 1);
	nir_builder_instr_insert(b, &store->instr);

	if (levels[1]) {
		store = nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_global_ir3);
		offset = nir_iadd(b, offset, nir_imm_int(b, levels[0]->num_components));

		store->src[0] = nir_src_for_ssa(levels[1]);
		store->src[1] = nir_src_for_ssa(tessfactor_address);
		store->src[2] = nir_src_for_ssa(offset);
		store->num_components = levels[1]->num_components;
		nir_intrinsic_set_write_mask(store, (1 << levels[1]->num_components) - 1);
		nir_builder_instr_insert(b, &store->instr);
	}

	/* Finally, insert the endpatch instruction:
	 *
	 * TODO we should re-work this to use normal flow control.
	 */

	nir_intrinsic_instr *end_patch =
		nir_intrinsic_instr_create(b->shader, nir_intrinsic_end_patch_ir3);
	nir_builder_instr_insert(b, &end_patch->instr);
}

void
ir3_nir_lower_tess_ctrl(nir_shader *shader, struct ir3_shader *s, unsigned topology)
{
	struct state state = { .topology = topology };

	if (shader_debug_enabled(shader->info.stage)) {
		fprintf(stderr, "NIR (before tess lowering) for %s shader:\n",
				_mesa_shader_stage_to_string(shader->info.stage));
		nir_print_shader(shader, stderr);
	}

	build_primitive_map(shader, &state.map, &shader->outputs);
	memcpy(s->output_loc, state.map.loc, sizeof(s->output_loc));
	s->output_size = state.map.stride;

	nir_function_impl *impl = nir_shader_get_entrypoint(shader);
	assert(impl);

	nir_builder b;
	nir_builder_init(&b, impl);
	b.cursor = nir_before_cf_list(&impl->body);

	state.header = nir_load_tcs_header_ir3(&b);

	nir_foreach_block_safe (block, impl)
		lower_tess_ctrl_block(block, &b, &state);

	/* Now move the body of the TCS into a conditional:
	 *
	 *   if (gl_InvocationID < num_vertices)
	 *      // body
	 *
	 */

	nir_cf_list body;
	nir_cf_extract(&body, nir_before_cf_list(&impl->body),
			nir_after_cf_list(&impl->body));

	b.cursor = nir_after_cf_list(&impl->body);

	/* Re-emit the header, since the old one got moved into the if branch */
	state.header = nir_load_tcs_header_ir3(&b);
	nir_ssa_def *iid = build_invocation_id(&b, &state);

	const uint32_t nvertices = shader->info.tess.tcs_vertices_out;
	nir_ssa_def *cond = nir_ult(&b, iid, nir_imm_int(&b, nvertices));

	nir_if *nif = nir_push_if(&b, cond);

	nir_cf_reinsert(&body, b.cursor);

	b.cursor = nir_after_cf_list(&nif->then_list);

	/* Insert conditional exit for threads with invocation id != 0 */
	nir_ssa_def *iid0_cond = nir_ieq(&b, iid, nir_imm_int(&b, 0));
	nir_intrinsic_instr *cond_end =
		nir_intrinsic_instr_create(shader, nir_intrinsic_cond_end_ir3);
	cond_end->src[0] = nir_src_for_ssa(iid0_cond);
	nir_builder_instr_insert(&b, &cond_end->instr);

	emit_tess_epilogue(&b, &state);

	nir_pop_if(&b, nif);

	nir_metadata_preserve(impl, 0);
}


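/* Rewrites TES intrinsics: for triangles the third tess coord component
 * is reconstructed as 1.0 - x - y, and input/tess level loads become
 * global loads from the tess param and tess factor BOs.
 */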
static void
lower_tess_eval_block(nir_block *block, nir_builder *b, struct state *state)
{
	nir_foreach_instr_safe (instr, block) {
		if (instr->type != nir_instr_type_intrinsic)
			continue;

		nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);

		switch (intr->intrinsic) {
		case nir_intrinsic_load_tess_coord: {
			b->cursor = nir_after_instr(&intr->instr);
			nir_ssa_def *x = nir_channel(b, &intr->dest.ssa, 0);
			nir_ssa_def *y = nir_channel(b, &intr->dest.ssa, 1);
			nir_ssa_def *z;

			if (state->topology == IR3_TESS_TRIANGLES)
				z = nir_fsub(b, nir_fsub(b, nir_imm_float(b, 1.0f), y), x);
			else
				z = nir_imm_float(b, 0.0f);

			nir_ssa_def *coord = nir_vec3(b, x, y, z);

			nir_ssa_def_rewrite_uses_after(&intr->dest.ssa,
					nir_src_for_ssa(coord),
					b->cursor.instr);
			break;
		}

		case nir_intrinsic_load_per_vertex_input: {
			// src[] = { vertex, offset }.

			b->cursor = nir_before_instr(&intr->instr);

			nir_ssa_def *address = nir_load_tess_param_base_ir3(b);
			nir_variable *var = get_var(&b->shader->inputs, nir_intrinsic_base(intr));
			nir_ssa_def *offset = build_per_vertex_offset(b, state,
					intr->src[0].ssa, intr->src[1].ssa, var);

			replace_intrinsic(b, intr, nir_intrinsic_load_global_ir3, address, offset, NULL);
			break;
		}

		case nir_intrinsic_load_tess_level_inner:
		case nir_intrinsic_load_tess_level_outer: {
			b->cursor = nir_before_instr(&intr->instr);

			gl_varying_slot slot;
			if (intr->intrinsic == nir_intrinsic_load_tess_level_inner)
				slot = VARYING_SLOT_TESS_LEVEL_INNER;
			else
				slot = VARYING_SLOT_TESS_LEVEL_OUTER;

			nir_ssa_def *address = nir_load_tess_factor_base_ir3(b);
			nir_ssa_def *offset = build_tessfactor_base(b, slot, state);

			/* Loading across a vec4 (16 byte) memory boundary is
			 * problematic if we don't use components from the second
			 * vec4. The tess levels aren't guaranteed to be vec4
			 * aligned and we don't know which levels are actually
			 * used, so we load each component individually.
			 */
			nir_ssa_def *levels[4];
			for (unsigned i = 0; i < intr->num_components; i++) {
				nir_intrinsic_instr *new_intr =
					nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_global_ir3);

				new_intr->src[0] = nir_src_for_ssa(address);
				new_intr->src[1] = nir_src_for_ssa(nir_iadd(b, offset, nir_imm_int(b, i)));
				new_intr->num_components = 1;
				nir_ssa_dest_init(&new_intr->instr, &new_intr->dest, 1, 32, NULL);
				nir_builder_instr_insert(b, &new_intr->instr);
				levels[i] = &new_intr->dest.ssa;
			}

			nir_ssa_def *v = nir_vec(b, levels, intr->num_components);

			nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_src_for_ssa(v));

			nir_instr_remove(&intr->instr);
			break;
		}

		case nir_intrinsic_load_input: {
			// src[] = { offset }.

			nir_variable *var = get_var(&b->shader->inputs, nir_intrinsic_base(intr));

			debug_assert(var->data.patch);

			b->cursor = nir_before_instr(&intr->instr);

			nir_ssa_def *address = nir_load_tess_param_base_ir3(b);
			nir_ssa_def *offset = build_patch_offset(b, state, intr->src[0].ssa, var);

			replace_intrinsic(b, intr, nir_intrinsic_load_global_ir3, address, offset, NULL);
			break;
		}

		default:
			break;
		}
	}
}

void
ir3_nir_lower_tess_eval(nir_shader *shader, unsigned topology)
{
	struct state state = { .topology = topology };

	if (shader_debug_enabled(shader->info.stage)) {
		fprintf(stderr, "NIR (before tess lowering) for %s shader:\n",
				_mesa_shader_stage_to_string(shader->info.stage));
		nir_print_shader(shader, stderr);
	}

	/* Build map of inputs so we have the sizes. */
	build_primitive_map(shader, &state.map, &shader->inputs);

	nir_function_impl *impl = nir_shader_get_entrypoint(shader);
	assert(impl);

	nir_builder b;
	nir_builder_init(&b, impl);

	nir_foreach_block_safe (block, impl)
		lower_tess_eval_block(block, &b, &state);

	nir_metadata_preserve(impl, 0);
}

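/* Lowers EmitVertex()/EndPrimitive(). Each invocation in the GS wave
 * handles one output vertex: on EmitVertex() the invocation whose thread
 * id matches the running vertex count copies the shadowed outputs into
 * its emit set, and vertex_flags is updated (the value 4 written by
 * EndPrimitive() appears to mark a primitive boundary).
 */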
static void
lower_gs_block(nir_block *block, nir_builder *b, struct state *state)
{
	nir_foreach_instr_safe (instr, block) {
		if (instr->type != nir_instr_type_intrinsic)
			continue;

		nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);

		switch (intr->intrinsic) {
		case nir_intrinsic_end_primitive: {
			b->cursor = nir_before_instr(&intr->instr);
			nir_store_var(b, state->vertex_flags_out, nir_imm_int(b, 4), 0x1);
			nir_instr_remove(&intr->instr);
			break;
		}

		case nir_intrinsic_emit_vertex: {
			/* Load the vertex count */
			b->cursor = nir_before_instr(&intr->instr);
			nir_ssa_def *count = nir_load_var(b, state->vertex_count_var);

			nir_push_if(b, nir_ieq(b, count, local_thread_id(b)));

			foreach_two_lists(dest_node, &state->emit_outputs, src_node, &state->old_outputs) {
				nir_variable *dest = exec_node_data(nir_variable, dest_node, node);
				nir_variable *src = exec_node_data(nir_variable, src_node, node);
				nir_copy_var(b, dest, src);
			}

			nir_instr_remove(&intr->instr);

			nir_store_var(b, state->emitted_vertex_var,
					nir_iadd(b, nir_load_var(b, state->emitted_vertex_var), nir_imm_int(b, 1)), 0x1);

			nir_pop_if(b, NULL);

			/* Increment the vertex count by 1 */
			nir_store_var(b, state->vertex_count_var,
					nir_iadd(b, count, nir_imm_int(b, 1)), 0x1); /* .x */
			nir_store_var(b, state->vertex_flags_out, nir_imm_int(b, 0), 0x1);

			break;
		}

		default:
			break;
		}
	}
}

void
ir3_nir_lower_gs(nir_shader *shader)
{
	struct state state = { };

	if (shader_debug_enabled(shader->info.stage)) {
		fprintf(stderr, "NIR (before gs lowering):\n");
		nir_print_shader(shader, stderr);
	}

	build_primitive_map(shader, &state.map, &shader->inputs);

	/* Create an output var for vertex_flags. This will be shadowed below,
	 * same way regular outputs get shadowed, and this variable will become a
	 * temporary.
	 */
	state.vertex_flags_out = nir_variable_create(shader, nir_var_shader_out,
			glsl_uint_type(), "vertex_flags");
	state.vertex_flags_out->data.driver_location = shader->num_outputs++;
	state.vertex_flags_out->data.location = VARYING_SLOT_GS_VERTEX_FLAGS_IR3;
	state.vertex_flags_out->data.interpolation = INTERP_MODE_NONE;

	nir_function_impl *impl = nir_shader_get_entrypoint(shader);
	assert(impl);

	nir_builder b;
	nir_builder_init(&b, impl);
	b.cursor = nir_before_cf_list(&impl->body);

	state.header = nir_load_gs_header_ir3(&b);

	/* Generate two sets of shadow vars for the output variables. The first
	 * set replaces the real outputs and the second set (emit_outputs) we'll
	 * assign in the emit_vertex conditionals. Then at the end of the shader
	 * we copy the emit_outputs to the real outputs, so that we get
	 * store_output in uniform control flow.
	 */
	exec_list_move_nodes_to(&shader->outputs, &state.old_outputs);
	exec_list_make_empty(&state.emit_outputs);
	nir_foreach_variable(var, &state.old_outputs) {
		/* Create a new output var by cloning the original output var and
		 * stealing the name.
		 */
		nir_variable *output = nir_variable_clone(var, shader);
		exec_list_push_tail(&shader->outputs, &output->node);

		/* Rewrite the original output to be a shadow variable. */
		var->name = ralloc_asprintf(var, "%s@gs-temp", output->name);
		var->data.mode = nir_var_shader_temp;

		/* Clone the shadow variable to create the emit shadow variable that
		 * we'll assign in the emit conditionals.
		 */
		nir_variable *emit_output = nir_variable_clone(var, shader);
		emit_output->name = ralloc_asprintf(emit_output, "%s@emit-temp", output->name);
		exec_list_push_tail(&state.emit_outputs, &emit_output->node);
	}

	/* During the shader we'll keep track of which vertex we're currently
	 * emitting for the EmitVertex test and how many vertices we emitted, so
	 * we know to discard if we didn't emit any. In most simple shaders, this
	 * can all be statically determined and gets optimized away.
	 */
	state.vertex_count_var =
		nir_local_variable_create(impl, glsl_uint_type(), "vertex_count");
	state.emitted_vertex_var =
		nir_local_variable_create(impl, glsl_uint_type(), "emitted_vertex");

	/* Initialize to 0. */
	b.cursor = nir_before_cf_list(&impl->body);
	nir_store_var(&b, state.vertex_count_var, nir_imm_int(&b, 0), 0x1);
	nir_store_var(&b, state.emitted_vertex_var, nir_imm_int(&b, 0), 0x1);
	nir_store_var(&b, state.vertex_flags_out, nir_imm_int(&b, 4), 0x1);

	nir_foreach_block_safe (block, impl)
		lower_gs_block(block, &b, &state);

	set_foreach(impl->end_block->predecessors, block_entry) {
		struct nir_block *block = (void *)block_entry->key;
		b.cursor = nir_after_block_before_jump(block);

		nir_intrinsic_instr *discard_if =
			nir_intrinsic_instr_create(b.shader, nir_intrinsic_discard_if);

		nir_ssa_def *cond = nir_ieq(&b, nir_load_var(&b, state.emitted_vertex_var), nir_imm_int(&b, 0));

		discard_if->src[0] = nir_src_for_ssa(cond);

		nir_builder_instr_insert(&b, &discard_if->instr);

		foreach_two_lists(dest_node, &shader->outputs, src_node, &state.emit_outputs) {
			nir_variable *dest = exec_node_data(nir_variable, dest_node, node);
			nir_variable *src = exec_node_data(nir_variable, src_node, node);
			nir_copy_var(&b, dest, src);
		}
	}

	exec_list_append(&shader->globals, &state.old_outputs);
	exec_list_append(&shader->globals, &state.emit_outputs);

	nir_metadata_preserve(impl, 0);

	nir_lower_global_vars_to_local(shader);
	nir_split_var_copies(shader);
	nir_lower_var_copies(shader);

	nir_fixup_deref_modes(shader);

	if (shader_debug_enabled(shader->info.stage)) {
		fprintf(stderr, "NIR (after gs lowering):\n");
		nir_print_shader(shader, stderr);
	}
}

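/* Matches producer outputs to consumer inputs by location and fills in
 * locs[] with each input's offset into the producer's output layout,
 * scaled to the offset units that the consumer's load instructions
 * expect. Returns the number of input locations used.
 */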
uint32_t
ir3_link_geometry_stages(const struct ir3_shader_variant *producer,
		const struct ir3_shader_variant *consumer,
		uint32_t *locs)
{
	uint32_t num_loc = 0, factor;

	switch (consumer->type) {
	case MESA_SHADER_TESS_CTRL:
	case MESA_SHADER_GEOMETRY:
		/* These stages load with ldlw, which expects byte offsets. */
		factor = 4;
		break;
	case MESA_SHADER_TESS_EVAL:
		/* The tess eval shader uses ldg, which takes dword offsets. */
		factor = 1;
		break;
	default:
		unreachable("bad shader stage");
	}

	nir_foreach_variable(in_var, &consumer->shader->nir->inputs) {
		nir_foreach_variable(out_var, &producer->shader->nir->outputs) {
			if (in_var->data.location == out_var->data.location) {
				locs[in_var->data.driver_location] =
					producer->shader->output_loc[out_var->data.driver_location] * factor;

				debug_assert(num_loc <= in_var->data.driver_location + 1);
				num_loc = in_var->data.driver_location + 1;
			}
		}
	}

	return num_loc;
}