freedreno/ir3: add generic get_barycentric()
src/freedreno/ir3/ir3_nir_lower_tess.c
1 /*
2 * Copyright © 2019 Google, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 */
23
24 #include "ir3_nir.h"
25 #include "ir3_compiler.h"
26 #include "compiler/nir/nir_builder.h"
27
28 struct state {
29 uint32_t topology;
30
31 struct primitive_map {
32 unsigned loc[32];
33 unsigned size[32];
34 unsigned stride;
35 } map;
36
37 nir_ssa_def *header;
38
39 nir_variable *vertex_count_var;
40 nir_variable *emitted_vertex_var;
41 nir_variable *vertex_flags_out;
42
43 struct exec_list old_outputs;
44 struct exec_list emit_outputs;
45
46 nir_ssa_def *outer_levels[4];
47 nir_ssa_def *inner_levels[2];
48 };
49
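/*
 * The tess/geom "header" sysval packs several per-thread IDs into a single
 * register.  Judging from the shift/mask pairs used by the helpers below,
 * the layout appears to be:
 *
 *   [5:0]    local primitive id
 *   [10:6]   vertex id within the primitive
 *   [15:11]  invocation id (TCS)
 *
 * (local_thread_id() further down extracts a 10-bit thread id at [25:16]
 * from the GS header.)
 */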
50 static nir_ssa_def *
51 bitfield_extract(nir_builder *b, nir_ssa_def *v, uint32_t start, uint32_t mask)
52 {
53 return nir_iand(b, nir_ushr(b, v, nir_imm_int(b, start)),
54 nir_imm_int(b, mask));
55 }
56
57 static nir_ssa_def *
58 build_invocation_id(nir_builder *b, struct state *state)
59 {
60 return bitfield_extract(b, state->header, 11, 31);
61 }
62
63 static nir_ssa_def *
64 build_vertex_id(nir_builder *b, struct state *state)
65 {
66 return bitfield_extract(b, state->header, 6, 31);
67 }
68
69 static nir_ssa_def *
70 build_local_primitive_id(nir_builder *b, struct state *state)
71 {
72 return bitfield_extract(b, state->header, 0, 63);
73 }
74
75 static nir_variable *
76 get_var(struct exec_list *list, int driver_location)
77 {
78 nir_foreach_variable (v, list) {
79 if (v->data.driver_location == driver_location) {
80 return v;
81 }
82 }
83
84 return NULL;
85 }
86
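/* Offset into the shared (local) storage used to pass attributes between
 * geometry-pipeline stages:
 *
 *   local_primitive_id * primitive_stride +
 *   vertex * vertex_stride +
 *   attr_offset + offset
 *
 * The writer side (VS/DS outputs) uses the compile-time primitive map for
 * the strides, while the reader side (HS/GS inputs) gets the producer's
 * layout via driver params.  The scaling by 4 suggests these are byte
 * offsets, matching the ldlw note in ir3_link_geometry_stages().
 */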
87 static nir_ssa_def *
88 build_local_offset(nir_builder *b, struct state *state,
89 nir_ssa_def *vertex, uint32_t base, nir_ssa_def *offset)
90 {
91 nir_ssa_def *primitive_stride = nir_load_vs_primitive_stride_ir3(b);
92 nir_ssa_def *primitive_offset =
93 nir_imul24(b, build_local_primitive_id(b, state), primitive_stride);
94 nir_ssa_def *attr_offset;
95 nir_ssa_def *vertex_stride;
96
97 switch (b->shader->info.stage) {
98 case MESA_SHADER_VERTEX:
99 case MESA_SHADER_TESS_EVAL:
100 vertex_stride = nir_imm_int(b, state->map.stride * 4);
101 attr_offset = nir_imm_int(b, state->map.loc[base] * 4);
102 break;
103 case MESA_SHADER_TESS_CTRL:
104 case MESA_SHADER_GEOMETRY:
105 vertex_stride = nir_load_vs_vertex_stride_ir3(b);
106 attr_offset = nir_load_primitive_location_ir3(b, base);
107 break;
108 default:
109 unreachable("bad shader stage");
110 }
111
112 nir_ssa_def *vertex_offset = nir_imul24(b, vertex, vertex_stride);
113
114 return nir_iadd(b, nir_iadd(b, primitive_offset, vertex_offset),
115 nir_iadd(b, attr_offset, offset));
116 }
117
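/* Replace 'intr' with a new intrinsic of the given op (up to three sources),
 * keeping num_components and redirecting all uses of the old destination to
 * the new one.
 */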
118 static nir_intrinsic_instr *
119 replace_intrinsic(nir_builder *b, nir_intrinsic_instr *intr,
120 nir_intrinsic_op op, nir_ssa_def *src0, nir_ssa_def *src1, nir_ssa_def *src2)
121 {
122 nir_intrinsic_instr *new_intr =
123 nir_intrinsic_instr_create(b->shader, op);
124
125 new_intr->src[0] = nir_src_for_ssa(src0);
126 if (src1)
127 new_intr->src[1] = nir_src_for_ssa(src1);
128 if (src2)
129 new_intr->src[2] = nir_src_for_ssa(src2);
130
131 new_intr->num_components = intr->num_components;
132
133 if (nir_intrinsic_infos[op].has_dest)
134 nir_ssa_dest_init(&new_intr->instr, &new_intr->dest,
135 intr->num_components, 32, NULL);
136
137 nir_builder_instr_insert(b, &new_intr->instr);
138
139 if (nir_intrinsic_infos[op].has_dest)
140 nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_src_for_ssa(&new_intr->dest.ssa));
141
142 nir_instr_remove(&intr->instr);
143
144 return new_intr;
145 }
146
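/* Pack the variables on 'list' into a flat layout: loc[n] becomes the
 * starting dword of driver_location n and 'stride' the total size.  The
 * tess levels are skipped since they live in the tess factor BO instead.
 * After packing, size[n] is divided by the variable's array length (giving
 * the per-vertex size for arrayed per-vertex varyings) and zeroed for patch
 * variables; build_per_vertex_offset() uses it as the per-vertex stride.
 */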
147 static void
148 build_primitive_map(nir_shader *shader, struct primitive_map *map, struct exec_list *list)
149 {
150 nir_foreach_variable (var, list) {
151 switch (var->data.location) {
152 case VARYING_SLOT_TESS_LEVEL_OUTER:
153 case VARYING_SLOT_TESS_LEVEL_INNER:
154 continue;
155 }
156
157 unsigned size = glsl_count_attribute_slots(var->type, false) * 4;
158
159 assert(var->data.driver_location < ARRAY_SIZE(map->size));
160 map->size[var->data.driver_location] =
161 MAX2(map->size[var->data.driver_location], size);
162 }
163
164 unsigned loc = 0;
165 for (uint32_t i = 0; i < ARRAY_SIZE(map->size); i++) {
166 if (map->size[i] == 0)
167 continue;
168 nir_variable *var = get_var(list, i);
169 map->loc[i] = loc;
170 loc += map->size[i];
171
172 if (var->data.patch)
173 map->size[i] = 0;
174 else
175 map->size[i] = map->size[i] / glsl_get_length(var->type);
176 }
177
178 map->stride = loc;
179 }
180
181 static void
182 lower_block_to_explicit_output(nir_block *block, nir_builder *b, struct state *state)
183 {
184 nir_foreach_instr_safe (instr, block) {
185 if (instr->type != nir_instr_type_intrinsic)
186 continue;
187
188 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
189
190 switch (intr->intrinsic) {
191 case nir_intrinsic_store_output: {
192 // src[] = { value, offset }.
193
194 /* nir_lower_io_to_temporaries replaces all access to output
195 * variables with temp variables and then emits a nir_copy_var at
196 * the end of the shader. Thus, we should always get a full wrmask
197 * here.
198 */
199 assert(util_is_power_of_two_nonzero(nir_intrinsic_write_mask(intr) + 1));
200
201 b->cursor = nir_instr_remove(&intr->instr);
202
203 nir_ssa_def *vertex_id = build_vertex_id(b, state);
204 nir_ssa_def *offset = build_local_offset(b, state, vertex_id, nir_intrinsic_base(intr),
205 intr->src[1].ssa);
206 nir_intrinsic_instr *store =
207 nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_shared_ir3);
208
209 store->src[0] = nir_src_for_ssa(intr->src[0].ssa);
210 store->src[1] = nir_src_for_ssa(offset);
211 store->num_components = intr->num_components;
212
213 nir_builder_instr_insert(b, &store->instr);
214 break;
215 }
216
217 default:
218 break;
219 }
220 }
221 }
222
223 static nir_ssa_def *
224 local_thread_id(nir_builder *b)
225 {
226 return bitfield_extract(b, nir_load_gs_header_ir3(b), 16, 1023);
227 }
228
229 void
230 ir3_nir_lower_to_explicit_output(nir_shader *shader, struct ir3_shader_variant *v,
231 unsigned topology)
232 {
233 struct state state = { };
234
235 build_primitive_map(shader, &state.map, &shader->outputs);
236 memcpy(v->output_loc, state.map.loc, sizeof(v->output_loc));
237
238 nir_function_impl *impl = nir_shader_get_entrypoint(shader);
239 assert(impl);
240
241 nir_builder b;
242 nir_builder_init(&b, impl);
243 b.cursor = nir_before_cf_list(&impl->body);
244
245 if (v->type == MESA_SHADER_VERTEX && topology != IR3_TESS_NONE)
246 state.header = nir_load_tcs_header_ir3(&b);
247 else
248 state.header = nir_load_gs_header_ir3(&b);
249
250 nir_foreach_block_safe (block, impl)
251 lower_block_to_explicit_output(block, &b, &state);
252
253 nir_metadata_preserve(impl, nir_metadata_block_index |
254 nir_metadata_dominance);
255
256 v->output_size = state.map.stride;
257 }
258
259
260 static void
261 lower_block_to_explicit_input(nir_block *block, nir_builder *b, struct state *state)
262 {
263 nir_foreach_instr_safe (instr, block) {
264 if (instr->type != nir_instr_type_intrinsic)
265 continue;
266
267 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
268
269 switch (intr->intrinsic) {
270 case nir_intrinsic_load_per_vertex_input: {
271 // src[] = { vertex, offset }.
272
273 b->cursor = nir_before_instr(&intr->instr);
274
275 nir_ssa_def *offset = build_local_offset(b, state,
276 intr->src[0].ssa, // this is typically gl_InvocationID
277 nir_intrinsic_base(intr),
278 intr->src[1].ssa);
279
280 replace_intrinsic(b, intr, nir_intrinsic_load_shared_ir3, offset, NULL, NULL);
281 break;
282 }
283
284 case nir_intrinsic_load_invocation_id: {
285 b->cursor = nir_before_instr(&intr->instr);
286
287 nir_ssa_def *iid = build_invocation_id(b, state);
288 nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_src_for_ssa(iid));
289 nir_instr_remove(&intr->instr);
290 break;
291 }
292
293 default:
294 break;
295 }
296 }
297 }
298
299 void
300 ir3_nir_lower_to_explicit_input(nir_shader *shader)
301 {
302 struct state state = { };
303
304 nir_function_impl *impl = nir_shader_get_entrypoint(shader);
305 assert(impl);
306
307 nir_builder b;
308 nir_builder_init(&b, impl);
309 b.cursor = nir_before_cf_list(&impl->body);
310
311 if (shader->info.stage == MESA_SHADER_GEOMETRY)
312 state.header = nir_load_gs_header_ir3(&b);
313 else
314 state.header = nir_load_tcs_header_ir3(&b);
315
316 nir_foreach_block_safe (block, impl)
317 lower_block_to_explicit_input(block, &b, &state);
318 }
319
320
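/* Offset into the tess param BO (in dwords, it seems, given the ldg note in
 * ir3_link_geometry_stages()) for a per-vertex value:
 *
 *   primitive_id * patch_stride + attr_offset +
 *   vertex * attr_stride + (offset << 2)
 *
 * The intrinsic's offset source is apparently counted in vec4 slots, hence
 * the << 2.  In the HS the attribute locations/strides are compile-time
 * constants from its own primitive map; in the DS they come via driver
 * params so it can follow whatever layout the HS variant chose.
 */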
321 static nir_ssa_def *
322 build_per_vertex_offset(nir_builder *b, struct state *state,
323 nir_ssa_def *vertex, nir_ssa_def *offset, nir_variable *var)
324 {
325 nir_ssa_def *primitive_id = nir_load_primitive_id(b);
326 nir_ssa_def *patch_stride = nir_load_hs_patch_stride_ir3(b);
327 nir_ssa_def *patch_offset = nir_imul24(b, primitive_id, patch_stride);
328 nir_ssa_def *attr_offset;
329 int loc = var->data.driver_location;
330
331 switch (b->shader->info.stage) {
332 case MESA_SHADER_TESS_CTRL:
333 attr_offset = nir_imm_int(b, state->map.loc[loc]);
334 break;
335 case MESA_SHADER_TESS_EVAL:
336 attr_offset = nir_load_primitive_location_ir3(b, loc);
337 break;
338 default:
339 unreachable("bad shader stage");
340 }
341
342 nir_ssa_def *attr_stride = nir_imm_int(b, state->map.size[loc]);
343 nir_ssa_def *vertex_offset = nir_imul24(b, vertex, attr_stride);
344
345 return nir_iadd(b, nir_iadd(b, patch_offset, attr_offset),
346 nir_iadd(b, vertex_offset, nir_ishl(b, offset, nir_imm_int(b, 2))));
347 }
348
349 static nir_ssa_def *
350 build_patch_offset(nir_builder *b, struct state *state, nir_ssa_def *offset, nir_variable *var)
351 {
352 debug_assert(var && var->data.patch);
353
354 return build_per_vertex_offset(b, state, nir_imm_int(b, 0), offset, var);
355 }
356
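/* Per-patch layout of the tess factor BO assumed here (dword indices):
 *
 *   [0]                  header
 *   [1]                  gl_TessLevelOuter[], outer_levels entries
 *   [1 + outer_levels]   gl_TessLevelInner[], inner_levels entries
 *
 * so each patch occupies 1 + outer_levels + inner_levels dwords, with the
 * level counts determined by the output topology.
 */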
357 static nir_ssa_def *
358 build_tessfactor_base(nir_builder *b, gl_varying_slot slot, struct state *state)
359 {
360 uint32_t inner_levels, outer_levels;
361 switch (state->topology) {
362 case IR3_TESS_TRIANGLES:
363 inner_levels = 1;
364 outer_levels = 3;
365 break;
366 case IR3_TESS_QUADS:
367 inner_levels = 2;
368 outer_levels = 4;
369 break;
370 case IR3_TESS_ISOLINES:
371 inner_levels = 0;
372 outer_levels = 2;
373 break;
374 default:
375 unreachable("bad");
376 }
377
378 const uint32_t patch_stride = 1 + inner_levels + outer_levels;
379
380 nir_ssa_def *primitive_id = nir_load_primitive_id(b);
381
382 nir_ssa_def *patch_offset = nir_imul24(b, primitive_id, nir_imm_int(b, patch_stride));
383
384 uint32_t offset;
385 switch (slot) {
386 case VARYING_SLOT_TESS_LEVEL_OUTER:
387 /* There's some kind of header dword; the tess levels start at index 1. */
388 offset = 1;
389 break;
390 case VARYING_SLOT_TESS_LEVEL_INNER:
391 offset = 1 + outer_levels;
392 break;
393 default:
394 unreachable("bad");
395 }
396
397 return nir_iadd(b, patch_offset, nir_imm_int(b, offset));
398 }
399
400 static void
401 lower_tess_ctrl_block(nir_block *block, nir_builder *b, struct state *state)
402 {
403 nir_foreach_instr_safe (instr, block) {
404 if (instr->type != nir_instr_type_intrinsic)
405 continue;
406
407 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
408
409 switch (intr->intrinsic) {
410 case nir_intrinsic_control_barrier:
411 case nir_intrinsic_memory_barrier_tcs_patch:
412 /* Hull shaders dispatch 32 wide so an entire patch will always
413 * fit in a single warp and execute in lock-step. Consequently,
414 * we don't need to do anything for TCS barriers so just remove
415 * the intrinsic. Otherwise we'll emit an actual barrier
416 * instructions, which will deadlock.
417 */
418 nir_instr_remove(&intr->instr);
419 break;
420
421 case nir_intrinsic_load_per_vertex_output: {
422 // src[] = { vertex, offset }.
423
424 b->cursor = nir_before_instr(&intr->instr);
425
426 nir_ssa_def *address = nir_load_tess_param_base_ir3(b);
427 nir_variable *var = get_var(&b->shader->outputs, nir_intrinsic_base(intr));
428 nir_ssa_def *offset = build_per_vertex_offset(b, state,
429 intr->src[0].ssa, intr->src[1].ssa, var);
430
431 replace_intrinsic(b, intr, nir_intrinsic_load_global_ir3, address, offset, NULL);
432 break;
433 }
434
435 case nir_intrinsic_store_per_vertex_output: {
436 // src[] = { value, vertex, offset }.
437
438 b->cursor = nir_before_instr(&intr->instr);
439
440 /* nir_lower_io_to_temporaries replaces all access to output
441 * variables with temp variables and then emits a nir_copy_var at
442 * the end of the shader. Thus, we should always get a full wrmask
443 * here.
444 */
445 assert(util_is_power_of_two_nonzero(nir_intrinsic_write_mask(intr) + 1));
446
447 nir_ssa_def *value = intr->src[0].ssa;
448 nir_ssa_def *address = nir_load_tess_param_base_ir3(b);
449 nir_variable *var = get_var(&b->shader->outputs, nir_intrinsic_base(intr));
450 nir_ssa_def *offset = build_per_vertex_offset(b, state,
451 intr->src[1].ssa, intr->src[2].ssa, var);
452
453 replace_intrinsic(b, intr, nir_intrinsic_store_global_ir3, value, address,
454 nir_iadd(b, offset, nir_imm_int(b, nir_intrinsic_component(intr))));
455
456 break;
457 }
458
459 case nir_intrinsic_load_tess_level_inner:
460 case nir_intrinsic_load_tess_level_outer: {
461 b->cursor = nir_before_instr(&intr->instr);
462
463 gl_varying_slot slot;
464 if (intr->intrinsic == nir_intrinsic_load_tess_level_inner)
465 slot = VARYING_SLOT_TESS_LEVEL_INNER;
466 else
467 slot = VARYING_SLOT_TESS_LEVEL_OUTER;
468
469 nir_ssa_def *address = nir_load_tess_factor_base_ir3(b);
470 nir_ssa_def *offset = build_tessfactor_base(b, slot, state);
471
472 replace_intrinsic(b, intr, nir_intrinsic_load_global_ir3, address, offset, NULL);
473 break;
474 }
475
476 case nir_intrinsic_load_output: {
477 // src[] = { offset }.
478
479 nir_variable *var = get_var(&b->shader->outputs, nir_intrinsic_base(intr));
480
481 b->cursor = nir_before_instr(&intr->instr);
482
483 nir_ssa_def *address = nir_load_tess_param_base_ir3(b);
484 nir_ssa_def *offset = build_patch_offset(b, state, intr->src[0].ssa, var);
485
486 replace_intrinsic(b, intr, nir_intrinsic_load_global_ir3, address, offset, NULL);
487 break;
488 }
489
490 case nir_intrinsic_store_output: {
491 // src[] = { value, offset }.
492
493 /* write patch output to bo */
494
495 nir_variable *var = get_var(&b->shader->outputs, nir_intrinsic_base(intr));
496
497 nir_ssa_def **levels = NULL;
498 if (var->data.location == VARYING_SLOT_TESS_LEVEL_OUTER)
499 levels = state->outer_levels;
500 else if (var->data.location == VARYING_SLOT_TESS_LEVEL_INNER)
501 levels = state->inner_levels;
502
503 b->cursor = nir_before_instr(&intr->instr);
504
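/* Tess level writes are not stored here; the values are just captured
 * into state and written out to the tess factor BO in one go by
 * emit_tess_epilogue().
 */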
505 if (levels) {
506 for (int i = 0; i < 4; i++) {
507 if (nir_intrinsic_write_mask(intr) & (1 << i)) {
508 uint32_t component = nir_intrinsic_component(intr);
509 levels[i + component] = nir_channel(b, intr->src[0].ssa, i);
510 }
511 }
512 nir_instr_remove(&intr->instr);
513 } else {
514 nir_ssa_def *address = nir_load_tess_param_base_ir3(b);
515 nir_ssa_def *offset = build_patch_offset(b, state, intr->src[1].ssa, var);
516
517 debug_assert(nir_intrinsic_component(intr) == 0);
518
519 /* nir_lower_io_to_temporaries replaces all access to output
520 * variables with temp variables and then emits a nir_copy_var at
521 * the end of the shader. Thus, we should always get a full wrmask
522 * here.
523 */
524 assert(util_is_power_of_two_nonzero(nir_intrinsic_write_mask(intr) + 1));
525
526 replace_intrinsic(b, intr, nir_intrinsic_store_global_ir3,
527 intr->src[0].ssa, address, offset);
528 }
529 break;
530 }
531
532 default:
533 break;
534 }
535 }
536 }
537
538 static void
539 emit_tess_epilogue(nir_builder *b, struct state *state)
540 {
541 nir_ssa_def *tessfactor_address = nir_load_tess_factor_base_ir3(b);
542 nir_ssa_def *levels[2];
543
544 if (!state->outer_levels[0])
545 return;
546
547 /* Then emit the epilogue that actually writes out the tessellation levels
548 * to the BOs.
549 */
550 switch (state->topology) {
551 case IR3_TESS_TRIANGLES:
552 levels[0] = nir_vec4(b, state->outer_levels[0], state->outer_levels[1],
553 state->outer_levels[2], state->inner_levels[0]);
554 levels[1] = NULL;
555 break;
556 case IR3_TESS_QUADS:
557 levels[0] = nir_vec4(b, state->outer_levels[0], state->outer_levels[1],
558 state->outer_levels[2], state->outer_levels[3]);
559 levels[1] = nir_vec2(b, state->inner_levels[0], state->inner_levels[1]);
560 break;
561 case IR3_TESS_ISOLINES:
562 levels[0] = nir_vec2(b, state->outer_levels[0], state->outer_levels[1]);
563 levels[1] = NULL;
564 break;
565 default:
566 unreachable("nope");
567 }
568
569 nir_ssa_def *offset = build_tessfactor_base(b, VARYING_SLOT_TESS_LEVEL_OUTER, state);
570
571 nir_intrinsic_instr *store =
572 nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_global_ir3);
573
574 store->src[0] = nir_src_for_ssa(levels[0]);
575 store->src[1] = nir_src_for_ssa(tessfactor_address);
576 store->src[2] = nir_src_for_ssa(offset);
577 nir_builder_instr_insert(b, &store->instr);
578 store->num_components = levels[0]->num_components;
579
580 if (levels[1]) {
581 store = nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_global_ir3);
582 offset = nir_iadd(b, offset, nir_imm_int(b, levels[0]->num_components));
583
584 store->src[0] = nir_src_for_ssa(levels[1]);
585 store->src[1] = nir_src_for_ssa(tessfactor_address);
586 store->src[2] = nir_src_for_ssa(offset);
587 nir_builder_instr_insert(b, &store->instr);
588 store->num_components = levels[1]->num_components;
589 }
590
591 /* Finally, insert the endpatch instruction:
592 *
593 * TODO we should re-work this to use normal flow control.
594 */
595
596 nir_intrinsic_instr *end_patch =
597 nir_intrinsic_instr_create(b->shader, nir_intrinsic_end_patch_ir3);
598 nir_builder_instr_insert(b, &end_patch->instr);
599 }
600
601 void
602 ir3_nir_lower_tess_ctrl(nir_shader *shader, struct ir3_shader_variant *v,
603 unsigned topology)
604 {
605 struct state state = { .topology = topology };
606
607 if (shader_debug_enabled(shader->info.stage)) {
608 fprintf(stderr, "NIR (before tess lowering) for %s shader:\n",
609 _mesa_shader_stage_to_string(shader->info.stage));
610 nir_print_shader(shader, stderr);
611 }
612
613 build_primitive_map(shader, &state.map, &shader->outputs);
614 memcpy(v->output_loc, state.map.loc, sizeof(v->output_loc));
615 v->output_size = state.map.stride;
616
617 nir_function_impl *impl = nir_shader_get_entrypoint(shader);
618 assert(impl);
619
620 nir_builder b;
621 nir_builder_init(&b, impl);
622 b.cursor = nir_before_cf_list(&impl->body);
623
624 state.header = nir_load_tcs_header_ir3(&b);
625
626 nir_foreach_block_safe (block, impl)
627 lower_tess_ctrl_block(block, &b, &state);
628
629 /* Now move the body of the TCS into a conditional:
630 *
631 * if (gl_InvocationID < num_vertices)
632 * // body
633 *
634 */
635
636 nir_cf_list body;
637 nir_cf_extract(&body, nir_before_cf_list(&impl->body),
638 nir_after_cf_list(&impl->body));
639
640 b.cursor = nir_after_cf_list(&impl->body);
641
642 /* Re-emit the header, since the old one got moved into the if branch */
643 state.header = nir_load_tcs_header_ir3(&b);
644 nir_ssa_def *iid = build_invocation_id(&b, &state);
645
646 const uint32_t nvertices = shader->info.tess.tcs_vertices_out;
647 nir_ssa_def *cond = nir_ult(&b, iid, nir_imm_int(&b, nvertices));
648
649 nir_if *nif = nir_push_if(&b, cond);
650
651 nir_cf_reinsert(&body, b.cursor);
652
653 b.cursor = nir_after_cf_list(&nif->then_list);
654
655 /* Insert conditional exit for threads with invocation id != 0 */
656 nir_ssa_def *iid0_cond = nir_ieq(&b, iid, nir_imm_int(&b, 0));
657 nir_intrinsic_instr *cond_end =
658 nir_intrinsic_instr_create(shader, nir_intrinsic_cond_end_ir3);
659 cond_end->src[0] = nir_src_for_ssa(iid0_cond);
660 nir_builder_instr_insert(&b, &cond_end->instr);
661
662 emit_tess_epilogue(&b, &state);
663
664 nir_pop_if(&b, nif);
665
666 nir_metadata_preserve(impl, 0);
667 }
668
669
670 static void
671 lower_tess_eval_block(nir_block *block, nir_builder *b, struct state *state)
672 {
673 nir_foreach_instr_safe (instr, block) {
674 if (instr->type != nir_instr_type_intrinsic)
675 continue;
676
677 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
678
679 switch (intr->intrinsic) {
680 case nir_intrinsic_load_tess_coord: {
681 b->cursor = nir_after_instr(&intr->instr);
682 nir_ssa_def *x = nir_channel(b, &intr->dest.ssa, 0);
683 nir_ssa_def *y = nir_channel(b, &intr->dest.ssa, 1);
684 nir_ssa_def *z;
685
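/* Only .x/.y of the hw-provided tess coord are used; for the triangle
 * domain the third barycentric coordinate is reconstructed as 1 - x - y,
 * otherwise .z is just 0.
 */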
686 if (state->topology == IR3_TESS_TRIANGLES)
687 z = nir_fsub(b, nir_fsub(b, nir_imm_float(b, 1.0f), y), x);
688 else
689 z = nir_imm_float(b, 0.0f);
690
691 nir_ssa_def *coord = nir_vec3(b, x, y, z);
692
693 nir_ssa_def_rewrite_uses_after(&intr->dest.ssa,
694 nir_src_for_ssa(coord),
695 b->cursor.instr);
696 break;
697 }
698
699 case nir_intrinsic_load_per_vertex_input: {
700 // src[] = { vertex, offset }.
701
702 b->cursor = nir_before_instr(&intr->instr);
703
704 nir_ssa_def *address = nir_load_tess_param_base_ir3(b);
705 nir_variable *var = get_var(&b->shader->inputs, nir_intrinsic_base(intr));
706 nir_ssa_def *offset = build_per_vertex_offset(b, state,
707 intr->src[0].ssa, intr->src[1].ssa, var);
708
709 replace_intrinsic(b, intr, nir_intrinsic_load_global_ir3, address, offset, NULL);
710 break;
711 }
712
713 case nir_intrinsic_load_tess_level_inner:
714 case nir_intrinsic_load_tess_level_outer: {
715 unsigned dest_comp = nir_intrinsic_dest_components(intr);
716 b->cursor = nir_before_instr(&intr->instr);
717
718 gl_varying_slot slot;
719 if (intr->intrinsic == nir_intrinsic_load_tess_level_inner)
720 slot = VARYING_SLOT_TESS_LEVEL_INNER;
721 else
722 slot = VARYING_SLOT_TESS_LEVEL_OUTER;
723
724 nir_ssa_def *address = nir_load_tess_factor_base_ir3(b);
725 nir_ssa_def *offset = build_tessfactor_base(b, slot, state);
726
727 /* Loading across a vec4 (16 byte) memory boundary is problematic
728 * if we don't use components from the second vec4. The tess
729 * levels aren't guaranteed to be vec4 aligned and we don't
730 * know which levels are actually used, so we load each
731 * component individually.
732 */
733 nir_ssa_def *levels[4];
734 for (unsigned i = 0; i < dest_comp; i++) {
735 nir_intrinsic_instr *new_intr =
736 nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_global_ir3);
737
738 new_intr->src[0] = nir_src_for_ssa(address);
739 new_intr->src[1] = nir_src_for_ssa(nir_iadd(b, offset, nir_imm_int(b, i)));
740 new_intr->num_components = 1;
741 nir_ssa_dest_init(&new_intr->instr, &new_intr->dest, 1, 32, NULL);
742 nir_builder_instr_insert(b, &new_intr->instr);
743 levels[i] = &new_intr->dest.ssa;
744 }
745
746 nir_ssa_def *v = nir_vec(b, levels, dest_comp);
747
748 nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_src_for_ssa(v));
749
750 nir_instr_remove(&intr->instr);
751 break;
752 }
753
754 case nir_intrinsic_load_input: {
755 // src[] = { offset }.
756
757 nir_variable *var = get_var(&b->shader->inputs, nir_intrinsic_base(intr));
758
759 debug_assert(var->data.patch);
760
761 b->cursor = nir_before_instr(&intr->instr);
762
763 nir_ssa_def *address = nir_load_tess_param_base_ir3(b);
764 nir_ssa_def *offset = build_patch_offset(b, state, intr->src[0].ssa, var);
765
766 replace_intrinsic(b, intr, nir_intrinsic_load_global_ir3, address, offset, NULL);
767 break;
768 }
769
770 default:
771 break;
772 }
773 }
774 }
775
776 void
777 ir3_nir_lower_tess_eval(nir_shader *shader, unsigned topology)
778 {
779 struct state state = { .topology = topology };
780
781 if (shader_debug_enabled(shader->info.stage)) {
782 fprintf(stderr, "NIR (before tess lowering) for %s shader:\n",
783 _mesa_shader_stage_to_string(shader->info.stage));
784 nir_print_shader(shader, stderr);
785 }
786
787 /* Build map of inputs so we have the sizes. */
788 build_primitive_map(shader, &state.map, &shader->inputs);
789
790 nir_function_impl *impl = nir_shader_get_entrypoint(shader);
791 assert(impl);
792
793 nir_builder b;
794 nir_builder_init(&b, impl);
795
796 nir_foreach_block_safe (block, impl)
797 lower_tess_eval_block(block, &b, &state);
798
799 nir_metadata_preserve(impl, 0);
800 }
801
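/* Lower EmitVertex()/EndPrimitive().  The vertex_count vs. local_thread_id()
 * comparison below suggests each GS thread is responsible for at most one
 * emitted vertex, so the lowering is roughly:
 *
 *    EmitVertex()   ->  if (vertex_count == local_thread_id()) {
 *                          emit_outputs = old_outputs;
 *                          emitted_vertex++;
 *                       }
 *                       vertex_count++;
 *                       vertex_flags = 0;
 *
 *    EndPrimitive() ->  vertex_flags = 4;   (presumably the "cut" marker,
 *                                            picked up by the next vertex)
 */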
802 static void
803 lower_gs_block(nir_block *block, nir_builder *b, struct state *state)
804 {
805 nir_foreach_instr_safe (instr, block) {
806 if (instr->type != nir_instr_type_intrinsic)
807 continue;
808
809 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
810
811 switch (intr->intrinsic) {
812 case nir_intrinsic_end_primitive: {
813 b->cursor = nir_before_instr(&intr->instr);
814 nir_store_var(b, state->vertex_flags_out, nir_imm_int(b, 4), 0x1);
815 nir_instr_remove(&intr->instr);
816 break;
817 }
818
819 case nir_intrinsic_emit_vertex: {
820 /* Load the vertex count */
821 b->cursor = nir_before_instr(&intr->instr);
822 nir_ssa_def *count = nir_load_var(b, state->vertex_count_var);
823
824 nir_push_if(b, nir_ieq(b, count, local_thread_id(b)));
825
826 foreach_two_lists(dest_node, &state->emit_outputs, src_node, &state->old_outputs) {
827 nir_variable *dest = exec_node_data(nir_variable, dest_node, node);
828 nir_variable *src = exec_node_data(nir_variable, src_node, node);
829 nir_copy_var(b, dest, src);
830 }
831
832 nir_instr_remove(&intr->instr);
833
834 nir_store_var(b, state->emitted_vertex_var,
835 nir_iadd(b, nir_load_var(b, state->emitted_vertex_var), nir_imm_int(b, 1)), 0x1);
836
837 nir_pop_if(b, NULL);
838
839 /* Increment the vertex count by 1 */
840 nir_store_var(b, state->vertex_count_var,
841 nir_iadd(b, count, nir_imm_int(b, 1)), 0x1); /* .x */
842 nir_store_var(b, state->vertex_flags_out, nir_imm_int(b, 0), 0x1);
843
844 break;
845 }
846
847 default:
848 break;
849 }
850 }
851 }
852
853 void
854 ir3_nir_lower_gs(nir_shader *shader)
855 {
856 struct state state = { };
857
858 if (shader_debug_enabled(shader->info.stage)) {
859 fprintf(stderr, "NIR (before gs lowering):\n");
860 nir_print_shader(shader, stderr);
861 }
862
863 build_primitive_map(shader, &state.map, &shader->inputs);
864
865 /* Create an output var for vertex_flags. This will be shadowed below,
866 * the same way regular outputs get shadowed, and this variable will become a
867 * temporary.
868 */
869 state.vertex_flags_out = nir_variable_create(shader, nir_var_shader_out,
870 glsl_uint_type(), "vertex_flags");
871 state.vertex_flags_out->data.driver_location = shader->num_outputs++;
872 state.vertex_flags_out->data.location = VARYING_SLOT_GS_VERTEX_FLAGS_IR3;
873 state.vertex_flags_out->data.interpolation = INTERP_MODE_NONE;
874
875 nir_function_impl *impl = nir_shader_get_entrypoint(shader);
876 assert(impl);
877
878 nir_builder b;
879 nir_builder_init(&b, impl);
880 b.cursor = nir_before_cf_list(&impl->body);
881
882 state.header = nir_load_gs_header_ir3(&b);
883
884 /* Generate two sets of shadow vars for the output variables. The first
885 * set replaces the real outputs and the second set (emit_outputs) we'll
886 * assign in the emit_vertex conditionals. Then at the end of the shader
887 * we copy the emit_outputs to the real outputs, so that we get
888 * store_output in uniform control flow.
889 */
890 exec_list_move_nodes_to(&shader->outputs, &state.old_outputs);
891 exec_list_make_empty(&state.emit_outputs);
892 nir_foreach_variable(var, &state.old_outputs) {
893 /* Create a new output var by cloning the original output var and
894 * stealing the name.
895 */
896 nir_variable *output = nir_variable_clone(var, shader);
897 exec_list_push_tail(&shader->outputs, &output->node);
898
899 /* Rewrite the original output to be a shadow variable. */
900 var->name = ralloc_asprintf(var, "%s@gs-temp", output->name);
901 var->data.mode = nir_var_shader_temp;
902
903 /* Clone the shadow variable to create the emit shadow variable that
904 * we'll assign in the emit conditionals.
905 */
906 nir_variable *emit_output = nir_variable_clone(var, shader);
907 emit_output->name = ralloc_asprintf(var, "%s@emit-temp", output->name);
908 exec_list_push_tail(&state.emit_outputs, &emit_output->node);
909 }
910
911 /* During the shader we'll keep track of which vertex we're currently
912 * emitting for the EmitVertex test and how many vertices we emitted so we
913 * know to discard if we didn't emit any. In most simple shaders, this can
914 * all be statically determined and gets optimized away.
915 */
916 state.vertex_count_var =
917 nir_local_variable_create(impl, glsl_uint_type(), "vertex_count");
918 state.emitted_vertex_var =
919 nir_local_variable_create(impl, glsl_uint_type(), "emitted_vertex");
920
921 /* Initialize to 0. */
922 b.cursor = nir_before_cf_list(&impl->body);
923 nir_store_var(&b, state.vertex_count_var, nir_imm_int(&b, 0), 0x1);
924 nir_store_var(&b, state.emitted_vertex_var, nir_imm_int(&b, 0), 0x1);
925 nir_store_var(&b, state.vertex_flags_out, nir_imm_int(&b, 4), 0x1);
926
927 nir_foreach_block_safe (block, impl)
928 lower_gs_block(block, &b, &state);
929
930 set_foreach(impl->end_block->predecessors, block_entry) {
931 struct nir_block *block = (void *)block_entry->key;
932 b.cursor = nir_after_block_before_jump(block);
933
934 nir_intrinsic_instr *discard_if =
935 nir_intrinsic_instr_create(b.shader, nir_intrinsic_discard_if);
936
937 nir_ssa_def *cond = nir_ieq(&b, nir_load_var(&b, state.emitted_vertex_var), nir_imm_int(&b, 0));
938
939 discard_if->src[0] = nir_src_for_ssa(cond);
940
941 nir_builder_instr_insert(&b, &discard_if->instr);
942
943 foreach_two_lists(dest_node, &shader->outputs, src_node, &state.emit_outputs) {
944 nir_variable *dest = exec_node_data(nir_variable, dest_node, node);
945 nir_variable *src = exec_node_data(nir_variable, src_node, node);
946 nir_copy_var(&b, dest, src);
947 }
948 }
949
950 exec_list_append(&shader->globals, &state.old_outputs);
951 exec_list_append(&shader->globals, &state.emit_outputs);
952
953 nir_metadata_preserve(impl, 0);
954
955 nir_lower_global_vars_to_local(shader);
956 nir_split_var_copies(shader);
957 nir_lower_var_copies(shader);
958
959 nir_fixup_deref_modes(shader);
960
961 if (shader_debug_enabled(shader->info.stage)) {
962 fprintf(stderr, "NIR (after gs lowering):\n");
963 nir_print_shader(shader, stderr);
964 }
965 }
966
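/* For each consumer input, record (in locs[], indexed by driver_location)
 * the offset of the matching producer output within the shared storage,
 * scaled to the units the consumer's load instructions expect, and return
 * the number of input locations used.
 */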
967 uint32_t
968 ir3_link_geometry_stages(const struct ir3_shader_variant *producer,
969 const struct ir3_shader_variant *consumer,
970 uint32_t *locs)
971 {
972 uint32_t num_loc = 0, factor;
973
974 switch (consumer->type) {
975 case MESA_SHADER_TESS_CTRL:
976 case MESA_SHADER_GEOMETRY:
977 /* These stages load with ldlw, which expects byte offsets. */
978 factor = 4;
979 break;
980 case MESA_SHADER_TESS_EVAL:
981 /* The tess eval shader uses ldg, which takes dword offsets. */
982 factor = 1;
983 break;
984 default:
985 unreachable("bad shader stage");
986 }
987
988 nir_foreach_variable(in_var, &consumer->shader->nir->inputs) {
989 nir_foreach_variable(out_var, &producer->shader->nir->outputs) {
990 if (in_var->data.location == out_var->data.location) {
991 locs[in_var->data.driver_location] =
992 producer->output_loc[out_var->data.driver_location] * factor;
993
994 debug_assert(num_loc <= in_var->data.driver_location + 1);
995 num_loc = in_var->data.driver_location + 1;
996 }
997 }
998 }
999
1000 return num_loc;
1001 }