/*
 * Copyright © 2019 Google, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include "ir3_nir.h"
#include "ir3_compiler.h"
#include "compiler/nir/nir_builder.h"

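/* Per-shader state shared by the lowering passes below.  Only the fields a
 * given pass needs are populated: the primitive map for the VS/HS/DS/GS
 * linkage passes, the count/flag variables and output lists for GS lowering.
 */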
struct state {
   uint32_t topology;

   struct primitive_map {
      unsigned loc[32];
      unsigned size[32];
      unsigned stride;
   } map;

   nir_ssa_def *header;

   nir_variable *vertex_count_var;
   nir_variable *emitted_vertex_var;
   nir_variable *vertex_flags_out;

   struct exec_list old_outputs;
   struct exec_list emit_outputs;

   /* The tess ctrl shader on a650 gets the local primitive id at a
    * different bit position:
    */
   unsigned local_primitive_id_start;
};

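/* Extract a bitfield from v: (v >> start) & mask.  Note that "mask" is the
 * already-shifted field mask, e.g. 31 for a 5-bit field.
 */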
static nir_ssa_def *
bitfield_extract(nir_builder *b, nir_ssa_def *v, uint32_t start, uint32_t mask)
{
   return nir_iand(b, nir_ushr(b, v, nir_imm_int(b, start)),
                   nir_imm_int(b, mask));
}

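/* The tcs/gs header packs several IDs into one register: the vertex ID in
 * bits [6:10], the invocation ID in bits [11:15], and a 6-bit local
 * primitive ID whose position varies (see local_primitive_id_start).
 */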
static nir_ssa_def *
build_invocation_id(nir_builder *b, struct state *state)
{
   return bitfield_extract(b, state->header, 11, 31);
}

static nir_ssa_def *
build_vertex_id(nir_builder *b, struct state *state)
{
   return bitfield_extract(b, state->header, 6, 31);
}

static nir_ssa_def *
build_local_primitive_id(nir_builder *b, struct state *state)
{
   return bitfield_extract(b, state->header, state->local_primitive_id_start, 63);
}

static nir_variable *
get_var(struct exec_list *list, int driver_location)
{
   nir_foreach_variable (v, list) {
      if (v->data.driver_location == driver_location) {
         return v;
      }
   }

   return NULL;
}

static bool
is_tess_levels(nir_variable *var)
{
   return (var->data.location == VARYING_SLOT_TESS_LEVEL_OUTER ||
           var->data.location == VARYING_SLOT_TESS_LEVEL_INNER);
}

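/* Compute the offset (in dwords) of an attribute in the shared-memory
 * staging area used to link VS->HS/GS and DS->GS:
 *
 *    local_primitive_id * primitive_stride +
 *    vertex * vertex_stride +
 *    attr_offset + offset
 *
 * The writing stage (VS/DS) knows the layout at compile time via the
 * primitive map, while the reading stage (HS/GS) gets it through driver
 * params.
 */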
static nir_ssa_def *
build_local_offset(nir_builder *b, struct state *state,
                   nir_ssa_def *vertex, uint32_t base, nir_ssa_def *offset)
{
   nir_ssa_def *primitive_stride = nir_load_vs_primitive_stride_ir3(b);
   nir_ssa_def *primitive_offset =
      nir_imul24(b, build_local_primitive_id(b, state), primitive_stride);
   nir_ssa_def *attr_offset;
   nir_ssa_def *vertex_stride;

   switch (b->shader->info.stage) {
   case MESA_SHADER_VERTEX:
   case MESA_SHADER_TESS_EVAL:
      vertex_stride = nir_imm_int(b, state->map.stride * 4);
      attr_offset = nir_imm_int(b, state->map.loc[base] * 4);
      break;
   case MESA_SHADER_TESS_CTRL:
   case MESA_SHADER_GEOMETRY:
      vertex_stride = nir_load_vs_vertex_stride_ir3(b);
      attr_offset = nir_load_primitive_location_ir3(b, base);
      break;
   default:
      unreachable("bad shader stage");
   }

   nir_ssa_def *vertex_offset = nir_imul24(b, vertex, vertex_stride);

   return nir_iadd(b, nir_iadd(b, primitive_offset, vertex_offset),
                   nir_iadd(b, attr_offset, offset));
}

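/* Replace intr with a new intrinsic of the given op, taking up to three
 * sources.  The new intrinsic inherits num_components, all uses of the old
 * destination are rewritten, and the old instruction is removed.
 */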
static nir_intrinsic_instr *
replace_intrinsic(nir_builder *b, nir_intrinsic_instr *intr,
                  nir_intrinsic_op op, nir_ssa_def *src0, nir_ssa_def *src1, nir_ssa_def *src2)
{
   nir_intrinsic_instr *new_intr =
      nir_intrinsic_instr_create(b->shader, op);

   new_intr->src[0] = nir_src_for_ssa(src0);
   if (src1)
      new_intr->src[1] = nir_src_for_ssa(src1);
   if (src2)
      new_intr->src[2] = nir_src_for_ssa(src2);

   new_intr->num_components = intr->num_components;

   if (nir_intrinsic_infos[op].has_dest)
      nir_ssa_dest_init(&new_intr->instr, &new_intr->dest,
                        intr->num_components, 32, NULL);

   nir_builder_instr_insert(b, &new_intr->instr);

   if (nir_intrinsic_infos[op].has_dest)
      nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_src_for_ssa(&new_intr->dest.ssa));

   nir_instr_remove(&intr->instr);

   return new_intr;
}

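/* Build the packed per-primitive layout: the first pass records the size
 * of each variable in dwords, the second assigns packed dword locations
 * and converts size[] into a per-array-element (per-vertex) size, with
 * patch variables contributing zero.  map->stride ends up as the total
 * per-primitive footprint in dwords.
 */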
static void
build_primitive_map(nir_shader *shader, struct primitive_map *map, struct exec_list *list)
{
   nir_foreach_variable (var, list) {
      switch (var->data.location) {
      case VARYING_SLOT_TESS_LEVEL_OUTER:
      case VARYING_SLOT_TESS_LEVEL_INNER:
         continue;
      }

      unsigned size = glsl_count_attribute_slots(var->type, false) * 4;

      assert(var->data.driver_location < ARRAY_SIZE(map->size));
      map->size[var->data.driver_location] =
         MAX2(map->size[var->data.driver_location], size);
   }

   unsigned loc = 0;
   for (uint32_t i = 0; i < ARRAY_SIZE(map->size); i++) {
      if (map->size[i] == 0)
         continue;
      nir_variable *var = get_var(list, i);
      map->loc[i] = loc;
      loc += map->size[i];

      if (var->data.patch)
         map->size[i] = 0;
      else
         map->size[i] = map->size[i] / glsl_get_length(var->type);
   }

   map->stride = loc;
}

static void
lower_block_to_explicit_output(nir_block *block, nir_builder *b, struct state *state)
{
   nir_foreach_instr_safe (instr, block) {
      if (instr->type != nir_instr_type_intrinsic)
         continue;

      nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);

      switch (intr->intrinsic) {
      case nir_intrinsic_store_output: {
         // src[] = { value, offset }.

         /* nir_lower_io_to_temporaries replaces all access to output
          * variables with temp variables and then emits a nir_copy_var at
          * the end of the shader.  Thus, we should always get a full wrmask
          * here.
          */
         assert(util_is_power_of_two_nonzero(nir_intrinsic_write_mask(intr) + 1));

         b->cursor = nir_instr_remove(&intr->instr);

         nir_ssa_def *vertex_id = build_vertex_id(b, state);
         nir_ssa_def *offset = build_local_offset(b, state, vertex_id, nir_intrinsic_base(intr),
                                                  intr->src[1].ssa);
         nir_intrinsic_instr *store =
            nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_shared_ir3);

         store->src[0] = nir_src_for_ssa(intr->src[0].ssa);
         store->src[1] = nir_src_for_ssa(offset);
         store->num_components = intr->num_components;

         nir_builder_instr_insert(b, &store->instr);
         break;
      }

      default:
         break;
      }
   }
}

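/* The local thread ID lives in bits [16:25] of the GS header. */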
static nir_ssa_def *
local_thread_id(nir_builder *b)
{
   return bitfield_extract(b, nir_load_gs_header_ir3(b), 16, 1023);
}

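/* Lower the outputs of a VS feeding tessellation, or a VS/DS feeding a GS,
 * to explicit store_shared_ir3 stores, using the packed layout recorded in
 * the primitive map.  The consuming HS/GS reads them back with
 * ir3_nir_lower_to_explicit_input() below.
 */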
void
ir3_nir_lower_to_explicit_output(nir_shader *shader, struct ir3_shader_variant *v,
                                 unsigned topology)
{
   struct state state = { };

   build_primitive_map(shader, &state.map, &shader->outputs);
   memcpy(v->output_loc, state.map.loc, sizeof(v->output_loc));

   nir_function_impl *impl = nir_shader_get_entrypoint(shader);
   assert(impl);

   nir_builder b;
   nir_builder_init(&b, impl);
   b.cursor = nir_before_cf_list(&impl->body);

   if (v->type == MESA_SHADER_VERTEX && topology != IR3_TESS_NONE)
      state.header = nir_load_tcs_header_ir3(&b);
   else
      state.header = nir_load_gs_header_ir3(&b);

   nir_foreach_block_safe (block, impl)
      lower_block_to_explicit_output(block, &b, &state);

   nir_metadata_preserve(impl, nir_metadata_block_index |
                               nir_metadata_dominance);

   v->output_size = state.map.stride;
}


static void
lower_block_to_explicit_input(nir_block *block, nir_builder *b, struct state *state)
{
   nir_foreach_instr_safe (instr, block) {
      if (instr->type != nir_instr_type_intrinsic)
         continue;

      nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);

      switch (intr->intrinsic) {
      case nir_intrinsic_load_per_vertex_input: {
         // src[] = { vertex, offset }.

         b->cursor = nir_before_instr(&intr->instr);

         nir_ssa_def *offset = build_local_offset(b, state,
               intr->src[0].ssa, // this is typically gl_InvocationID
               nir_intrinsic_base(intr),
               intr->src[1].ssa);

         replace_intrinsic(b, intr, nir_intrinsic_load_shared_ir3, offset, NULL, NULL);
         break;
      }

      case nir_intrinsic_load_invocation_id: {
         b->cursor = nir_before_instr(&intr->instr);

         nir_ssa_def *iid = build_invocation_id(b, state);
         nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_src_for_ssa(iid));
         nir_instr_remove(&intr->instr);
         break;
      }

      default:
         break;
      }
   }
}

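/* The input-side counterpart of ir3_nir_lower_to_explicit_output(): lower
 * HS/GS per-vertex input loads to load_shared_ir3 from the staging area
 * written by the previous stage.
 */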
void
ir3_nir_lower_to_explicit_input(nir_shader *shader, struct ir3_compiler *compiler)
{
   struct state state = { };

   /* when using stl/ldl (instead of stlw/ldlw) for linking VS and HS,
    * HS uses a different primitive id, which starts at bit 16 in the header
    */
   if (shader->info.stage == MESA_SHADER_TESS_CTRL && compiler->tess_use_shared)
      state.local_primitive_id_start = 16;

   nir_function_impl *impl = nir_shader_get_entrypoint(shader);
   assert(impl);

   nir_builder b;
   nir_builder_init(&b, impl);
   b.cursor = nir_before_cf_list(&impl->body);

   if (shader->info.stage == MESA_SHADER_GEOMETRY)
      state.header = nir_load_gs_header_ir3(&b);
   else
      state.header = nir_load_tcs_header_ir3(&b);

   nir_foreach_block_safe (block, impl)
      lower_block_to_explicit_input(block, &b, &state);
}


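/* Compute the dword offset of a per-vertex attribute in the tess param BO.
 * Each patch occupies a slab of patch_stride dwords at
 * primitive_id * patch_stride; within it, an attribute lives at
 * attr_offset + vertex * attr_stride.  The incoming offset source is in
 * vec4 units, hence the shift by 2 to convert it to dwords.
 */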
static nir_ssa_def *
build_per_vertex_offset(nir_builder *b, struct state *state,
                        nir_ssa_def *vertex, nir_ssa_def *offset, nir_variable *var)
{
   nir_ssa_def *primitive_id = nir_load_primitive_id(b);
   nir_ssa_def *patch_stride = nir_load_hs_patch_stride_ir3(b);
   nir_ssa_def *patch_offset = nir_imul24(b, primitive_id, patch_stride);
   nir_ssa_def *attr_offset;
   int loc = var->data.driver_location;

   switch (b->shader->info.stage) {
   case MESA_SHADER_TESS_CTRL:
      attr_offset = nir_imm_int(b, state->map.loc[loc]);
      break;
   case MESA_SHADER_TESS_EVAL:
      attr_offset = nir_load_primitive_location_ir3(b, loc);
      break;
   default:
      unreachable("bad shader stage");
   }

   nir_ssa_def *attr_stride = nir_imm_int(b, state->map.size[loc]);
   nir_ssa_def *vertex_offset = nir_imul24(b, vertex, attr_stride);

   return nir_iadd(b, nir_iadd(b, patch_offset, attr_offset),
                   nir_iadd(b, vertex_offset, nir_ishl(b, offset, nir_imm_int(b, 2))));
}

static nir_ssa_def *
build_patch_offset(nir_builder *b, struct state *state, nir_ssa_def *offset, nir_variable *var)
{
   debug_assert(var && var->data.patch);

   return build_per_vertex_offset(b, state, nir_imm_int(b, 0), offset, var);
}

static void
tess_level_components(struct state *state, uint32_t *inner, uint32_t *outer)
{
   switch (state->topology) {
   case IR3_TESS_TRIANGLES:
      *inner = 1;
      *outer = 3;
      break;
   case IR3_TESS_QUADS:
      *inner = 2;
      *outer = 4;
      break;
   case IR3_TESS_ISOLINES:
      *inner = 0;
      *outer = 2;
      break;
   default:
      unreachable("bad");
   }
}

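/* Compute the dword offset of the outer/inner tess levels for the current
 * patch in the tess factor BO.  Per-patch layout:
 *
 *    [ header dword ][ outer levels ][ inner levels ]
 *
 * so patch_stride = 1 + outer_levels + inner_levels.
 */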
static nir_ssa_def *
build_tessfactor_base(nir_builder *b, gl_varying_slot slot, struct state *state)
{
   uint32_t inner_levels, outer_levels;
   tess_level_components(state, &inner_levels, &outer_levels);

   const uint32_t patch_stride = 1 + inner_levels + outer_levels;

   nir_ssa_def *primitive_id = nir_load_primitive_id(b);

   nir_ssa_def *patch_offset = nir_imul24(b, primitive_id, nir_imm_int(b, patch_stride));

   uint32_t offset;
   switch (slot) {
   case VARYING_SLOT_TESS_LEVEL_OUTER:
      /* There's some kind of header dword, tess levels start at index 1. */
      offset = 1;
      break;
   case VARYING_SLOT_TESS_LEVEL_INNER:
      offset = 1 + outer_levels;
      break;
   default:
      unreachable("bad");
   }

   return nir_iadd(b, patch_offset, nir_imm_int(b, offset));
}

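/* Lower the TCS: barriers disappear (a patch always fits in one warp, see
 * below), per-vertex output access becomes global load/store into the tess
 * param BO, and tess level writes go to the tess factor BO, clamped to the
 * number of components the topology actually has.
 */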
static void
lower_tess_ctrl_block(nir_block *block, nir_builder *b, struct state *state)
{
   nir_foreach_instr_safe (instr, block) {
      if (instr->type != nir_instr_type_intrinsic)
         continue;

      nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);

      switch (intr->intrinsic) {
      case nir_intrinsic_control_barrier:
      case nir_intrinsic_memory_barrier_tcs_patch:
         /* Hull shaders dispatch 32 wide so an entire patch will always
          * fit in a single warp and execute in lock-step.  Consequently,
          * we don't need to do anything for TCS barriers, so just remove
          * the intrinsic.  Otherwise we'd emit an actual barrier
          * instruction, which would deadlock.
          */
         nir_instr_remove(&intr->instr);
         break;

      case nir_intrinsic_load_per_vertex_output: {
         // src[] = { vertex, offset }.

         b->cursor = nir_before_instr(&intr->instr);

         nir_ssa_def *address = nir_load_tess_param_base_ir3(b);
         nir_variable *var = get_var(&b->shader->outputs, nir_intrinsic_base(intr));
         nir_ssa_def *offset = build_per_vertex_offset(b, state,
               intr->src[0].ssa, intr->src[1].ssa, var);

         replace_intrinsic(b, intr, nir_intrinsic_load_global_ir3, address, offset, NULL);
         break;
      }

      case nir_intrinsic_store_per_vertex_output: {
         // src[] = { value, vertex, offset }.

         b->cursor = nir_before_instr(&intr->instr);

         /* sparse writemask not supported */
         assert(util_is_power_of_two_nonzero(nir_intrinsic_write_mask(intr) + 1));

         nir_ssa_def *value = intr->src[0].ssa;
         nir_ssa_def *address = nir_load_tess_param_base_ir3(b);
         nir_variable *var = get_var(&b->shader->outputs, nir_intrinsic_base(intr));
         nir_ssa_def *offset = build_per_vertex_offset(b, state,
               intr->src[1].ssa, intr->src[2].ssa, var);

         replace_intrinsic(b, intr, nir_intrinsic_store_global_ir3, value, address,
               nir_iadd(b, offset, nir_imm_int(b, nir_intrinsic_component(intr))));

         break;
      }

      case nir_intrinsic_load_output: {
         // src[] = { offset }.

         nir_variable *var = get_var(&b->shader->outputs, nir_intrinsic_base(intr));

         b->cursor = nir_before_instr(&intr->instr);

         nir_ssa_def *address, *offset;

         /* note if vectorization of the tess level loads ever happens:
          * "ldg" across 16-byte boundaries can behave incorrectly if results
          * are never used.  most likely some issue with (sy) not properly
          * syncing with values coming from a second memory transaction.
          */
         if (is_tess_levels(var)) {
            assert(intr->dest.ssa.num_components == 1);
            address = nir_load_tess_factor_base_ir3(b);
            offset = build_tessfactor_base(b, var->data.location, state);
         } else {
            address = nir_load_tess_param_base_ir3(b);
            offset = build_patch_offset(b, state, intr->src[0].ssa, var);
         }

         replace_intrinsic(b, intr, nir_intrinsic_load_global_ir3, address, offset, NULL);
         break;
      }

      case nir_intrinsic_store_output: {
         // src[] = { value, offset }.

         /* write patch output to bo */

         nir_variable *var = get_var(&b->shader->outputs, nir_intrinsic_base(intr));

         b->cursor = nir_before_instr(&intr->instr);

         /* sparse writemask not supported */
         assert(util_is_power_of_two_nonzero(nir_intrinsic_write_mask(intr) + 1));

         if (is_tess_levels(var)) {
            /* Tess levels are defined as float[4] and float[2], but the
             * tess factor BO has smaller sizes for tris/isolines, so we
             * have to discard any writes beyond the number of components
             * for inner/outer levels. */
            uint32_t inner_levels, outer_levels, levels;
            tess_level_components(state, &inner_levels, &outer_levels);

            if (var->data.location == VARYING_SLOT_TESS_LEVEL_OUTER)
               levels = outer_levels;
            else
               levels = inner_levels;

            assert(intr->src[0].ssa->num_components == 1);

            nir_ssa_def *offset =
               nir_iadd_imm(b, intr->src[1].ssa, nir_intrinsic_component(intr));

            nir_if *nif = nir_push_if(b, nir_ult(b, offset, nir_imm_int(b, levels)));

            replace_intrinsic(b, intr, nir_intrinsic_store_global_ir3,
                  intr->src[0].ssa,
                  nir_load_tess_factor_base_ir3(b),
                  nir_iadd(b, offset, build_tessfactor_base(b, var->data.location, state)));

            nir_pop_if(b, nif);
         } else {
            nir_ssa_def *address = nir_load_tess_param_base_ir3(b);
            nir_ssa_def *offset = build_patch_offset(b, state, intr->src[1].ssa, var);

            debug_assert(nir_intrinsic_component(intr) == 0);

            replace_intrinsic(b, intr, nir_intrinsic_store_global_ir3,
                  intr->src[0].ssa, address, offset);
         }
         break;
      }

      default:
         break;
      }
   }
}

static void
emit_tess_epilogue(nir_builder *b, struct state *state)
{
   /* Insert endpatch instruction:
    *
    * TODO we should re-work this to use normal flow control.
    */

   nir_intrinsic_instr *end_patch =
      nir_intrinsic_instr_create(b->shader, nir_intrinsic_end_patch_ir3);
   nir_builder_instr_insert(b, &end_patch->instr);
}

void
ir3_nir_lower_tess_ctrl(nir_shader *shader, struct ir3_shader_variant *v,
                        unsigned topology)
{
   struct state state = { .topology = topology };

   if (shader_debug_enabled(shader->info.stage)) {
      fprintf(stderr, "NIR (before tess lowering) for %s shader:\n",
              _mesa_shader_stage_to_string(shader->info.stage));
      nir_print_shader(shader, stderr);
   }

   build_primitive_map(shader, &state.map, &shader->outputs);
   memcpy(v->output_loc, state.map.loc, sizeof(v->output_loc));
   v->output_size = state.map.stride;

   nir_function_impl *impl = nir_shader_get_entrypoint(shader);
   assert(impl);

   nir_builder b;
   nir_builder_init(&b, impl);
   b.cursor = nir_before_cf_list(&impl->body);

   state.header = nir_load_tcs_header_ir3(&b);

   nir_foreach_block_safe (block, impl)
      lower_tess_ctrl_block(block, &b, &state);

   /* Now move the body of the TCS into a conditional:
    *
    *   if (gl_InvocationID < num_vertices)
    *     // body
    *
    */

   nir_cf_list body;
   nir_cf_extract(&body, nir_before_cf_list(&impl->body),
                  nir_after_cf_list(&impl->body));

   b.cursor = nir_after_cf_list(&impl->body);

   /* Re-emit the header, since the old one got moved into the if branch */
   state.header = nir_load_tcs_header_ir3(&b);
   nir_ssa_def *iid = build_invocation_id(&b, &state);

   const uint32_t nvertices = shader->info.tess.tcs_vertices_out;
   nir_ssa_def *cond = nir_ult(&b, iid, nir_imm_int(&b, nvertices));

   nir_if *nif = nir_push_if(&b, cond);

   nir_cf_reinsert(&body, b.cursor);

   b.cursor = nir_after_cf_list(&nif->then_list);

   /* Insert a conditional exit for threads with invocation id != 0 */
   nir_ssa_def *iid0_cond = nir_ieq(&b, iid, nir_imm_int(&b, 0));
   nir_intrinsic_instr *cond_end =
      nir_intrinsic_instr_create(shader, nir_intrinsic_cond_end_ir3);
   cond_end->src[0] = nir_src_for_ssa(iid0_cond);
   nir_builder_instr_insert(&b, &cond_end->instr);

   emit_tess_epilogue(&b, &state);

   nir_pop_if(&b, nif);

   nir_metadata_preserve(impl, 0);
}

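/* Lower the TES/DS: fix up gl_TessCoord (computing the third coordinate
 * for triangles), and turn per-vertex/patch input loads into global loads
 * from the tess param and tess factor BOs.
 */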
static void
lower_tess_eval_block(nir_block *block, nir_builder *b, struct state *state)
{
   nir_foreach_instr_safe (instr, block) {
      if (instr->type != nir_instr_type_intrinsic)
         continue;

      nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);

      switch (intr->intrinsic) {
      case nir_intrinsic_load_tess_coord: {
         b->cursor = nir_after_instr(&intr->instr);
         nir_ssa_def *x = nir_channel(b, &intr->dest.ssa, 0);
         nir_ssa_def *y = nir_channel(b, &intr->dest.ssa, 1);
         nir_ssa_def *z;

         if (state->topology == IR3_TESS_TRIANGLES)
            z = nir_fsub(b, nir_fsub(b, nir_imm_float(b, 1.0f), y), x);
         else
            z = nir_imm_float(b, 0.0f);

         nir_ssa_def *coord = nir_vec3(b, x, y, z);

         nir_ssa_def_rewrite_uses_after(&intr->dest.ssa,
                                        nir_src_for_ssa(coord),
                                        b->cursor.instr);
         break;
      }

      case nir_intrinsic_load_per_vertex_input: {
         // src[] = { vertex, offset }.

         b->cursor = nir_before_instr(&intr->instr);

         nir_ssa_def *address = nir_load_tess_param_base_ir3(b);
         nir_variable *var = get_var(&b->shader->inputs, nir_intrinsic_base(intr));
         nir_ssa_def *offset = build_per_vertex_offset(b, state,
               intr->src[0].ssa, intr->src[1].ssa, var);

         replace_intrinsic(b, intr, nir_intrinsic_load_global_ir3, address, offset, NULL);
         break;
      }

      case nir_intrinsic_load_input: {
         // src[] = { offset }.

         nir_variable *var = get_var(&b->shader->inputs, nir_intrinsic_base(intr));

         debug_assert(var->data.patch);

         b->cursor = nir_before_instr(&intr->instr);

         nir_ssa_def *address, *offset;

         /* note if vectorization of the tess level loads ever happens:
          * "ldg" across 16-byte boundaries can behave incorrectly if results
          * are never used.  most likely some issue with (sy) not properly
          * syncing with values coming from a second memory transaction.
          */
         if (is_tess_levels(var)) {
            assert(intr->dest.ssa.num_components == 1);
            address = nir_load_tess_factor_base_ir3(b);
            offset = build_tessfactor_base(b, var->data.location, state);
         } else {
            address = nir_load_tess_param_base_ir3(b);
            offset = build_patch_offset(b, state, intr->src[0].ssa, var);
         }

         offset = nir_iadd(b, offset, nir_imm_int(b, nir_intrinsic_component(intr)));

         replace_intrinsic(b, intr, nir_intrinsic_load_global_ir3, address, offset, NULL);
         break;
      }

      default:
         break;
      }
   }
}

void
ir3_nir_lower_tess_eval(nir_shader *shader, unsigned topology)
{
   struct state state = { .topology = topology };

   if (shader_debug_enabled(shader->info.stage)) {
      fprintf(stderr, "NIR (before tess lowering) for %s shader:\n",
              _mesa_shader_stage_to_string(shader->info.stage));
      nir_print_shader(shader, stderr);
   }

   /* Build map of inputs so we have the sizes. */
   build_primitive_map(shader, &state.map, &shader->inputs);

   nir_function_impl *impl = nir_shader_get_entrypoint(shader);
   assert(impl);

   nir_builder b;
   nir_builder_init(&b, impl);

   nir_foreach_block_safe (block, impl)
      lower_tess_eval_block(block, &b, &state);

   nir_metadata_preserve(impl, 0);
}

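/* Lower GS vertex emission: end_primitive just sets the EndPrim bit (4) on
 * vertex_flags for the next vertex, while emit_vertex copies the shadowed
 * outputs into the emit shadows on the invocation whose local thread id
 * matches the current vertex count; the vertex count is then incremented
 * and vertex_flags reset for all invocations.
 */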
static void
lower_gs_block(nir_block *block, nir_builder *b, struct state *state)
{
   nir_foreach_instr_safe (instr, block) {
      if (instr->type != nir_instr_type_intrinsic)
         continue;

      nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);

      switch (intr->intrinsic) {
      case nir_intrinsic_end_primitive: {
         b->cursor = nir_before_instr(&intr->instr);
         nir_store_var(b, state->vertex_flags_out, nir_imm_int(b, 4), 0x1);
         nir_instr_remove(&intr->instr);
         break;
      }

      case nir_intrinsic_emit_vertex: {
         /* Load the vertex count */
         b->cursor = nir_before_instr(&intr->instr);
         nir_ssa_def *count = nir_load_var(b, state->vertex_count_var);

         nir_push_if(b, nir_ieq(b, count, local_thread_id(b)));

         foreach_two_lists(dest_node, &state->emit_outputs, src_node, &state->old_outputs) {
            nir_variable *dest = exec_node_data(nir_variable, dest_node, node);
            nir_variable *src = exec_node_data(nir_variable, src_node, node);
            nir_copy_var(b, dest, src);
         }

         nir_instr_remove(&intr->instr);

         nir_store_var(b, state->emitted_vertex_var,
               nir_iadd(b, nir_load_var(b, state->emitted_vertex_var), nir_imm_int(b, 1)), 0x1);

         nir_pop_if(b, NULL);

         /* Increment the vertex count by 1 */
         nir_store_var(b, state->vertex_count_var,
               nir_iadd(b, count, nir_imm_int(b, 1)), 0x1); /* .x */
         nir_store_var(b, state->vertex_flags_out, nir_imm_int(b, 0), 0x1);

         break;
      }

      default:
         break;
      }
   }
}

void
ir3_nir_lower_gs(nir_shader *shader)
{
   struct state state = { };

   if (shader_debug_enabled(shader->info.stage)) {
      fprintf(stderr, "NIR (before gs lowering):\n");
      nir_print_shader(shader, stderr);
   }

   build_primitive_map(shader, &state.map, &shader->inputs);

   /* Create an output var for vertex_flags.  This will be shadowed below,
    * the same way regular outputs get shadowed, and this variable will
    * become a temporary.
    */
   state.vertex_flags_out = nir_variable_create(shader, nir_var_shader_out,
                                                glsl_uint_type(), "vertex_flags");
   state.vertex_flags_out->data.driver_location = shader->num_outputs++;
   state.vertex_flags_out->data.location = VARYING_SLOT_GS_VERTEX_FLAGS_IR3;
   state.vertex_flags_out->data.interpolation = INTERP_MODE_NONE;

   nir_function_impl *impl = nir_shader_get_entrypoint(shader);
   assert(impl);

   nir_builder b;
   nir_builder_init(&b, impl);
   b.cursor = nir_before_cf_list(&impl->body);

   state.header = nir_load_gs_header_ir3(&b);

   /* Generate two sets of shadow vars for the output variables.  The first
    * set replaces the real outputs and the second set (emit_outputs) we'll
    * assign in the emit_vertex conditionals.  Then at the end of the shader
    * we copy the emit_outputs to the real outputs, so that we get
    * store_output in uniform control flow.
    */
   exec_list_move_nodes_to(&shader->outputs, &state.old_outputs);
   exec_list_make_empty(&state.emit_outputs);
   nir_foreach_variable(var, &state.old_outputs) {
      /* Create a new output var by cloning the original output var and
       * stealing the name.
       */
      nir_variable *output = nir_variable_clone(var, shader);
      exec_list_push_tail(&shader->outputs, &output->node);

      /* Rewrite the original output to be a shadow variable. */
      var->name = ralloc_asprintf(var, "%s@gs-temp", output->name);
      var->data.mode = nir_var_shader_temp;

      /* Clone the shadow variable to create the emit shadow variable that
       * we'll assign in the emit conditionals.
       */
      nir_variable *emit_output = nir_variable_clone(var, shader);
      emit_output->name = ralloc_asprintf(var, "%s@emit-temp", output->name);
      exec_list_push_tail(&state.emit_outputs, &emit_output->node);
   }

   /* Throughout the shader we'll keep track of which vertex we're currently
    * emitting for the EmitVertex test and how many vertices we emitted, so
    * we know to discard if we didn't emit any.  In most simple shaders,
    * this can all be statically determined and gets optimized away.
    */
   state.vertex_count_var =
      nir_local_variable_create(impl, glsl_uint_type(), "vertex_count");
   state.emitted_vertex_var =
      nir_local_variable_create(impl, glsl_uint_type(), "emitted_vertex");

   /* Initialize to 0. */
   b.cursor = nir_before_cf_list(&impl->body);
   nir_store_var(&b, state.vertex_count_var, nir_imm_int(&b, 0), 0x1);
   nir_store_var(&b, state.emitted_vertex_var, nir_imm_int(&b, 0), 0x1);
   nir_store_var(&b, state.vertex_flags_out, nir_imm_int(&b, 4), 0x1);

   nir_foreach_block_safe (block, impl)
      lower_gs_block(block, &b, &state);

   set_foreach(impl->end_block->predecessors, block_entry) {
      struct nir_block *block = (void *)block_entry->key;
      b.cursor = nir_after_block_before_jump(block);

      nir_intrinsic_instr *discard_if =
         nir_intrinsic_instr_create(b.shader, nir_intrinsic_discard_if);

      nir_ssa_def *cond = nir_ieq(&b, nir_load_var(&b, state.emitted_vertex_var), nir_imm_int(&b, 0));

      discard_if->src[0] = nir_src_for_ssa(cond);

      nir_builder_instr_insert(&b, &discard_if->instr);

      foreach_two_lists(dest_node, &shader->outputs, src_node, &state.emit_outputs) {
         nir_variable *dest = exec_node_data(nir_variable, dest_node, node);
         nir_variable *src = exec_node_data(nir_variable, src_node, node);
         nir_copy_var(&b, dest, src);
      }
   }

   exec_list_append(&shader->globals, &state.old_outputs);
   exec_list_append(&shader->globals, &state.emit_outputs);

   nir_metadata_preserve(impl, 0);

   nir_lower_global_vars_to_local(shader);
   nir_split_var_copies(shader);
   nir_lower_var_copies(shader);

   nir_fixup_deref_modes(shader);

   if (shader_debug_enabled(shader->info.stage)) {
      fprintf(stderr, "NIR (after gs lowering):\n");
      nir_print_shader(shader, stderr);
   }
}

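/* Match consumer inputs to producer outputs by location and fill locs[]
 * with the producer's packed dword offsets, scaled to the units the
 * consumer's load instruction expects (bytes for ldlw, dwords for ldg).
 * Returns the number of consumer input locations used.
 */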
uint32_t
ir3_link_geometry_stages(const struct ir3_shader_variant *producer,
                         const struct ir3_shader_variant *consumer,
                         uint32_t *locs)
{
   uint32_t num_loc = 0, factor;

   switch (consumer->type) {
   case MESA_SHADER_TESS_CTRL:
   case MESA_SHADER_GEOMETRY:
      /* These stages load with ldlw, which expects byte offsets. */
      factor = 4;
      break;
   case MESA_SHADER_TESS_EVAL:
      /* The tess eval shader uses ldg, which takes dword offsets. */
      factor = 1;
      break;
   default:
      unreachable("bad shader stage");
   }

   nir_foreach_variable(in_var, &consumer->shader->nir->inputs) {
      nir_foreach_variable(out_var, &producer->shader->nir->outputs) {
         if (in_var->data.location == out_var->data.location) {
            locs[in_var->data.driver_location] =
               producer->output_loc[out_var->data.driver_location] * factor;

            debug_assert(num_loc <= in_var->data.driver_location + 1);
            num_loc = in_var->data.driver_location + 1;
         }
      }
   }

   return num_loc;
}