/*
 * Copyright © 2019 Google, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include "ir3_nir.h"
#include "ir3_compiler.h"
#include "compiler/nir/nir_builder.h"

struct state {
	uint32_t topology;

	struct primitive_map {
		unsigned loc[32];
		unsigned size[32];
		unsigned stride;
	} map;

	nir_ssa_def *header;

	nir_variable *vertex_count_var;
	nir_variable *emitted_vertex_var;
	nir_variable *vertex_flags_var;
	nir_variable *vertex_flags_out;

	nir_variable *output_vars[32];

	nir_ssa_def *outer_levels[4];
	nir_ssa_def *inner_levels[2];
};

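/* The tcs/gs header dword handed to us by the hardware packs several IDs.
 * The start/mask pairs used below imply this layout (inferred from the
 * extracts in this file, not from documentation):
 *
 *   bits  0..5  : local primitive id
 *   bits  6..10 : vertex id
 *   bits 11..15 : invocation id
 *   bits 16..25 : local thread id (gs header, see local_thread_id())
 */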
static nir_ssa_def *
bitfield_extract(nir_builder *b, nir_ssa_def *v, uint32_t start, uint32_t mask)
{
	return nir_iand(b, nir_ushr(b, v, nir_imm_int(b, start)),
			nir_imm_int(b, mask));
}

static nir_ssa_def *
build_invocation_id(nir_builder *b, struct state *state)
{
	return bitfield_extract(b, state->header, 11, 31);
}

static nir_ssa_def *
build_vertex_id(nir_builder *b, struct state *state)
{
	return bitfield_extract(b, state->header, 6, 31);
}

static nir_ssa_def *
build_local_primitive_id(nir_builder *b, struct state *state)
{
	return bitfield_extract(b, state->header, 0, 63);
}

static nir_variable *
get_var(struct exec_list *list, int driver_location)
{
	nir_foreach_variable(v, list) {
		if (v->data.driver_location == driver_location) {
			return v;
		}
	}

	return NULL;
}

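/* Compute the offset of an attribute in the per-primitive shared storage
 * block:
 *
 *   local_primitive_id * primitive_stride + vertex * vertex_stride +
 *   attr_offset + offset
 *
 * In the producing stage (VS or DS) the layout of our own outputs is known
 * at compile time from the primitive map; in the consuming stage (HS or GS)
 * the producer's stride and locations arrive as driver params.
 */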
static nir_ssa_def *
build_local_offset(nir_builder *b, struct state *state,
		nir_ssa_def *vertex, uint32_t base, nir_ssa_def *offset)
{
	nir_ssa_def *primitive_stride = nir_load_vs_primitive_stride_ir3(b);
	nir_ssa_def *primitive_offset =
		nir_imul24(b, build_local_primitive_id(b, state), primitive_stride);
	nir_ssa_def *attr_offset;
	nir_ssa_def *vertex_stride;

	switch (b->shader->info.stage) {
	case MESA_SHADER_VERTEX:
	case MESA_SHADER_TESS_EVAL:
		vertex_stride = nir_imm_int(b, state->map.stride * 4);
		attr_offset = nir_imm_int(b, state->map.loc[base] * 4);
		break;
	case MESA_SHADER_TESS_CTRL:
	case MESA_SHADER_GEOMETRY:
		vertex_stride = nir_load_vs_vertex_stride_ir3(b);
		attr_offset = nir_load_primitive_location_ir3(b, base);
		break;
	default:
		unreachable("bad shader stage");
	}

	nir_ssa_def *vertex_offset = nir_imul24(b, vertex, vertex_stride);

	return nir_iadd(b, nir_iadd(b, primitive_offset, vertex_offset),
			nir_iadd(b, attr_offset, offset));
}

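/* Replace intr with a new intrinsic of the given op, taking up to three ssa
 * sources. Uses of the old destination are rewritten to the new intrinsic's
 * destination and the old instruction is removed.
 */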
static nir_intrinsic_instr *
replace_intrinsic(nir_builder *b, nir_intrinsic_instr *intr,
		nir_intrinsic_op op, nir_ssa_def *src0, nir_ssa_def *src1, nir_ssa_def *src2)
{
	nir_intrinsic_instr *new_intr =
		nir_intrinsic_instr_create(b->shader, op);

	new_intr->src[0] = nir_src_for_ssa(src0);
	if (src1)
		new_intr->src[1] = nir_src_for_ssa(src1);
	if (src2)
		new_intr->src[2] = nir_src_for_ssa(src2);

	new_intr->num_components = intr->num_components;

	if (nir_intrinsic_infos[op].has_dest)
		nir_ssa_dest_init(&new_intr->instr, &new_intr->dest,
				intr->num_components, 32, NULL);

	nir_builder_instr_insert(b, &new_intr->instr);

	if (nir_intrinsic_infos[op].has_dest)
		nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_src_for_ssa(&new_intr->dest.ssa));

	nir_instr_remove(&intr->instr);

	return new_intr;
}

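/* Record the size and packed location of each driver_location in the given
 * variable list, skipping the tess level slots (they live in the tess
 * factor BO, not the params BO). loc[] is the packed offset of each slot
 * and stride is the total footprint. For non-patch variables, size[] is
 * divided by the array length to give the per-vertex size; patch variables
 * get size 0 since they have no per-vertex stride.
 */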
static void
build_primitive_map(nir_shader *shader, struct primitive_map *map, struct exec_list *list)
{
	nir_foreach_variable(var, list) {
		switch (var->data.location) {
		case VARYING_SLOT_TESS_LEVEL_OUTER:
		case VARYING_SLOT_TESS_LEVEL_INNER:
			continue;
		}

		unsigned size = glsl_count_attribute_slots(var->type, false) * 4;

		assert(var->data.driver_location < ARRAY_SIZE(map->size));
		map->size[var->data.driver_location] =
			MAX2(map->size[var->data.driver_location], size);
	}

	unsigned loc = 0;
	for (uint32_t i = 0; i < ARRAY_SIZE(map->size); i++) {
		if (map->size[i] == 0)
			continue;
		nir_variable *var = get_var(list, i);
		map->loc[i] = loc;
		loc += map->size[i];

		if (var->data.patch)
			map->size[i] = 0;
		else
			map->size[i] = map->size[i] / glsl_get_length(var->type);
	}

	map->stride = loc;
}

static void
lower_vs_block(nir_block *block, nir_builder *b, struct state *state)
{
	nir_foreach_instr_safe(instr, block) {
		if (instr->type != nir_instr_type_intrinsic)
			continue;

		nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);

		switch (intr->intrinsic) {
		case nir_intrinsic_store_output: {
			// src[] = { value, offset }.

			b->cursor = nir_instr_remove(&intr->instr);

			nir_ssa_def *vertex_id = build_vertex_id(b, state);
			nir_ssa_def *offset = build_local_offset(b, state, vertex_id, nir_intrinsic_base(intr),
					intr->src[1].ssa);
			nir_intrinsic_instr *store =
				nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_shared_ir3);

			nir_intrinsic_set_write_mask(store, MASK(intr->num_components));
			store->src[0] = nir_src_for_ssa(intr->src[0].ssa);
			store->src[1] = nir_src_for_ssa(offset);

			store->num_components = intr->num_components;

			nir_builder_instr_insert(b, &store->instr);
			break;
		}

		default:
			break;
		}
	}
}

static nir_ssa_def *
local_thread_id(nir_builder *b)
{
	return bitfield_extract(b, nir_load_gs_header_ir3(b), 16, 1023);
}

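/* Lower VS/DS outputs into store_shared_ir3 writes to per-vertex slots in
 * local storage, to be read back by the following HS/GS stage. The computed
 * primitive map layout is copied to the shader so the driver can hand it to
 * the consuming stage as driver params.
 */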
void
ir3_nir_lower_to_explicit_io(nir_shader *shader, struct ir3_shader *s, unsigned topology)
{
	struct state state = { };

	build_primitive_map(shader, &state.map, &shader->outputs);
	memcpy(s->output_loc, state.map.loc, sizeof(s->output_loc));

	nir_function_impl *impl = nir_shader_get_entrypoint(shader);
	assert(impl);

	nir_builder b;
	nir_builder_init(&b, impl);
	b.cursor = nir_before_cf_list(&impl->body);

	if (s->type == MESA_SHADER_VERTEX && topology != IR3_TESS_NONE)
		state.header = nir_load_tcs_header_ir3(&b);
	else
		state.header = nir_load_gs_header_ir3(&b);

	nir_foreach_block_safe(block, impl)
		lower_vs_block(block, &b, &state);

	nir_metadata_preserve(impl, nir_metadata_block_index |
			nir_metadata_dominance);

	s->output_size = state.map.stride;
}

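/* Offset of a per-vertex attribute within the tess param BO:
 *
 *   primitive_id * patch_stride + attr_offset +
 *   vertex * attr_stride + (offset << 2)
 *
 * The HS knows the layout of its own outputs at compile time; in the DS the
 * producer's locations arrive as driver params.
 */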
static nir_ssa_def *
build_per_vertex_offset(nir_builder *b, struct state *state,
		nir_ssa_def *vertex, nir_ssa_def *offset, nir_variable *var)
{
	nir_ssa_def *primitive_id = nir_load_primitive_id(b);
	nir_ssa_def *patch_stride = nir_load_hs_patch_stride_ir3(b);
	nir_ssa_def *patch_offset = nir_imul24(b, primitive_id, patch_stride);
	nir_ssa_def *attr_offset;
	int loc = var->data.driver_location;

	switch (b->shader->info.stage) {
	case MESA_SHADER_TESS_CTRL:
		attr_offset = nir_imm_int(b, state->map.loc[loc]);
		break;
	case MESA_SHADER_TESS_EVAL:
		attr_offset = nir_load_primitive_location_ir3(b, loc);
		break;
	default:
		unreachable("bad shader stage");
	}

	nir_ssa_def *attr_stride = nir_imm_int(b, state->map.size[loc]);
	nir_ssa_def *vertex_offset = nir_imul24(b, vertex, attr_stride);

	return nir_iadd(b, nir_iadd(b, patch_offset, attr_offset),
			nir_iadd(b, vertex_offset, nir_ishl(b, offset, nir_imm_int(b, 2))));
}

static nir_ssa_def *
build_patch_offset(nir_builder *b, struct state *state, nir_ssa_def *offset, nir_variable *var)
{
	debug_assert(var && var->data.patch);

	return build_per_vertex_offset(b, state, nir_imm_int(b, 0), offset, var);
}

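/* Per-patch layout of the tess factor BO, as implied by the offsets below:
 * one header dword, then the outer levels, then the inner levels, so
 * patch_stride is 1 + outer_levels + inner_levels dwords.
 */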
static nir_ssa_def *
build_tessfactor_base(nir_builder *b, gl_varying_slot slot, struct state *state)
{
	uint32_t inner_levels, outer_levels;
	switch (state->topology) {
	case IR3_TESS_TRIANGLES:
		inner_levels = 1;
		outer_levels = 3;
		break;
	case IR3_TESS_QUADS:
		inner_levels = 2;
		outer_levels = 4;
		break;
	case IR3_TESS_ISOLINES:
		inner_levels = 0;
		outer_levels = 2;
		break;
	default:
		unreachable("bad");
	}

	const uint32_t patch_stride = 1 + inner_levels + outer_levels;

	nir_ssa_def *primitive_id = nir_load_primitive_id(b);

	nir_ssa_def *patch_offset = nir_imul24(b, primitive_id, nir_imm_int(b, patch_stride));

	uint32_t offset;
	switch (slot) {
	case VARYING_SLOT_TESS_LEVEL_OUTER:
		/* There's some kind of header dword; tess levels start at index 1. */
		offset = 1;
		break;
	case VARYING_SLOT_TESS_LEVEL_INNER:
		offset = 1 + outer_levels;
		break;
	default:
		unreachable("bad");
	}

	return nir_iadd(b, patch_offset, nir_imm_int(b, offset));
}

static void
lower_tess_ctrl_block(nir_block *block, nir_builder *b, struct state *state)
{
	nir_foreach_instr_safe(instr, block) {
		if (instr->type != nir_instr_type_intrinsic)
			continue;

		nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);

		switch (intr->intrinsic) {
		case nir_intrinsic_load_invocation_id:
			b->cursor = nir_before_instr(&intr->instr);

			nir_ssa_def *invocation_id = build_invocation_id(b, state);
			nir_ssa_def_rewrite_uses(&intr->dest.ssa,
					nir_src_for_ssa(invocation_id));
			nir_instr_remove(&intr->instr);
			break;

		case nir_intrinsic_barrier:
		case nir_intrinsic_memory_barrier_tcs_patch:
			/* Hull shaders dispatch 32 wide so an entire patch will always
			 * fit in a single warp and execute in lock-step. Consequently,
			 * we don't need to do anything for TCS barriers, so just remove
			 * the intrinsic. Otherwise we'd emit an actual barrier
			 * instruction, which would deadlock.
			 */
			nir_instr_remove(&intr->instr);
			break;

		case nir_intrinsic_load_per_vertex_output: {
			// src[] = { vertex, offset }.

			b->cursor = nir_before_instr(&intr->instr);

			nir_ssa_def *address = nir_load_tess_param_base_ir3(b);
			nir_variable *var = get_var(&b->shader->outputs, nir_intrinsic_base(intr));
			nir_ssa_def *offset = build_per_vertex_offset(b, state,
					intr->src[0].ssa, intr->src[1].ssa, var);

			replace_intrinsic(b, intr, nir_intrinsic_load_global_ir3, address, offset, NULL);
			break;
		}

		case nir_intrinsic_store_per_vertex_output: {
			// src[] = { value, vertex, offset }.

			b->cursor = nir_before_instr(&intr->instr);

			nir_ssa_def *value = intr->src[0].ssa;
			nir_ssa_def *address = nir_load_tess_param_base_ir3(b);
			nir_variable *var = get_var(&b->shader->outputs, nir_intrinsic_base(intr));
			nir_ssa_def *offset = build_per_vertex_offset(b, state,
					intr->src[1].ssa, intr->src[2].ssa, var);

			nir_intrinsic_instr *store =
				replace_intrinsic(b, intr, nir_intrinsic_store_global_ir3, value, address,
						nir_iadd(b, offset, nir_imm_int(b, nir_intrinsic_component(intr))));

			nir_intrinsic_set_write_mask(store, nir_intrinsic_write_mask(intr));

			break;
		}

		case nir_intrinsic_load_per_vertex_input: {
			// src[] = { vertex, offset }.

			b->cursor = nir_before_instr(&intr->instr);

			nir_ssa_def *offset = build_local_offset(b, state,
					intr->src[0].ssa, // this is typically gl_InvocationID
					nir_intrinsic_base(intr),
					intr->src[1].ssa);

			replace_intrinsic(b, intr, nir_intrinsic_load_shared_ir3, offset, NULL, NULL);
			break;
		}

		case nir_intrinsic_load_tess_level_inner:
		case nir_intrinsic_load_tess_level_outer: {
			b->cursor = nir_before_instr(&intr->instr);

			gl_varying_slot slot;
			if (intr->intrinsic == nir_intrinsic_load_tess_level_inner)
				slot = VARYING_SLOT_TESS_LEVEL_INNER;
			else
				slot = VARYING_SLOT_TESS_LEVEL_OUTER;

			nir_ssa_def *address = nir_load_tess_factor_base_ir3(b);
			nir_ssa_def *offset = build_tessfactor_base(b, slot, state);

			replace_intrinsic(b, intr, nir_intrinsic_load_global_ir3, address, offset, NULL);
			break;
		}

		case nir_intrinsic_load_output: {
			// src[] = { offset }.

			nir_variable *var = get_var(&b->shader->outputs, nir_intrinsic_base(intr));

			b->cursor = nir_before_instr(&intr->instr);

			nir_ssa_def *address = nir_load_tess_param_base_ir3(b);
			nir_ssa_def *offset = build_patch_offset(b, state, intr->src[0].ssa, var);

			replace_intrinsic(b, intr, nir_intrinsic_load_global_ir3, address, offset, NULL);
			break;
		}

		case nir_intrinsic_store_output: {
			// src[] = { value, offset }.

			/* write patch output to BO */

			nir_variable *var = get_var(&b->shader->outputs, nir_intrinsic_base(intr));

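			/* Tess levels aren't written to the params BO here; the
			 * values are captured in state and written out once in the
			 * epilogue, since they live in the tess factor BO instead.
			 */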
			nir_ssa_def **levels = NULL;
			if (var->data.location == VARYING_SLOT_TESS_LEVEL_OUTER)
				levels = state->outer_levels;
			else if (var->data.location == VARYING_SLOT_TESS_LEVEL_INNER)
				levels = state->inner_levels;

			b->cursor = nir_before_instr(&intr->instr);

			if (levels) {
				for (int i = 0; i < 4; i++)
					if (nir_intrinsic_write_mask(intr) & (1 << i))
						levels[i] = nir_channel(b, intr->src[0].ssa, i);
				nir_instr_remove(&intr->instr);
			} else {
				nir_ssa_def *address = nir_load_tess_param_base_ir3(b);
				nir_ssa_def *offset = build_patch_offset(b, state, intr->src[1].ssa, var);

				debug_assert(nir_intrinsic_component(intr) == 0);

				nir_intrinsic_instr *store =
					replace_intrinsic(b, intr, nir_intrinsic_store_global_ir3,
							intr->src[0].ssa, address, offset);

				nir_intrinsic_set_write_mask(store, nir_intrinsic_write_mask(intr));
			}
			break;
		}

		default:
			break;
		}
	}
}

static void
emit_tess_epilogue(nir_builder *b, struct state *state)
{
	nir_ssa_def *tessfactor_address = nir_load_tess_factor_base_ir3(b);
	nir_ssa_def *levels[2];

	/* Emit the epilogue that actually writes out the tessellation levels
	 * to the BOs.
	 */
	switch (state->topology) {
	case IR3_TESS_TRIANGLES:
		levels[0] = nir_vec4(b, state->outer_levels[0], state->outer_levels[1],
				state->outer_levels[2], state->inner_levels[0]);
		levels[1] = NULL;
		break;
	case IR3_TESS_QUADS:
		levels[0] = nir_vec4(b, state->outer_levels[0], state->outer_levels[1],
				state->outer_levels[2], state->outer_levels[3]);
		levels[1] = nir_vec2(b, state->inner_levels[0], state->inner_levels[1]);
		break;
	case IR3_TESS_ISOLINES:
		levels[0] = nir_vec2(b, state->outer_levels[0], state->outer_levels[1]);
		levels[1] = NULL;
		break;
	default:
		unreachable("nope");
	}

	nir_ssa_def *offset = build_tessfactor_base(b, VARYING_SLOT_TESS_LEVEL_OUTER, state);

	nir_intrinsic_instr *store =
		nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_global_ir3);

	store->src[0] = nir_src_for_ssa(levels[0]);
	store->src[1] = nir_src_for_ssa(tessfactor_address);
	store->src[2] = nir_src_for_ssa(offset);
	store->num_components = levels[0]->num_components;
	nir_intrinsic_set_write_mask(store, (1 << levels[0]->num_components) - 1);
	nir_builder_instr_insert(b, &store->instr);

	if (levels[1]) {
		store = nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_global_ir3);
		offset = nir_iadd(b, offset, nir_imm_int(b, levels[0]->num_components));

		store->src[0] = nir_src_for_ssa(levels[1]);
		store->src[1] = nir_src_for_ssa(tessfactor_address);
		store->src[2] = nir_src_for_ssa(offset);
		store->num_components = levels[1]->num_components;
		nir_intrinsic_set_write_mask(store, (1 << levels[1]->num_components) - 1);
		nir_builder_instr_insert(b, &store->instr);
	}

	/* Finally, insert the endpatch instruction, maybe signalling the tess
	 * engine that another primitive is ready?
	 */
	nir_intrinsic_instr *end_patch =
		nir_intrinsic_instr_create(b->shader, nir_intrinsic_end_patch_ir3);
	nir_builder_instr_insert(b, &end_patch->instr);
}

void
ir3_nir_lower_tess_ctrl(nir_shader *shader, struct ir3_shader *s, unsigned topology)
{
	struct state state = { .topology = topology };

	if (shader_debug_enabled(shader->info.stage)) {
		fprintf(stderr, "NIR (before tess lowering) for %s shader:\n",
				_mesa_shader_stage_to_string(shader->info.stage));
		nir_print_shader(shader, stderr);
	}

	build_primitive_map(shader, &state.map, &shader->outputs);
	memcpy(s->output_loc, state.map.loc, sizeof(s->output_loc));
	s->output_size = state.map.stride;

	nir_function_impl *impl = nir_shader_get_entrypoint(shader);
	assert(impl);

	nir_builder b;
	nir_builder_init(&b, impl);
	b.cursor = nir_before_cf_list(&impl->body);

	state.header = nir_load_tcs_header_ir3(&b);

	nir_foreach_block_safe(block, impl)
		lower_tess_ctrl_block(block, &b, &state);

	/* Now move the body of the TCS into a conditional:
	 *
	 *   if (gl_InvocationID < num_vertices)
	 *     // body
	 *
	 */

	nir_cf_list body;
	nir_cf_extract(&body, nir_before_cf_list(&impl->body),
			nir_after_cf_list(&impl->body));

	b.cursor = nir_after_cf_list(&impl->body);

	/* Re-emit the header, since the old one got moved into the if branch. */
	state.header = nir_load_tcs_header_ir3(&b);
	nir_ssa_def *iid = build_invocation_id(&b, &state);

	const uint32_t nvertices = shader->info.tess.tcs_vertices_out;
	nir_ssa_def *cond = nir_ult(&b, iid, nir_imm_int(&b, nvertices));

	nir_if *nif = nir_push_if(&b, cond);

	nir_cf_reinsert(&body, b.cursor);

	b.cursor = nir_after_cf_list(&nif->then_list);

	/* Insert a conditional exit for threads with invocation id != 0, so
	 * that only invocation 0 runs the epilogue that writes out the tess
	 * levels.
	 */
	nir_ssa_def *iid0_cond = nir_ieq(&b, iid, nir_imm_int(&b, 0));
	nir_intrinsic_instr *cond_end =
		nir_intrinsic_instr_create(shader, nir_intrinsic_cond_end_ir3);
	cond_end->src[0] = nir_src_for_ssa(iid0_cond);
	nir_builder_instr_insert(&b, &cond_end->instr);

	emit_tess_epilogue(&b, &state);

	nir_pop_if(&b, nif);

	nir_metadata_preserve(impl, 0);
}

static void
lower_tess_eval_block(nir_block *block, nir_builder *b, struct state *state)
{
	nir_foreach_instr_safe(instr, block) {
		if (instr->type != nir_instr_type_intrinsic)
			continue;

		nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);

		switch (intr->intrinsic) {
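		/* The hardware provides only the first two tess coord
		 * components; for triangle domains the third barycentric
		 * coordinate is reconstructed here as 1.0 - x - y.
		 */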
		case nir_intrinsic_load_tess_coord: {
			b->cursor = nir_after_instr(&intr->instr);
			nir_ssa_def *x = nir_channel(b, &intr->dest.ssa, 0);
			nir_ssa_def *y = nir_channel(b, &intr->dest.ssa, 1);
			nir_ssa_def *z;

			if (state->topology == IR3_TESS_TRIANGLES)
				z = nir_fsub(b, nir_fsub(b, nir_imm_float(b, 1.0f), y), x);
			else
				z = nir_imm_float(b, 0.0f);

			nir_ssa_def *coord = nir_vec3(b, x, y, z);

			nir_ssa_def_rewrite_uses_after(&intr->dest.ssa,
					nir_src_for_ssa(coord),
					b->cursor.instr);
			break;
		}

		case nir_intrinsic_load_per_vertex_input: {
			// src[] = { vertex, offset }.

			b->cursor = nir_before_instr(&intr->instr);

			nir_ssa_def *address = nir_load_tess_param_base_ir3(b);
			nir_variable *var = get_var(&b->shader->inputs, nir_intrinsic_base(intr));
			nir_ssa_def *offset = build_per_vertex_offset(b, state,
					intr->src[0].ssa, intr->src[1].ssa, var);

			replace_intrinsic(b, intr, nir_intrinsic_load_global_ir3, address, offset, NULL);
			break;
		}

		case nir_intrinsic_load_tess_level_inner:
		case nir_intrinsic_load_tess_level_outer: {
			b->cursor = nir_before_instr(&intr->instr);

			gl_varying_slot slot;
			if (intr->intrinsic == nir_intrinsic_load_tess_level_inner)
				slot = VARYING_SLOT_TESS_LEVEL_INNER;
			else
				slot = VARYING_SLOT_TESS_LEVEL_OUTER;

			nir_ssa_def *address = nir_load_tess_factor_base_ir3(b);
			nir_ssa_def *offset = build_tessfactor_base(b, slot, state);

			/* Loading across a vec4 (16b) memory boundary is problematic
			 * if we don't use components from the second vec4. The tess
			 * levels aren't guaranteed to be vec4 aligned and we don't
			 * know which levels are actually used, so we load each
			 * component individually.
			 */
			nir_ssa_def *levels[4];
			for (unsigned i = 0; i < intr->num_components; i++) {
				nir_intrinsic_instr *new_intr =
					nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_global_ir3);

				new_intr->src[0] = nir_src_for_ssa(address);
				new_intr->src[1] = nir_src_for_ssa(nir_iadd(b, offset, nir_imm_int(b, i)));
				new_intr->num_components = 1;
				nir_ssa_dest_init(&new_intr->instr, &new_intr->dest, 1, 32, NULL);
				nir_builder_instr_insert(b, &new_intr->instr);
				levels[i] = &new_intr->dest.ssa;
			}

			nir_ssa_def *v = nir_vec(b, levels, intr->num_components);

			nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_src_for_ssa(v));

			nir_instr_remove(&intr->instr);
			break;
		}

		case nir_intrinsic_load_input: {
			// src[] = { offset }.

			nir_variable *var = get_var(&b->shader->inputs, nir_intrinsic_base(intr));

			debug_assert(var->data.patch);

			b->cursor = nir_before_instr(&intr->instr);

			nir_ssa_def *address = nir_load_tess_param_base_ir3(b);
			nir_ssa_def *offset = build_patch_offset(b, state, intr->src[0].ssa, var);

			replace_intrinsic(b, intr, nir_intrinsic_load_global_ir3, address, offset, NULL);
			break;
		}

		default:
			break;
		}
	}
}

void
ir3_nir_lower_tess_eval(nir_shader *shader, unsigned topology)
{
	struct state state = { .topology = topology };

	if (shader_debug_enabled(shader->info.stage)) {
		fprintf(stderr, "NIR (before tess lowering) for %s shader:\n",
				_mesa_shader_stage_to_string(shader->info.stage));
		nir_print_shader(shader, stderr);
	}

	/* Build map of inputs so we have the sizes. */
	build_primitive_map(shader, &state.map, &shader->inputs);

	nir_function_impl *impl = nir_shader_get_entrypoint(shader);
	assert(impl);

	nir_builder b;
	nir_builder_init(&b, impl);

	nir_foreach_block_safe(block, impl)
		lower_tess_eval_block(block, &b, &state);

	nir_metadata_preserve(impl, 0);
}

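/* Each GS thread produces at most one output vertex: every thread runs the
 * whole shader, but buffered store_output values are only committed on the
 * emit_vertex where the running vertex_count matches the thread's local id.
 * The values are copied to local variables there, and written to the real
 * outputs at shader end in emit_store_outputs().
 */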
static void
lower_gs_block(nir_block *block, nir_builder *b, struct state *state)
{
	nir_intrinsic_instr *outputs[32] = {};

	nir_foreach_instr_safe(instr, block) {
		if (instr->type != nir_instr_type_intrinsic)
			continue;

		nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);

		switch (intr->intrinsic) {
		case nir_intrinsic_store_output: {
			// src[] = { value, offset }.

			uint32_t loc = nir_intrinsic_base(intr);
			outputs[loc] = intr;
			break;
		}

		case nir_intrinsic_end_primitive: {
			b->cursor = nir_before_instr(&intr->instr);
			nir_store_var(b, state->vertex_flags_var, nir_imm_int(b, 4), 0x1);
			nir_instr_remove(&intr->instr);
			break;
		}

		case nir_intrinsic_emit_vertex: {
			/* Load the vertex count */
			b->cursor = nir_before_instr(&intr->instr);
			nir_ssa_def *count = nir_load_var(b, state->vertex_count_var);

			nir_push_if(b, nir_ieq(b, count, local_thread_id(b)));

			for (uint32_t i = 0; i < ARRAY_SIZE(outputs); i++) {
				if (outputs[i]) {
					nir_store_var(b, state->output_vars[i],
							outputs[i]->src[0].ssa,
							(1 << outputs[i]->num_components) - 1);

					nir_instr_remove(&outputs[i]->instr);
				}
				outputs[i] = NULL;
			}

			nir_instr_remove(&intr->instr);

			nir_store_var(b, state->emitted_vertex_var,
					nir_iadd(b, nir_load_var(b, state->emitted_vertex_var), nir_imm_int(b, 1)), 0x1);

			nir_store_var(b, state->vertex_flags_out,
					nir_load_var(b, state->vertex_flags_var), 0x1);

			nir_pop_if(b, NULL);

			/* Increment the vertex count by 1 */
			nir_store_var(b, state->vertex_count_var,
					nir_iadd(b, count, nir_imm_int(b, 1)), 0x1); /* .x */
			nir_store_var(b, state->vertex_flags_var, nir_imm_int(b, 0), 0x1);

			break;
		}

		case nir_intrinsic_load_per_vertex_input: {
			// src[] = { vertex, offset }.

			b->cursor = nir_before_instr(&intr->instr);

			nir_ssa_def *offset = build_local_offset(b, state,
					intr->src[0].ssa, // this is typically gl_InvocationID
					nir_intrinsic_base(intr),
					intr->src[1].ssa);

			replace_intrinsic(b, intr, nir_intrinsic_load_shared_ir3, offset, NULL, NULL);
			break;
		}

		case nir_intrinsic_load_invocation_id: {
			b->cursor = nir_before_instr(&intr->instr);

			nir_ssa_def *iid = build_invocation_id(b, state);
			nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_src_for_ssa(iid));
			nir_instr_remove(&intr->instr);
			break;
		}

		default:
			break;
		}
	}
}

static void
emit_store_outputs(nir_builder *b, struct state *state)
{
	/* This also stores the internally added vertex_flags output. */

	for (uint32_t i = 0; i < ARRAY_SIZE(state->output_vars); i++) {
		if (!state->output_vars[i])
			continue;

		nir_intrinsic_instr *store =
			nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_output);

		nir_intrinsic_set_base(store, i);
		store->src[0] = nir_src_for_ssa(nir_load_var(b, state->output_vars[i]));
		store->src[1] = nir_src_for_ssa(nir_imm_int(b, 0));
		store->num_components = store->src[0].ssa->num_components;

		nir_builder_instr_insert(b, &store->instr);
	}
}

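/* Remove variables that only cover a subset of the components used at
 * their driver_location, e.g. leftovers from variable splitting.
 */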
static void
clean_up_split_vars(nir_shader *shader, struct exec_list *list)
{
	uint32_t components[32] = {};

	nir_foreach_variable(var, list) {
		uint32_t mask =
			((1 << glsl_get_components(glsl_without_array(var->type))) - 1) << var->data.location_frac;
		components[var->data.driver_location] |= mask;
	}

	nir_foreach_variable_safe(var, list) {
		uint32_t mask =
			((1 << glsl_get_components(glsl_without_array(var->type))) - 1) << var->data.location_frac;
		bool subset =
			(components[var->data.driver_location] | mask) != mask;
		if (subset)
			exec_node_remove(&var->node);
	}
}

void
ir3_nir_lower_gs(nir_shader *shader, struct ir3_shader *s)
{
	struct state state = { };

	if (shader_debug_enabled(shader->info.stage)) {
		fprintf(stderr, "NIR (before gs lowering):\n");
		nir_print_shader(shader, stderr);
	}

	clean_up_split_vars(shader, &shader->inputs);
	clean_up_split_vars(shader, &shader->outputs);

	build_primitive_map(shader, &state.map, &shader->inputs);

	uint32_t loc = 0;
	nir_foreach_variable(var, &shader->outputs) {
		uint32_t end = var->data.driver_location + glsl_count_attribute_slots(var->type, false);
		loc = MAX2(loc, end);
	}

	state.vertex_flags_out = nir_variable_create(shader, nir_var_shader_out,
			glsl_uint_type(), "vertex_flags");
	state.vertex_flags_out->data.driver_location = loc;
	state.vertex_flags_out->data.location = VARYING_SLOT_GS_VERTEX_FLAGS_IR3;

	nir_function_impl *impl = nir_shader_get_entrypoint(shader);
	assert(impl);

	nir_builder b;
	nir_builder_init(&b, impl);
	b.cursor = nir_before_cf_list(&impl->body);

	state.header = nir_load_gs_header_ir3(&b);

	nir_foreach_variable(var, &shader->outputs) {
		state.output_vars[var->data.driver_location] =
			nir_local_variable_create(impl, var->type,
					ralloc_asprintf(var, "%s:gs-temp", var->name));
	}

	state.vertex_count_var =
		nir_local_variable_create(impl, glsl_uint_type(), "vertex_count");
	state.emitted_vertex_var =
		nir_local_variable_create(impl, glsl_uint_type(), "emitted_vertex");
	state.vertex_flags_var =
		nir_local_variable_create(impl, glsl_uint_type(), "vertex_flags");
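	/* From here on, vertex_flags_out refers to the local gs-temp copy
	 * created in the loop above; the real shader output is written from
	 * it in emit_store_outputs().
	 */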
	state.vertex_flags_out = state.output_vars[state.vertex_flags_out->data.driver_location];

	/* initialize the counters and vertex flags */
	b.cursor = nir_before_cf_list(&impl->body);
	nir_store_var(&b, state.vertex_count_var, nir_imm_int(&b, 0), 0x1);
	nir_store_var(&b, state.emitted_vertex_var, nir_imm_int(&b, 0), 0x1);
	nir_store_var(&b, state.vertex_flags_var, nir_imm_int(&b, 4), 0x1);

	nir_foreach_block_safe(block, impl)
		lower_gs_block(block, &b, &state);

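	/* At each exit of the shader, kill threads that never emitted a
	 * vertex and store the buffered outputs for those that did.
	 */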
	set_foreach(impl->end_block->predecessors, block_entry) {
		struct nir_block *block = (void *)block_entry->key;
		b.cursor = nir_after_block_before_jump(block);

		nir_intrinsic_instr *discard_if =
			nir_intrinsic_instr_create(b.shader, nir_intrinsic_discard_if);

		nir_ssa_def *cond = nir_ieq(&b, nir_load_var(&b, state.emitted_vertex_var), nir_imm_int(&b, 0));

		discard_if->src[0] = nir_src_for_ssa(cond);

		nir_builder_instr_insert(&b, &discard_if->instr);

		emit_store_outputs(&b, &state);
	}

	nir_metadata_preserve(impl, 0);

	if (shader_debug_enabled(shader->info.stage)) {
		fprintf(stderr, "NIR (after gs lowering):\n");
		nir_print_shader(shader, stderr);
	}
}
957 }