/*
 * Copyright © 2019 Google, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
25 #include "ir3_compiler.h"
26 #include "compiler/nir/nir_builder.h"
31 struct primitive_map
{
39 nir_variable
*vertex_count_var
;
40 nir_variable
*emitted_vertex_var
;
41 nir_variable
*vertex_flags_out
;
43 struct exec_list old_outputs
;
44 struct exec_list emit_outputs
;
48 bitfield_extract(nir_builder
*b
, nir_ssa_def
*v
, uint32_t start
, uint32_t mask
)
50 return nir_iand(b
, nir_ushr(b
, v
, nir_imm_int(b
, start
)),
51 nir_imm_int(b
, mask
));
55 build_invocation_id(nir_builder
*b
, struct state
*state
)
57 return bitfield_extract(b
, state
->header
, 11, 31);
61 build_vertex_id(nir_builder
*b
, struct state
*state
)
63 return bitfield_extract(b
, state
->header
, 6, 31);
67 build_local_primitive_id(nir_builder
*b
, struct state
*state
)
69 return bitfield_extract(b
, state
->header
, 0, 63);
73 get_var(struct exec_list
*list
, int driver_location
)
75 nir_foreach_variable (v
, list
) {
76 if (v
->data
.driver_location
== driver_location
) {
85 is_tess_levels(nir_variable
*var
)
87 return (var
->data
.location
== VARYING_SLOT_TESS_LEVEL_OUTER
||
88 var
->data
.location
== VARYING_SLOT_TESS_LEVEL_INNER
);
92 build_local_offset(nir_builder
*b
, struct state
*state
,
93 nir_ssa_def
*vertex
, uint32_t base
, nir_ssa_def
*offset
)
95 nir_ssa_def
*primitive_stride
= nir_load_vs_primitive_stride_ir3(b
);
96 nir_ssa_def
*primitive_offset
=
97 nir_imul24(b
, build_local_primitive_id(b
, state
), primitive_stride
);
98 nir_ssa_def
*attr_offset
;
99 nir_ssa_def
*vertex_stride
;
101 switch (b
->shader
->info
.stage
) {
102 case MESA_SHADER_VERTEX
:
103 case MESA_SHADER_TESS_EVAL
:
104 vertex_stride
= nir_imm_int(b
, state
->map
.stride
* 4);
105 attr_offset
= nir_imm_int(b
, state
->map
.loc
[base
] * 4);
107 case MESA_SHADER_TESS_CTRL
:
108 case MESA_SHADER_GEOMETRY
:
109 vertex_stride
= nir_load_vs_vertex_stride_ir3(b
);
110 attr_offset
= nir_load_primitive_location_ir3(b
, base
);
113 unreachable("bad shader stage");
116 nir_ssa_def
*vertex_offset
= nir_imul24(b
, vertex
, vertex_stride
);
118 return nir_iadd(b
, nir_iadd(b
, primitive_offset
, vertex_offset
),
119 nir_iadd(b
, attr_offset
, offset
));
122 static nir_intrinsic_instr
*
123 replace_intrinsic(nir_builder
*b
, nir_intrinsic_instr
*intr
,
124 nir_intrinsic_op op
, nir_ssa_def
*src0
, nir_ssa_def
*src1
, nir_ssa_def
*src2
)
126 nir_intrinsic_instr
*new_intr
=
127 nir_intrinsic_instr_create(b
->shader
, op
);
129 new_intr
->src
[0] = nir_src_for_ssa(src0
);
131 new_intr
->src
[1] = nir_src_for_ssa(src1
);
133 new_intr
->src
[2] = nir_src_for_ssa(src2
);
135 new_intr
->num_components
= intr
->num_components
;
137 if (nir_intrinsic_infos
[op
].has_dest
)
138 nir_ssa_dest_init(&new_intr
->instr
, &new_intr
->dest
,
139 intr
->num_components
, 32, NULL
);
141 nir_builder_instr_insert(b
, &new_intr
->instr
);
143 if (nir_intrinsic_infos
[op
].has_dest
)
144 nir_ssa_def_rewrite_uses(&intr
->dest
.ssa
, nir_src_for_ssa(&new_intr
->dest
.ssa
));
146 nir_instr_remove(&intr
->instr
);
152 build_primitive_map(nir_shader
*shader
, struct primitive_map
*map
, struct exec_list
*list
)
154 nir_foreach_variable (var
, list
) {
155 switch (var
->data
.location
) {
156 case VARYING_SLOT_TESS_LEVEL_OUTER
:
157 case VARYING_SLOT_TESS_LEVEL_INNER
:
161 unsigned size
= glsl_count_attribute_slots(var
->type
, false) * 4;
163 assert(var
->data
.driver_location
< ARRAY_SIZE(map
->size
));
164 map
->size
[var
->data
.driver_location
] =
165 MAX2(map
->size
[var
->data
.driver_location
], size
);
169 for (uint32_t i
= 0; i
< ARRAY_SIZE(map
->size
); i
++) {
170 if (map
->size
[i
] == 0)
172 nir_variable
*var
= get_var(list
, i
);
179 map
->size
[i
] = map
->size
[i
] / glsl_get_length(var
->type
);
186 lower_block_to_explicit_output(nir_block
*block
, nir_builder
*b
, struct state
*state
)
188 nir_foreach_instr_safe (instr
, block
) {
189 if (instr
->type
!= nir_instr_type_intrinsic
)
192 nir_intrinsic_instr
*intr
= nir_instr_as_intrinsic(instr
);
194 switch (intr
->intrinsic
) {
195 case nir_intrinsic_store_output
: {
196 // src[] = { value, offset }.
198 /* nir_lower_io_to_temporaries replaces all access to output
199 * variables with temp variables and then emits a nir_copy_var at
200 * the end of the shader. Thus, we should always get a full wrmask
203 assert(util_is_power_of_two_nonzero(nir_intrinsic_write_mask(intr
) + 1));
205 b
->cursor
= nir_instr_remove(&intr
->instr
);
207 nir_ssa_def
*vertex_id
= build_vertex_id(b
, state
);
208 nir_ssa_def
*offset
= build_local_offset(b
, state
, vertex_id
, nir_intrinsic_base(intr
),
210 nir_intrinsic_instr
*store
=
211 nir_intrinsic_instr_create(b
->shader
, nir_intrinsic_store_shared_ir3
);
213 store
->src
[0] = nir_src_for_ssa(intr
->src
[0].ssa
);
214 store
->src
[1] = nir_src_for_ssa(offset
);
215 store
->num_components
= intr
->num_components
;
217 nir_builder_instr_insert(b
, &store
->instr
);
228 local_thread_id(nir_builder
*b
)
230 return bitfield_extract(b
, nir_load_gs_header_ir3(b
), 16, 1023);
234 ir3_nir_lower_to_explicit_output(nir_shader
*shader
, struct ir3_shader_variant
*v
,
237 struct state state
= { };
239 build_primitive_map(shader
, &state
.map
, &shader
->outputs
);
240 memcpy(v
->output_loc
, state
.map
.loc
, sizeof(v
->output_loc
));
242 nir_function_impl
*impl
= nir_shader_get_entrypoint(shader
);
246 nir_builder_init(&b
, impl
);
247 b
.cursor
= nir_before_cf_list(&impl
->body
);
249 if (v
->type
== MESA_SHADER_VERTEX
&& topology
!= IR3_TESS_NONE
)
250 state
.header
= nir_load_tcs_header_ir3(&b
);
252 state
.header
= nir_load_gs_header_ir3(&b
);
254 nir_foreach_block_safe (block
, impl
)
255 lower_block_to_explicit_output(block
, &b
, &state
);
257 nir_metadata_preserve(impl
, nir_metadata_block_index
|
258 nir_metadata_dominance
);
260 v
->output_size
= state
.map
.stride
;
265 lower_block_to_explicit_input(nir_block
*block
, nir_builder
*b
, struct state
*state
)
267 nir_foreach_instr_safe (instr
, block
) {
268 if (instr
->type
!= nir_instr_type_intrinsic
)
271 nir_intrinsic_instr
*intr
= nir_instr_as_intrinsic(instr
);
273 switch (intr
->intrinsic
) {
274 case nir_intrinsic_load_per_vertex_input
: {
275 // src[] = { vertex, offset }.
277 b
->cursor
= nir_before_instr(&intr
->instr
);
279 nir_ssa_def
*offset
= build_local_offset(b
, state
,
280 intr
->src
[0].ssa
, // this is typically gl_InvocationID
281 nir_intrinsic_base(intr
),
284 replace_intrinsic(b
, intr
, nir_intrinsic_load_shared_ir3
, offset
, NULL
, NULL
);
288 case nir_intrinsic_load_invocation_id
: {
289 b
->cursor
= nir_before_instr(&intr
->instr
);
291 nir_ssa_def
*iid
= build_invocation_id(b
, state
);
292 nir_ssa_def_rewrite_uses(&intr
->dest
.ssa
, nir_src_for_ssa(iid
));
293 nir_instr_remove(&intr
->instr
);
304 ir3_nir_lower_to_explicit_input(nir_shader
*shader
)
306 struct state state
= { };
308 nir_function_impl
*impl
= nir_shader_get_entrypoint(shader
);
312 nir_builder_init(&b
, impl
);
313 b
.cursor
= nir_before_cf_list(&impl
->body
);
315 if (shader
->info
.stage
== MESA_SHADER_GEOMETRY
)
316 state
.header
= nir_load_gs_header_ir3(&b
);
318 state
.header
= nir_load_tcs_header_ir3(&b
);
320 nir_foreach_block_safe (block
, impl
)
321 lower_block_to_explicit_input(block
, &b
, &state
);
326 build_per_vertex_offset(nir_builder
*b
, struct state
*state
,
327 nir_ssa_def
*vertex
, nir_ssa_def
*offset
, nir_variable
*var
)
329 nir_ssa_def
*primitive_id
= nir_load_primitive_id(b
);
330 nir_ssa_def
*patch_stride
= nir_load_hs_patch_stride_ir3(b
);
331 nir_ssa_def
*patch_offset
= nir_imul24(b
, primitive_id
, patch_stride
);
332 nir_ssa_def
*attr_offset
;
333 int loc
= var
->data
.driver_location
;
335 switch (b
->shader
->info
.stage
) {
336 case MESA_SHADER_TESS_CTRL
:
337 attr_offset
= nir_imm_int(b
, state
->map
.loc
[loc
]);
339 case MESA_SHADER_TESS_EVAL
:
340 attr_offset
= nir_load_primitive_location_ir3(b
, loc
);
343 unreachable("bad shader state");
346 nir_ssa_def
*attr_stride
= nir_imm_int(b
, state
->map
.size
[loc
]);
347 nir_ssa_def
*vertex_offset
= nir_imul24(b
, vertex
, attr_stride
);
349 return nir_iadd(b
, nir_iadd(b
, patch_offset
, attr_offset
),
350 nir_iadd(b
, vertex_offset
, nir_ishl(b
, offset
, nir_imm_int(b
, 2))));
354 build_patch_offset(nir_builder
*b
, struct state
*state
, nir_ssa_def
*offset
, nir_variable
*var
)
356 debug_assert(var
&& var
->data
.patch
);
358 return build_per_vertex_offset(b
, state
, nir_imm_int(b
, 0), offset
, var
);
362 tess_level_components(struct state
*state
, uint32_t *inner
, uint32_t *outer
)
364 switch (state
->topology
) {
365 case IR3_TESS_TRIANGLES
:
373 case IR3_TESS_ISOLINES
:
383 build_tessfactor_base(nir_builder
*b
, gl_varying_slot slot
, struct state
*state
)
385 uint32_t inner_levels
, outer_levels
;
386 tess_level_components(state
, &inner_levels
, &outer_levels
);
388 const uint32_t patch_stride
= 1 + inner_levels
+ outer_levels
;
390 nir_ssa_def
*primitive_id
= nir_load_primitive_id(b
);
392 nir_ssa_def
*patch_offset
= nir_imul24(b
, primitive_id
, nir_imm_int(b
, patch_stride
));
396 case VARYING_SLOT_TESS_LEVEL_OUTER
:
397 /* There's some kind of header dword, tess levels start at index 1. */
400 case VARYING_SLOT_TESS_LEVEL_INNER
:
401 offset
= 1 + outer_levels
;
407 return nir_iadd(b
, patch_offset
, nir_imm_int(b
, offset
));
411 lower_tess_ctrl_block(nir_block
*block
, nir_builder
*b
, struct state
*state
)
413 nir_foreach_instr_safe (instr
, block
) {
414 if (instr
->type
!= nir_instr_type_intrinsic
)
417 nir_intrinsic_instr
*intr
= nir_instr_as_intrinsic(instr
);
419 switch (intr
->intrinsic
) {
420 case nir_intrinsic_control_barrier
:
421 case nir_intrinsic_memory_barrier_tcs_patch
:
422 /* Hull shaders dispatch 32 wide so an entire patch will always
423 * fit in a single warp and execute in lock-step. Consequently,
424 * we don't need to do anything for TCS barriers so just remove
425 * the intrinsic. Otherwise we'll emit an actual barrier
426 * instructions, which will deadlock.
428 nir_instr_remove(&intr
->instr
);
431 case nir_intrinsic_load_per_vertex_output
: {
432 // src[] = { vertex, offset }.
434 b
->cursor
= nir_before_instr(&intr
->instr
);
436 nir_ssa_def
*address
= nir_load_tess_param_base_ir3(b
);
437 nir_variable
*var
= get_var(&b
->shader
->outputs
, nir_intrinsic_base(intr
));
438 nir_ssa_def
*offset
= build_per_vertex_offset(b
, state
,
439 intr
->src
[0].ssa
, intr
->src
[1].ssa
, var
);
441 replace_intrinsic(b
, intr
, nir_intrinsic_load_global_ir3
, address
, offset
, NULL
);
445 case nir_intrinsic_store_per_vertex_output
: {
446 // src[] = { value, vertex, offset }.
448 b
->cursor
= nir_before_instr(&intr
->instr
);
450 /* sparse writemask not supported */
451 assert(util_is_power_of_two_nonzero(nir_intrinsic_write_mask(intr
) + 1));
453 nir_ssa_def
*value
= intr
->src
[0].ssa
;
454 nir_ssa_def
*address
= nir_load_tess_param_base_ir3(b
);
455 nir_variable
*var
= get_var(&b
->shader
->outputs
, nir_intrinsic_base(intr
));
456 nir_ssa_def
*offset
= build_per_vertex_offset(b
, state
,
457 intr
->src
[1].ssa
, intr
->src
[2].ssa
, var
);
459 replace_intrinsic(b
, intr
, nir_intrinsic_store_global_ir3
, value
, address
,
460 nir_iadd(b
, offset
, nir_imm_int(b
, nir_intrinsic_component(intr
))));
465 case nir_intrinsic_load_output
: {
466 // src[] = { offset }.
468 nir_variable
*var
= get_var(&b
->shader
->outputs
, nir_intrinsic_base(intr
));
470 b
->cursor
= nir_before_instr(&intr
->instr
);
472 nir_ssa_def
*address
, *offset
;
474 /* note if vectorization of the tess level loads ever happens:
475 * "ldg" across 16-byte boundaries can behave incorrectly if results
476 * are never used. most likely some issue with (sy) not properly
477 * syncing with values coming from a second memory transaction.
479 if (is_tess_levels(var
)) {
480 assert(intr
->dest
.ssa
.num_components
== 1);
481 address
= nir_load_tess_factor_base_ir3(b
);
482 offset
= build_tessfactor_base(b
, var
->data
.location
, state
);
484 address
= nir_load_tess_param_base_ir3(b
);
485 offset
= build_patch_offset(b
, state
, intr
->src
[0].ssa
, var
);
488 replace_intrinsic(b
, intr
, nir_intrinsic_load_global_ir3
, address
, offset
, NULL
);
492 case nir_intrinsic_store_output
: {
493 // src[] = { value, offset }.
495 /* write patch output to bo */
497 nir_variable
*var
= get_var(&b
->shader
->outputs
, nir_intrinsic_base(intr
));
499 b
->cursor
= nir_before_instr(&intr
->instr
);
501 /* sparse writemask not supported */
502 assert(util_is_power_of_two_nonzero(nir_intrinsic_write_mask(intr
) + 1));
504 if (is_tess_levels(var
)) {
505 /* with tess levels are defined as float[4] and float[2],
506 * but tess factor BO has smaller sizes for tris/isolines,
507 * so we have to discard any writes beyond the number of
508 * components for inner/outer levels */
509 uint32_t inner_levels
, outer_levels
, levels
;
510 tess_level_components(state
, &inner_levels
, &outer_levels
);
512 if (var
->data
.location
== VARYING_SLOT_TESS_LEVEL_OUTER
)
513 levels
= outer_levels
;
515 levels
= inner_levels
;
517 assert(intr
->src
[0].ssa
->num_components
== 1);
519 nir_ssa_def
*offset
=
520 nir_iadd_imm(b
, intr
->src
[1].ssa
, nir_intrinsic_component(intr
));
522 nir_if
*nif
= nir_push_if(b
, nir_ult(b
, offset
, nir_imm_int(b
, levels
)));
524 replace_intrinsic(b
, intr
, nir_intrinsic_store_global_ir3
,
526 nir_load_tess_factor_base_ir3(b
),
527 nir_iadd(b
, offset
, build_tessfactor_base(b
, var
->data
.location
, state
)));
531 nir_ssa_def
*address
= nir_load_tess_param_base_ir3(b
);
532 nir_ssa_def
*offset
= build_patch_offset(b
, state
, intr
->src
[1].ssa
, var
);
534 debug_assert(nir_intrinsic_component(intr
) == 0);
536 replace_intrinsic(b
, intr
, nir_intrinsic_store_global_ir3
,
537 intr
->src
[0].ssa
, address
, offset
);
549 emit_tess_epilouge(nir_builder
*b
, struct state
*state
)
551 /* Insert endpatch instruction:
553 * TODO we should re-work this to use normal flow control.
556 nir_intrinsic_instr
*end_patch
=
557 nir_intrinsic_instr_create(b
->shader
, nir_intrinsic_end_patch_ir3
);
558 nir_builder_instr_insert(b
, &end_patch
->instr
);
562 ir3_nir_lower_tess_ctrl(nir_shader
*shader
, struct ir3_shader_variant
*v
,
565 struct state state
= { .topology
= topology
};
567 if (shader_debug_enabled(shader
->info
.stage
)) {
568 fprintf(stderr
, "NIR (before tess lowering) for %s shader:\n",
569 _mesa_shader_stage_to_string(shader
->info
.stage
));
570 nir_print_shader(shader
, stderr
);
573 build_primitive_map(shader
, &state
.map
, &shader
->outputs
);
574 memcpy(v
->output_loc
, state
.map
.loc
, sizeof(v
->output_loc
));
575 v
->output_size
= state
.map
.stride
;
577 nir_function_impl
*impl
= nir_shader_get_entrypoint(shader
);
581 nir_builder_init(&b
, impl
);
582 b
.cursor
= nir_before_cf_list(&impl
->body
);
584 state
.header
= nir_load_tcs_header_ir3(&b
);
586 nir_foreach_block_safe (block
, impl
)
587 lower_tess_ctrl_block(block
, &b
, &state
);
589 /* Now move the body of the TCS into a conditional:
591 * if (gl_InvocationID < num_vertices)
597 nir_cf_extract(&body
, nir_before_cf_list(&impl
->body
),
598 nir_after_cf_list(&impl
->body
));
600 b
.cursor
= nir_after_cf_list(&impl
->body
);
602 /* Re-emit the header, since the old one got moved into the if branch */
603 state
.header
= nir_load_tcs_header_ir3(&b
);
604 nir_ssa_def
*iid
= build_invocation_id(&b
, &state
);
606 const uint32_t nvertices
= shader
->info
.tess
.tcs_vertices_out
;
607 nir_ssa_def
*cond
= nir_ult(&b
, iid
, nir_imm_int(&b
, nvertices
));
609 nir_if
*nif
= nir_push_if(&b
, cond
);
611 nir_cf_reinsert(&body
, b
.cursor
);
613 b
.cursor
= nir_after_cf_list(&nif
->then_list
);
615 /* Insert conditional exit for threads invocation id != 0 */
616 nir_ssa_def
*iid0_cond
= nir_ieq(&b
, iid
, nir_imm_int(&b
, 0));
617 nir_intrinsic_instr
*cond_end
=
618 nir_intrinsic_instr_create(shader
, nir_intrinsic_cond_end_ir3
);
619 cond_end
->src
[0] = nir_src_for_ssa(iid0_cond
);
620 nir_builder_instr_insert(&b
, &cond_end
->instr
);
622 emit_tess_epilouge(&b
, &state
);
626 nir_metadata_preserve(impl
, 0);
631 lower_tess_eval_block(nir_block
*block
, nir_builder
*b
, struct state
*state
)
633 nir_foreach_instr_safe (instr
, block
) {
634 if (instr
->type
!= nir_instr_type_intrinsic
)
637 nir_intrinsic_instr
*intr
= nir_instr_as_intrinsic(instr
);
639 switch (intr
->intrinsic
) {
640 case nir_intrinsic_load_tess_coord
: {
641 b
->cursor
= nir_after_instr(&intr
->instr
);
642 nir_ssa_def
*x
= nir_channel(b
, &intr
->dest
.ssa
, 0);
643 nir_ssa_def
*y
= nir_channel(b
, &intr
->dest
.ssa
, 1);
646 if (state
->topology
== IR3_TESS_TRIANGLES
)
647 z
= nir_fsub(b
, nir_fsub(b
, nir_imm_float(b
, 1.0f
), y
), x
);
649 z
= nir_imm_float(b
, 0.0f
);
651 nir_ssa_def
*coord
= nir_vec3(b
, x
, y
, z
);
653 nir_ssa_def_rewrite_uses_after(&intr
->dest
.ssa
,
654 nir_src_for_ssa(coord
),
659 case nir_intrinsic_load_per_vertex_input
: {
660 // src[] = { vertex, offset }.
662 b
->cursor
= nir_before_instr(&intr
->instr
);
664 nir_ssa_def
*address
= nir_load_tess_param_base_ir3(b
);
665 nir_variable
*var
= get_var(&b
->shader
->inputs
, nir_intrinsic_base(intr
));
666 nir_ssa_def
*offset
= build_per_vertex_offset(b
, state
,
667 intr
->src
[0].ssa
, intr
->src
[1].ssa
, var
);
669 replace_intrinsic(b
, intr
, nir_intrinsic_load_global_ir3
, address
, offset
, NULL
);
673 case nir_intrinsic_load_input
: {
674 // src[] = { offset }.
676 nir_variable
*var
= get_var(&b
->shader
->inputs
, nir_intrinsic_base(intr
));
678 debug_assert(var
->data
.patch
);
680 b
->cursor
= nir_before_instr(&intr
->instr
);
682 nir_ssa_def
*address
, *offset
;
684 /* note if vectorization of the tess level loads ever happens:
685 * "ldg" across 16-byte boundaries can behave incorrectly if results
686 * are never used. most likely some issue with (sy) not properly
687 * syncing with values coming from a second memory transaction.
689 if (is_tess_levels(var
)) {
690 assert(intr
->dest
.ssa
.num_components
== 1);
691 address
= nir_load_tess_factor_base_ir3(b
);
692 offset
= build_tessfactor_base(b
, var
->data
.location
, state
);
694 address
= nir_load_tess_param_base_ir3(b
);
695 offset
= build_patch_offset(b
, state
, intr
->src
[0].ssa
, var
);
698 offset
= nir_iadd(b
, offset
, nir_imm_int(b
, nir_intrinsic_component(intr
)));
700 replace_intrinsic(b
, intr
, nir_intrinsic_load_global_ir3
, address
, offset
, NULL
);
711 ir3_nir_lower_tess_eval(nir_shader
*shader
, unsigned topology
)
713 struct state state
= { .topology
= topology
};
715 if (shader_debug_enabled(shader
->info
.stage
)) {
716 fprintf(stderr
, "NIR (before tess lowering) for %s shader:\n",
717 _mesa_shader_stage_to_string(shader
->info
.stage
));
718 nir_print_shader(shader
, stderr
);
721 /* Build map of inputs so we have the sizes. */
722 build_primitive_map(shader
, &state
.map
, &shader
->inputs
);
724 nir_function_impl
*impl
= nir_shader_get_entrypoint(shader
);
728 nir_builder_init(&b
, impl
);
730 nir_foreach_block_safe (block
, impl
)
731 lower_tess_eval_block(block
, &b
, &state
);
733 nir_metadata_preserve(impl
, 0);
737 lower_gs_block(nir_block
*block
, nir_builder
*b
, struct state
*state
)
739 nir_foreach_instr_safe (instr
, block
) {
740 if (instr
->type
!= nir_instr_type_intrinsic
)
743 nir_intrinsic_instr
*intr
= nir_instr_as_intrinsic(instr
);
745 switch (intr
->intrinsic
) {
746 case nir_intrinsic_end_primitive
: {
747 b
->cursor
= nir_before_instr(&intr
->instr
);
748 nir_store_var(b
, state
->vertex_flags_out
, nir_imm_int(b
, 4), 0x1);
749 nir_instr_remove(&intr
->instr
);
753 case nir_intrinsic_emit_vertex
: {
754 /* Load the vertex count */
755 b
->cursor
= nir_before_instr(&intr
->instr
);
756 nir_ssa_def
*count
= nir_load_var(b
, state
->vertex_count_var
);
758 nir_push_if(b
, nir_ieq(b
, count
, local_thread_id(b
)));
760 foreach_two_lists(dest_node
, &state
->emit_outputs
, src_node
, &state
->old_outputs
) {
761 nir_variable
*dest
= exec_node_data(nir_variable
, dest_node
, node
);
762 nir_variable
*src
= exec_node_data(nir_variable
, src_node
, node
);
763 nir_copy_var(b
, dest
, src
);
766 nir_instr_remove(&intr
->instr
);
768 nir_store_var(b
, state
->emitted_vertex_var
,
769 nir_iadd(b
, nir_load_var(b
, state
->emitted_vertex_var
), nir_imm_int(b
, 1)), 0x1);
773 /* Increment the vertex count by 1 */
774 nir_store_var(b
, state
->vertex_count_var
,
775 nir_iadd(b
, count
, nir_imm_int(b
, 1)), 0x1); /* .x */
776 nir_store_var(b
, state
->vertex_flags_out
, nir_imm_int(b
, 0), 0x1);
788 ir3_nir_lower_gs(nir_shader
*shader
)
790 struct state state
= { };
792 if (shader_debug_enabled(shader
->info
.stage
)) {
793 fprintf(stderr
, "NIR (before gs lowering):\n");
794 nir_print_shader(shader
, stderr
);
797 build_primitive_map(shader
, &state
.map
, &shader
->inputs
);
799 /* Create an output var for vertex_flags. This will be shadowed below,
800 * same way regular outputs get shadowed, and this variable will become a
803 state
.vertex_flags_out
= nir_variable_create(shader
, nir_var_shader_out
,
804 glsl_uint_type(), "vertex_flags");
805 state
.vertex_flags_out
->data
.driver_location
= shader
->num_outputs
++;
806 state
.vertex_flags_out
->data
.location
= VARYING_SLOT_GS_VERTEX_FLAGS_IR3
;
807 state
.vertex_flags_out
->data
.interpolation
= INTERP_MODE_NONE
;
809 nir_function_impl
*impl
= nir_shader_get_entrypoint(shader
);
813 nir_builder_init(&b
, impl
);
814 b
.cursor
= nir_before_cf_list(&impl
->body
);
816 state
.header
= nir_load_gs_header_ir3(&b
);
818 /* Generate two set of shadow vars for the output variables. The first
819 * set replaces the real outputs and the second set (emit_outputs) we'll
820 * assign in the emit_vertex conditionals. Then at the end of the shader
821 * we copy the emit_outputs to the real outputs, so that we get
822 * store_output in uniform control flow.
824 exec_list_move_nodes_to(&shader
->outputs
, &state
.old_outputs
);
825 exec_list_make_empty(&state
.emit_outputs
);
826 nir_foreach_variable(var
, &state
.old_outputs
) {
827 /* Create a new output var by cloning the original output var and
830 nir_variable
*output
= nir_variable_clone(var
, shader
);
831 exec_list_push_tail(&shader
->outputs
, &output
->node
);
833 /* Rewrite the original output to be a shadow variable. */
834 var
->name
= ralloc_asprintf(var
, "%s@gs-temp", output
->name
);
835 var
->data
.mode
= nir_var_shader_temp
;
837 /* Clone the shadow variable to create the emit shadow variable that
838 * we'll assign in the emit conditionals.
840 nir_variable
*emit_output
= nir_variable_clone(var
, shader
);
841 emit_output
->name
= ralloc_asprintf(var
, "%s@emit-temp", output
->name
);
842 exec_list_push_tail(&state
.emit_outputs
, &emit_output
->node
);
845 /* During the shader we'll keep track of which vertex we're currently
846 * emitting for the EmitVertex test and how many vertices we emitted so we
847 * know to discard if didn't emit any. In most simple shaders, this can
848 * all be statically determined and gets optimized away.
850 state
.vertex_count_var
=
851 nir_local_variable_create(impl
, glsl_uint_type(), "vertex_count");
852 state
.emitted_vertex_var
=
853 nir_local_variable_create(impl
, glsl_uint_type(), "emitted_vertex");
855 /* Initialize to 0. */
856 b
.cursor
= nir_before_cf_list(&impl
->body
);
857 nir_store_var(&b
, state
.vertex_count_var
, nir_imm_int(&b
, 0), 0x1);
858 nir_store_var(&b
, state
.emitted_vertex_var
, nir_imm_int(&b
, 0), 0x1);
859 nir_store_var(&b
, state
.vertex_flags_out
, nir_imm_int(&b
, 4), 0x1);
861 nir_foreach_block_safe (block
, impl
)
862 lower_gs_block(block
, &b
, &state
);
864 set_foreach(impl
->end_block
->predecessors
, block_entry
) {
865 struct nir_block
*block
= (void *)block_entry
->key
;
866 b
.cursor
= nir_after_block_before_jump(block
);
868 nir_intrinsic_instr
*discard_if
=
869 nir_intrinsic_instr_create(b
.shader
, nir_intrinsic_discard_if
);
871 nir_ssa_def
*cond
= nir_ieq(&b
, nir_load_var(&b
, state
.emitted_vertex_var
), nir_imm_int(&b
, 0));
873 discard_if
->src
[0] = nir_src_for_ssa(cond
);
875 nir_builder_instr_insert(&b
, &discard_if
->instr
);
877 foreach_two_lists(dest_node
, &shader
->outputs
, src_node
, &state
.emit_outputs
) {
878 nir_variable
*dest
= exec_node_data(nir_variable
, dest_node
, node
);
879 nir_variable
*src
= exec_node_data(nir_variable
, src_node
, node
);
880 nir_copy_var(&b
, dest
, src
);
884 exec_list_append(&shader
->globals
, &state
.old_outputs
);
885 exec_list_append(&shader
->globals
, &state
.emit_outputs
);
887 nir_metadata_preserve(impl
, 0);
889 nir_lower_global_vars_to_local(shader
);
890 nir_split_var_copies(shader
);
891 nir_lower_var_copies(shader
);
893 nir_fixup_deref_modes(shader
);
895 if (shader_debug_enabled(shader
->info
.stage
)) {
896 fprintf(stderr
, "NIR (after gs lowering):\n");
897 nir_print_shader(shader
, stderr
);
902 ir3_link_geometry_stages(const struct ir3_shader_variant
*producer
,
903 const struct ir3_shader_variant
*consumer
,
906 uint32_t num_loc
= 0, factor
;
908 switch (consumer
->type
) {
909 case MESA_SHADER_TESS_CTRL
:
910 case MESA_SHADER_GEOMETRY
:
911 /* These stages load with ldlw, which expects byte offsets. */
914 case MESA_SHADER_TESS_EVAL
:
915 /* The tess eval shader uses ldg, which takes dword offsets. */
919 unreachable("bad shader stage");
922 nir_foreach_variable(in_var
, &consumer
->shader
->nir
->inputs
) {
923 nir_foreach_variable(out_var
, &producer
->shader
->nir
->outputs
) {
924 if (in_var
->data
.location
== out_var
->data
.location
) {
925 locs
[in_var
->data
.driver_location
] =
926 producer
->output_loc
[out_var
->data
.driver_location
] * factor
;
928 debug_assert(num_loc
<= in_var
->data
.driver_location
+ 1);
929 num_loc
= in_var
->data
.driver_location
+ 1;