/*
 * Copyright © 2019 Google, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include "ir3_nir.h"
#include "ir3_compiler.h"
#include "compiler/nir/nir_builder.h"
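/* Lowering passes for tessellation and geometry shader I/O on ir3: loads and
 * stores between the VS/HS/DS/GS stages are rewritten into explicit
 * shared-memory (ldlw/stlw) and global-memory (ldg/stg) accesses, with
 * addresses built from driver-supplied strides and base pointers.
 */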
struct primitive_map {
   unsigned loc[32];
   unsigned size[32];
   unsigned stride;
};

struct state {
   uint32_t topology;

   struct primitive_map map;

   nir_ssa_def *header;

   nir_variable *vertex_count_var;
   nir_variable *emitted_vertex_var;
   nir_variable *vertex_flags_out;

   struct exec_list old_outputs;
   struct exec_list emit_outputs;

   nir_ssa_def *outer_levels[4];
   nir_ssa_def *inner_levels[2];
};
static nir_ssa_def *
bitfield_extract(nir_builder *b, nir_ssa_def *v, uint32_t start, uint32_t mask)
{
   return nir_iand(b, nir_ushr(b, v, nir_imm_int(b, start)),
         nir_imm_int(b, mask));
}
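/* The per-stage header dword packs several IDs; the shift/mask pairs used
 * below imply this layout:
 *
 *   bits  0..5  : local primitive id
 *   bits  6..10 : vertex id
 *   bits 11..15 : invocation id
 *   bits 16..25 : local thread id (GS header, see local_thread_id())
 */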
static nir_ssa_def *
build_invocation_id(nir_builder *b, struct state *state)
{
   return bitfield_extract(b, state->header, 11, 31);
}

static nir_ssa_def *
build_vertex_id(nir_builder *b, struct state *state)
{
   return bitfield_extract(b, state->header, 6, 31);
}

static nir_ssa_def *
build_local_primitive_id(nir_builder *b, struct state *state)
{
   return bitfield_extract(b, state->header, 0, 63);
}
static nir_variable *
get_var(struct exec_list *list, int driver_location)
{
   nir_foreach_variable (v, list) {
      if (v->data.driver_location == driver_location) {
         return v;
      }
   }

   return NULL;
}
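/* Computes the offset of an attribute in local (shared) memory:
 *
 *   primitive_offset + vertex * vertex_stride + attr_offset + offset
 *
 * i.e. per-primitive data is laid out contiguously, with each vertex's
 * attributes packed inside it.
 */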
static nir_ssa_def *
build_local_offset(nir_builder *b, struct state *state,
      nir_ssa_def *vertex, uint32_t base, nir_ssa_def *offset)
{
   nir_ssa_def *primitive_stride = nir_load_vs_primitive_stride_ir3(b);
   nir_ssa_def *primitive_offset =
      nir_imul24(b, build_local_primitive_id(b, state), primitive_stride);
   nir_ssa_def *attr_offset;
   nir_ssa_def *vertex_stride;

   switch (b->shader->info.stage) {
   case MESA_SHADER_VERTEX:
   case MESA_SHADER_TESS_EVAL:
      vertex_stride = nir_imm_int(b, state->map.stride * 4);
      attr_offset = nir_imm_int(b, state->map.loc[base] * 4);
      break;
   case MESA_SHADER_TESS_CTRL:
   case MESA_SHADER_GEOMETRY:
      vertex_stride = nir_load_vs_vertex_stride_ir3(b);
      attr_offset = nir_load_primitive_location_ir3(b, base);
      break;
   default:
      unreachable("bad shader stage");
   }

   nir_ssa_def *vertex_offset = nir_imul24(b, vertex, vertex_stride);

   return nir_iadd(b, nir_iadd(b, primitive_offset, vertex_offset),
         nir_iadd(b, attr_offset, offset));
}
static nir_intrinsic_instr *
replace_intrinsic(nir_builder *b, nir_intrinsic_instr *intr,
      nir_intrinsic_op op, nir_ssa_def *src0, nir_ssa_def *src1, nir_ssa_def *src2)
{
   nir_intrinsic_instr *new_intr =
      nir_intrinsic_instr_create(b->shader, op);

   new_intr->src[0] = nir_src_for_ssa(src0);
   if (src1)
      new_intr->src[1] = nir_src_for_ssa(src1);
   if (src2)
      new_intr->src[2] = nir_src_for_ssa(src2);

   new_intr->num_components = intr->num_components;

   if (nir_intrinsic_infos[op].has_dest)
      nir_ssa_dest_init(&new_intr->instr, &new_intr->dest,
            intr->num_components, 32, NULL);

   nir_builder_instr_insert(b, &new_intr->instr);

   if (nir_intrinsic_infos[op].has_dest)
      nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_src_for_ssa(&new_intr->dest.ssa));

   nir_instr_remove(&intr->instr);

   return new_intr;
}
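/* The primitive map records, per driver_location, the per-vertex size (in
 * components) and offset of each variable, describing the packed layout
 * used for vertex data in shared/global memory; map->stride ends up as the
 * total per-vertex size.
 */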
static void
build_primitive_map(nir_shader *shader, struct primitive_map *map, struct exec_list *list)
{
   nir_foreach_variable (var, list) {
      switch (var->data.location) {
      case VARYING_SLOT_TESS_LEVEL_OUTER:
      case VARYING_SLOT_TESS_LEVEL_INNER:
         continue;
      }

      unsigned size = glsl_count_attribute_slots(var->type, false) * 4;

      assert(var->data.driver_location < ARRAY_SIZE(map->size));
      map->size[var->data.driver_location] =
         MAX2(map->size[var->data.driver_location], size);
   }

   unsigned loc = 0;
   for (uint32_t i = 0; i < ARRAY_SIZE(map->size); i++) {
      if (map->size[i] == 0)
         continue;

      nir_variable *var = get_var(list, i);
      map->loc[i] = loc;
      loc += map->size[i];

      /* Per-vertex inputs/outputs are declared as arrays with one entry
       * per vertex, so store the size of a single array element.
       */
      map->size[i] = map->size[i] / glsl_get_length(var->type);
   }

   map->stride = loc;
}
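/* Rewrites store_output into store_shared_ir3, addressed by the vertex id
 * extracted from the stage header, so that a later stage can read the
 * outputs back from shared memory.
 */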
static void
lower_block_to_explicit_output(nir_block *block, nir_builder *b, struct state *state)
{
   nir_foreach_instr_safe (instr, block) {
      if (instr->type != nir_instr_type_intrinsic)
         continue;

      nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);

      switch (intr->intrinsic) {
      case nir_intrinsic_store_output: {
         // src[] = { value, offset }.

         /* nir_lower_io_to_temporaries replaces all access to output
          * variables with temp variables and then emits a nir_copy_var at
          * the end of the shader.  Thus, we should always get a full wrmask
          * here.
          */
         assert(util_is_power_of_two_nonzero(nir_intrinsic_write_mask(intr) + 1));

         b->cursor = nir_instr_remove(&intr->instr);

         nir_ssa_def *vertex_id = build_vertex_id(b, state);
         nir_ssa_def *offset = build_local_offset(b, state, vertex_id,
               nir_intrinsic_base(intr), intr->src[1].ssa);
         nir_intrinsic_instr *store =
            nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_shared_ir3);

         store->src[0] = nir_src_for_ssa(intr->src[0].ssa);
         store->src[1] = nir_src_for_ssa(offset);
         store->num_components = intr->num_components;

         nir_builder_instr_insert(b, &store->instr);
         break;
      }

      default:
         break;
      }
   }
}
static nir_ssa_def *
local_thread_id(nir_builder *b)
{
   return bitfield_extract(b, nir_load_gs_header_ir3(b), 16, 1023);
}
void
ir3_nir_lower_to_explicit_output(nir_shader *shader, struct ir3_shader_variant *v,
      unsigned topology)
{
   struct state state = { };

   build_primitive_map(shader, &state.map, &shader->outputs);
   memcpy(v->output_loc, state.map.loc, sizeof(v->output_loc));

   nir_function_impl *impl = nir_shader_get_entrypoint(shader);
   assert(impl);

   nir_builder b;
   nir_builder_init(&b, impl);
   b.cursor = nir_before_cf_list(&impl->body);

   if (v->type == MESA_SHADER_VERTEX && topology != IR3_TESS_NONE)
      state.header = nir_load_tcs_header_ir3(&b);
   else
      state.header = nir_load_gs_header_ir3(&b);

   nir_foreach_block_safe (block, impl)
      lower_block_to_explicit_output(block, &b, &state);

   nir_metadata_preserve(impl, nir_metadata_block_index |
         nir_metadata_dominance);

   v->output_size = state.map.stride;
}
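/* The counterpart of lower_block_to_explicit_output: per-vertex inputs (and
 * gl_InvocationID) are turned into shared-memory loads, using offsets
 * derived from the stage header.
 */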
static void
lower_block_to_explicit_input(nir_block *block, nir_builder *b, struct state *state)
{
   nir_foreach_instr_safe (instr, block) {
      if (instr->type != nir_instr_type_intrinsic)
         continue;

      nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);

      switch (intr->intrinsic) {
      case nir_intrinsic_load_per_vertex_input: {
         // src[] = { vertex, offset }.

         b->cursor = nir_before_instr(&intr->instr);

         nir_ssa_def *offset = build_local_offset(b, state,
               intr->src[0].ssa, // this is typically gl_InvocationID
               nir_intrinsic_base(intr),
               intr->src[1].ssa);

         replace_intrinsic(b, intr, nir_intrinsic_load_shared_ir3, offset, NULL, NULL);
         break;
      }

      case nir_intrinsic_load_invocation_id: {
         b->cursor = nir_before_instr(&intr->instr);

         nir_ssa_def *iid = build_invocation_id(b, state);
         nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_src_for_ssa(iid));
         nir_instr_remove(&intr->instr);
         break;
      }

      default:
         break;
      }
   }
}
void
ir3_nir_lower_to_explicit_input(nir_shader *shader)
{
   struct state state = { };

   nir_function_impl *impl = nir_shader_get_entrypoint(shader);
   assert(impl);

   nir_builder b;
   nir_builder_init(&b, impl);
   b.cursor = nir_before_cf_list(&impl->body);

   if (shader->info.stage == MESA_SHADER_GEOMETRY)
      state.header = nir_load_gs_header_ir3(&b);
   else
      state.header = nir_load_tcs_header_ir3(&b);

   nir_foreach_block_safe (block, impl)
      lower_block_to_explicit_input(block, &b, &state);
}
static nir_ssa_def *
build_per_vertex_offset(nir_builder *b, struct state *state,
      nir_ssa_def *vertex, nir_ssa_def *offset, nir_variable *var)
{
   nir_ssa_def *primitive_id = nir_load_primitive_id(b);
   nir_ssa_def *patch_stride = nir_load_hs_patch_stride_ir3(b);
   nir_ssa_def *patch_offset = nir_imul24(b, primitive_id, patch_stride);
   nir_ssa_def *attr_offset;
   int loc = var->data.driver_location;

   switch (b->shader->info.stage) {
   case MESA_SHADER_TESS_CTRL:
      attr_offset = nir_imm_int(b, state->map.loc[loc]);
      break;
   case MESA_SHADER_TESS_EVAL:
      attr_offset = nir_load_primitive_location_ir3(b, loc);
      break;
   default:
      unreachable("bad shader stage");
   }

   nir_ssa_def *attr_stride = nir_imm_int(b, state->map.size[loc]);
   nir_ssa_def *vertex_offset = nir_imul24(b, vertex, attr_stride);

   return nir_iadd(b, nir_iadd(b, patch_offset, attr_offset),
         nir_iadd(b, vertex_offset, nir_ishl(b, offset, nir_imm_int(b, 2))));
}
static nir_ssa_def *
build_patch_offset(nir_builder *b, struct state *state, nir_ssa_def *offset, nir_variable *var)
{
   debug_assert(var && var->data.patch);

   return build_per_vertex_offset(b, state, nir_imm_int(b, 0), offset, var);
}
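/* Offset of a patch's tess levels in the tess factor BO. The layout implied
 * here is (1 + outer_levels + inner_levels) dwords per patch: one header
 * dword, then the outer levels, then the inner levels.
 */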
static nir_ssa_def *
build_tessfactor_base(nir_builder *b, gl_varying_slot slot, struct state *state)
{
   uint32_t inner_levels, outer_levels;
   switch (state->topology) {
   case IR3_TESS_TRIANGLES:
      inner_levels = 1;
      outer_levels = 3;
      break;
   case IR3_TESS_QUADS:
      inner_levels = 2;
      outer_levels = 4;
      break;
   case IR3_TESS_ISOLINES:
      inner_levels = 0;
      outer_levels = 2;
      break;
   default:
      unreachable("bad topology");
   }

   const uint32_t patch_stride = 1 + inner_levels + outer_levels;

   nir_ssa_def *primitive_id = nir_load_primitive_id(b);

   nir_ssa_def *patch_offset = nir_imul24(b, primitive_id, nir_imm_int(b, patch_stride));

   uint32_t offset;
   switch (slot) {
   case VARYING_SLOT_TESS_LEVEL_OUTER:
      /* There's some kind of header dword, tess levels start at index 1. */
      offset = 1;
      break;
   case VARYING_SLOT_TESS_LEVEL_INNER:
      offset = 1 + outer_levels;
      break;
   default:
      unreachable("bad slot");
   }

   return nir_iadd(b, patch_offset, nir_imm_int(b, offset));
}
static void
lower_tess_ctrl_block(nir_block *block, nir_builder *b, struct state *state)
{
   nir_foreach_instr_safe (instr, block) {
      if (instr->type != nir_instr_type_intrinsic)
         continue;

      nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);

      switch (intr->intrinsic) {
      case nir_intrinsic_control_barrier:
      case nir_intrinsic_memory_barrier_tcs_patch:
         /* Hull shaders dispatch 32 wide so an entire patch will always
          * fit in a single warp and execute in lock-step.  Consequently,
          * we don't need to do anything for TCS barriers so just remove
          * the intrinsic.  Otherwise we'll emit an actual barrier
          * instruction, which will deadlock.
          */
         nir_instr_remove(&intr->instr);
         break;

      case nir_intrinsic_load_per_vertex_output: {
         // src[] = { vertex, offset }.

         b->cursor = nir_before_instr(&intr->instr);

         nir_ssa_def *address = nir_load_tess_param_base_ir3(b);
         nir_variable *var = get_var(&b->shader->outputs, nir_intrinsic_base(intr));
         nir_ssa_def *offset = build_per_vertex_offset(b, state,
               intr->src[0].ssa, intr->src[1].ssa, var);

         replace_intrinsic(b, intr, nir_intrinsic_load_global_ir3, address, offset, NULL);
         break;
      }

      case nir_intrinsic_store_per_vertex_output: {
         // src[] = { value, vertex, offset }.

         b->cursor = nir_before_instr(&intr->instr);

         /* nir_lower_io_to_temporaries replaces all access to output
          * variables with temp variables and then emits a nir_copy_var at
          * the end of the shader.  Thus, we should always get a full wrmask
          * here.
          */
         assert(util_is_power_of_two_nonzero(nir_intrinsic_write_mask(intr) + 1));

         nir_ssa_def *value = intr->src[0].ssa;
         nir_ssa_def *address = nir_load_tess_param_base_ir3(b);
         nir_variable *var = get_var(&b->shader->outputs, nir_intrinsic_base(intr));
         nir_ssa_def *offset = build_per_vertex_offset(b, state,
               intr->src[1].ssa, intr->src[2].ssa, var);

         replace_intrinsic(b, intr, nir_intrinsic_store_global_ir3, value, address,
               nir_iadd(b, offset, nir_imm_int(b, nir_intrinsic_component(intr))));

         break;
      }

      case nir_intrinsic_load_tess_level_inner:
      case nir_intrinsic_load_tess_level_outer: {
         b->cursor = nir_before_instr(&intr->instr);

         gl_varying_slot slot;
         if (intr->intrinsic == nir_intrinsic_load_tess_level_inner)
            slot = VARYING_SLOT_TESS_LEVEL_INNER;
         else
            slot = VARYING_SLOT_TESS_LEVEL_OUTER;

         nir_ssa_def *address = nir_load_tess_factor_base_ir3(b);
         nir_ssa_def *offset = build_tessfactor_base(b, slot, state);

         replace_intrinsic(b, intr, nir_intrinsic_load_global_ir3, address, offset, NULL);
         break;
      }

      case nir_intrinsic_load_output: {
         // src[] = { offset }.

         nir_variable *var = get_var(&b->shader->outputs, nir_intrinsic_base(intr));

         b->cursor = nir_before_instr(&intr->instr);

         nir_ssa_def *address = nir_load_tess_param_base_ir3(b);
         nir_ssa_def *offset = build_patch_offset(b, state, intr->src[0].ssa, var);

         replace_intrinsic(b, intr, nir_intrinsic_load_global_ir3, address, offset, NULL);
         break;
      }

      case nir_intrinsic_store_output: {
         // src[] = { value, offset }.

         /* write patch output to bo */

         nir_variable *var = get_var(&b->shader->outputs, nir_intrinsic_base(intr));

         nir_ssa_def **levels = NULL;
         if (var->data.location == VARYING_SLOT_TESS_LEVEL_OUTER)
            levels = state->outer_levels;
         else if (var->data.location == VARYING_SLOT_TESS_LEVEL_INNER)
            levels = state->inner_levels;

         b->cursor = nir_before_instr(&intr->instr);

         if (levels) {
            for (int i = 0; i < 4; i++) {
               if (nir_intrinsic_write_mask(intr) & (1 << i)) {
                  uint32_t component = nir_intrinsic_component(intr);
                  levels[i + component] = nir_channel(b, intr->src[0].ssa, i);
               }
            }
            nir_instr_remove(&intr->instr);
         } else {
            nir_ssa_def *address = nir_load_tess_param_base_ir3(b);
            nir_ssa_def *offset = build_patch_offset(b, state, intr->src[1].ssa, var);

            debug_assert(nir_intrinsic_component(intr) == 0);

            /* nir_lower_io_to_temporaries replaces all access to output
             * variables with temp variables and then emits a nir_copy_var at
             * the end of the shader.  Thus, we should always get a full wrmask
             * here.
             */
            assert(util_is_power_of_two_nonzero(nir_intrinsic_write_mask(intr) + 1));

            replace_intrinsic(b, intr, nir_intrinsic_store_global_ir3,
                  intr->src[0].ssa, address, offset);
         }
         break;
      }

      default:
         break;
      }
   }
}
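/* Writes the tess levels collected by lower_tess_ctrl_block() out to the
 * tess factor BO and ends the patch.  This runs after the cond_end_ir3
 * inserted in ir3_nir_lower_tess_ctrl(), i.e. effectively only for
 * invocation 0.
 */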
static void
emit_tess_epilogue(nir_builder *b, struct state *state)
{
   nir_ssa_def *tessfactor_address = nir_load_tess_factor_base_ir3(b);
   nir_ssa_def *levels[2];

   if (!state->outer_levels[0])
      return;

   /* Then emit the epilogue that actually writes out the tessellation levels
    * to the BO.
    */
   switch (state->topology) {
   case IR3_TESS_TRIANGLES:
      levels[0] = nir_vec4(b, state->outer_levels[0], state->outer_levels[1],
            state->outer_levels[2], state->inner_levels[0]);
      levels[1] = NULL;
      break;
   case IR3_TESS_QUADS:
      levels[0] = nir_vec4(b, state->outer_levels[0], state->outer_levels[1],
            state->outer_levels[2], state->outer_levels[3]);
      levels[1] = nir_vec2(b, state->inner_levels[0], state->inner_levels[1]);
      break;
   case IR3_TESS_ISOLINES:
      levels[0] = nir_vec2(b, state->outer_levels[0], state->outer_levels[1]);
      levels[1] = NULL;
      break;
   default:
      unreachable("bad topology");
   }

   nir_ssa_def *offset = build_tessfactor_base(b, VARYING_SLOT_TESS_LEVEL_OUTER, state);

   nir_intrinsic_instr *store =
      nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_global_ir3);

   store->src[0] = nir_src_for_ssa(levels[0]);
   store->src[1] = nir_src_for_ssa(tessfactor_address);
   store->src[2] = nir_src_for_ssa(offset);
   nir_builder_instr_insert(b, &store->instr);
   store->num_components = levels[0]->num_components;

   if (levels[1]) {
      store = nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_global_ir3);
      offset = nir_iadd(b, offset, nir_imm_int(b, levels[0]->num_components));

      store->src[0] = nir_src_for_ssa(levels[1]);
      store->src[1] = nir_src_for_ssa(tessfactor_address);
      store->src[2] = nir_src_for_ssa(offset);
      nir_builder_instr_insert(b, &store->instr);
      store->num_components = levels[1]->num_components;
   }

   /* Finally, insert endpatch instruction:
    *
    * TODO we should re-work this to use normal flow control.
    */

   nir_intrinsic_instr *end_patch =
      nir_intrinsic_instr_create(b->shader, nir_intrinsic_end_patch_ir3);
   nir_builder_instr_insert(b, &end_patch->instr);
}
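/* TCS lowering: rewrite per-vertex and patch I/O to explicit global
 * accesses, wrap the original body in `if (gl_InvocationID <
 * tcs_vertices_out)`, and append the tess factor epilogue, guarded so that
 * only invocation 0 executes it.
 */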
void
ir3_nir_lower_tess_ctrl(nir_shader *shader, struct ir3_shader_variant *v,
      unsigned topology)
{
   struct state state = { .topology = topology };

   if (shader_debug_enabled(shader->info.stage)) {
      fprintf(stderr, "NIR (before tess lowering) for %s shader:\n",
            _mesa_shader_stage_to_string(shader->info.stage));
      nir_print_shader(shader, stderr);
   }

   build_primitive_map(shader, &state.map, &shader->outputs);
   memcpy(v->output_loc, state.map.loc, sizeof(v->output_loc));
   v->output_size = state.map.stride;

   nir_function_impl *impl = nir_shader_get_entrypoint(shader);
   assert(impl);

   nir_builder b;
   nir_builder_init(&b, impl);
   b.cursor = nir_before_cf_list(&impl->body);

   state.header = nir_load_tcs_header_ir3(&b);

   nir_foreach_block_safe (block, impl)
      lower_tess_ctrl_block(block, &b, &state);

   /* Now move the body of the TCS into a conditional:
    *
    *   if (gl_InvocationID < num_vertices)
    */

   nir_cf_list body;
   nir_cf_extract(&body, nir_before_cf_list(&impl->body),
         nir_after_cf_list(&impl->body));

   b.cursor = nir_after_cf_list(&impl->body);

   /* Re-emit the header, since the old one got moved into the if branch */
   state.header = nir_load_tcs_header_ir3(&b);
   nir_ssa_def *iid = build_invocation_id(&b, &state);

   const uint32_t nvertices = shader->info.tess.tcs_vertices_out;
   nir_ssa_def *cond = nir_ult(&b, iid, nir_imm_int(&b, nvertices));

   nir_if *nif = nir_push_if(&b, cond);

   nir_cf_reinsert(&body, b.cursor);

   b.cursor = nir_after_cf_list(&nif->then_list);

   /* Insert conditional exit for threads invocation id != 0 */
   nir_ssa_def *iid0_cond = nir_ieq(&b, iid, nir_imm_int(&b, 0));
   nir_intrinsic_instr *cond_end =
      nir_intrinsic_instr_create(shader, nir_intrinsic_cond_end_ir3);
   cond_end->src[0] = nir_src_for_ssa(iid0_cond);
   nir_builder_instr_insert(&b, &cond_end->instr);

   emit_tess_epilogue(&b, &state);

   nir_pop_if(&b, nif);

   nir_metadata_preserve(impl, 0);
}
static void
lower_tess_eval_block(nir_block *block, nir_builder *b, struct state *state)
{
   nir_foreach_instr_safe (instr, block) {
      if (instr->type != nir_instr_type_intrinsic)
         continue;

      nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);

      switch (intr->intrinsic) {
      case nir_intrinsic_load_tess_coord: {
         b->cursor = nir_after_instr(&intr->instr);
         nir_ssa_def *x = nir_channel(b, &intr->dest.ssa, 0);
         nir_ssa_def *y = nir_channel(b, &intr->dest.ssa, 1);
         nir_ssa_def *z;

         if (state->topology == IR3_TESS_TRIANGLES)
            z = nir_fsub(b, nir_fsub(b, nir_imm_float(b, 1.0f), y), x);
         else
            z = nir_imm_float(b, 0.0f);

         nir_ssa_def *coord = nir_vec3(b, x, y, z);

         nir_ssa_def_rewrite_uses_after(&intr->dest.ssa,
               nir_src_for_ssa(coord),
               coord->parent_instr);
         break;
      }

      case nir_intrinsic_load_per_vertex_input: {
         // src[] = { vertex, offset }.

         b->cursor = nir_before_instr(&intr->instr);

         nir_ssa_def *address = nir_load_tess_param_base_ir3(b);
         nir_variable *var = get_var(&b->shader->inputs, nir_intrinsic_base(intr));
         nir_ssa_def *offset = build_per_vertex_offset(b, state,
               intr->src[0].ssa, intr->src[1].ssa, var);

         replace_intrinsic(b, intr, nir_intrinsic_load_global_ir3, address, offset, NULL);
         break;
      }

      case nir_intrinsic_load_tess_level_inner:
      case nir_intrinsic_load_tess_level_outer: {
         unsigned dest_comp = nir_intrinsic_dest_components(intr);
         b->cursor = nir_before_instr(&intr->instr);

         gl_varying_slot slot;
         if (intr->intrinsic == nir_intrinsic_load_tess_level_inner)
            slot = VARYING_SLOT_TESS_LEVEL_INNER;
         else
            slot = VARYING_SLOT_TESS_LEVEL_OUTER;

         nir_ssa_def *address = nir_load_tess_factor_base_ir3(b);
         nir_ssa_def *offset = build_tessfactor_base(b, slot, state);

         /* Loading across a vec4 (16b) memory boundary is problematic
          * if we don't use components from the second vec4.  The tess
          * levels aren't guaranteed to be vec4 aligned and we don't
          * know which levels are actually used, so we load each
          * component individually.
          */
         nir_ssa_def *levels[4];
         for (unsigned i = 0; i < dest_comp; i++) {
            nir_intrinsic_instr *new_intr =
               nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_global_ir3);

            new_intr->src[0] = nir_src_for_ssa(address);
            new_intr->src[1] = nir_src_for_ssa(nir_iadd(b, offset, nir_imm_int(b, i)));
            new_intr->num_components = 1;
            nir_ssa_dest_init(&new_intr->instr, &new_intr->dest, 1, 32, NULL);
            nir_builder_instr_insert(b, &new_intr->instr);
            levels[i] = &new_intr->dest.ssa;
         }

         nir_ssa_def *v = nir_vec(b, levels, dest_comp);

         nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_src_for_ssa(v));

         nir_instr_remove(&intr->instr);
         break;
      }

      case nir_intrinsic_load_input: {
         // src[] = { offset }.

         nir_variable *var = get_var(&b->shader->inputs, nir_intrinsic_base(intr));

         debug_assert(var->data.patch);

         b->cursor = nir_before_instr(&intr->instr);

         nir_ssa_def *address = nir_load_tess_param_base_ir3(b);
         nir_ssa_def *offset = build_patch_offset(b, state, intr->src[0].ssa, var);

         replace_intrinsic(b, intr, nir_intrinsic_load_global_ir3, address, offset, NULL);
         break;
      }

      default:
         break;
      }
   }
}
void
ir3_nir_lower_tess_eval(nir_shader *shader, unsigned topology)
{
   struct state state = { .topology = topology };

   if (shader_debug_enabled(shader->info.stage)) {
      fprintf(stderr, "NIR (before tess lowering) for %s shader:\n",
            _mesa_shader_stage_to_string(shader->info.stage));
      nir_print_shader(shader, stderr);
   }

   /* Build map of inputs so we have the sizes. */
   build_primitive_map(shader, &state.map, &shader->inputs);

   nir_function_impl *impl = nir_shader_get_entrypoint(shader);
   assert(impl);

   nir_builder b;
   nir_builder_init(&b, impl);

   nir_foreach_block_safe (block, impl)
      lower_tess_eval_block(block, &b, &state);

   nir_metadata_preserve(impl, 0);
}
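/* GS lowering: EmitVertex() becomes a guarded copy of the output shadow
 * variables into the emit shadow variables, taken when the running vertex
 * count matches this fiber's local thread id, so that each fiber ends up
 * owning (at most) one emitted vertex.
 */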
static void
lower_gs_block(nir_block *block, nir_builder *b, struct state *state)
{
   nir_foreach_instr_safe (instr, block) {
      if (instr->type != nir_instr_type_intrinsic)
         continue;

      nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);

      switch (intr->intrinsic) {
      case nir_intrinsic_end_primitive: {
         b->cursor = nir_before_instr(&intr->instr);
         nir_store_var(b, state->vertex_flags_out, nir_imm_int(b, 4), 0x1);
         nir_instr_remove(&intr->instr);
         break;
      }

      case nir_intrinsic_emit_vertex: {
         /* Load the vertex count */
         b->cursor = nir_before_instr(&intr->instr);
         nir_ssa_def *count = nir_load_var(b, state->vertex_count_var);

         nir_push_if(b, nir_ieq(b, count, local_thread_id(b)));

         foreach_two_lists(dest_node, &state->emit_outputs, src_node, &state->old_outputs) {
            nir_variable *dest = exec_node_data(nir_variable, dest_node, node);
            nir_variable *src = exec_node_data(nir_variable, src_node, node);
            nir_copy_var(b, dest, src);
         }

         nir_instr_remove(&intr->instr);

         nir_store_var(b, state->emitted_vertex_var,
               nir_iadd(b, nir_load_var(b, state->emitted_vertex_var), nir_imm_int(b, 1)), 0x1);

         nir_pop_if(b, NULL);

         /* Increment the vertex count by 1 */
         nir_store_var(b, state->vertex_count_var,
               nir_iadd(b, count, nir_imm_int(b, 1)), 0x1); /* .x */
         nir_store_var(b, state->vertex_flags_out, nir_imm_int(b, 0), 0x1);

         break;
      }

      default:
         break;
      }
   }
}
void
ir3_nir_lower_gs(nir_shader *shader)
{
   struct state state = { };

   if (shader_debug_enabled(shader->info.stage)) {
      fprintf(stderr, "NIR (before gs lowering):\n");
      nir_print_shader(shader, stderr);
   }

   build_primitive_map(shader, &state.map, &shader->inputs);

   /* Create an output var for vertex_flags. This will be shadowed below,
    * same way regular outputs get shadowed, and this variable will become a
    * regular output.
    */
   state.vertex_flags_out = nir_variable_create(shader, nir_var_shader_out,
         glsl_uint_type(), "vertex_flags");
   state.vertex_flags_out->data.driver_location = shader->num_outputs++;
   state.vertex_flags_out->data.location = VARYING_SLOT_GS_VERTEX_FLAGS_IR3;
   state.vertex_flags_out->data.interpolation = INTERP_MODE_NONE;

   nir_function_impl *impl = nir_shader_get_entrypoint(shader);
   assert(impl);

   nir_builder b;
   nir_builder_init(&b, impl);
   b.cursor = nir_before_cf_list(&impl->body);

   state.header = nir_load_gs_header_ir3(&b);

   /* Generate two sets of shadow vars for the output variables.  The first
    * set replaces the real outputs and the second set (emit_outputs) we'll
    * assign in the emit_vertex conditionals.  Then at the end of the shader
    * we copy the emit_outputs to the real outputs, so that we get
    * store_output in uniform control flow.
    */
   exec_list_move_nodes_to(&shader->outputs, &state.old_outputs);
   exec_list_make_empty(&state.emit_outputs);
   nir_foreach_variable(var, &state.old_outputs) {
      /* Create a new output var by cloning the original output var and
       * stealing the name.
       */
      nir_variable *output = nir_variable_clone(var, shader);
      exec_list_push_tail(&shader->outputs, &output->node);

      /* Rewrite the original output to be a shadow variable. */
      var->name = ralloc_asprintf(var, "%s@gs-temp", output->name);
      var->data.mode = nir_var_shader_temp;

      /* Clone the shadow variable to create the emit shadow variable that
       * we'll assign in the emit conditionals.
       */
      nir_variable *emit_output = nir_variable_clone(var, shader);
      emit_output->name = ralloc_asprintf(var, "%s@emit-temp", output->name);
      exec_list_push_tail(&state.emit_outputs, &emit_output->node);
   }

   /* During the shader we'll keep track of which vertex we're currently
    * emitting for the EmitVertex test and how many vertices we emitted so we
    * know to discard if we didn't emit any.  In most simple shaders, this can
    * all be statically determined and gets optimized away.
    */
   state.vertex_count_var =
      nir_local_variable_create(impl, glsl_uint_type(), "vertex_count");
   state.emitted_vertex_var =
      nir_local_variable_create(impl, glsl_uint_type(), "emitted_vertex");

   /* Initialize to 0. */
   b.cursor = nir_before_cf_list(&impl->body);
   nir_store_var(&b, state.vertex_count_var, nir_imm_int(&b, 0), 0x1);
   nir_store_var(&b, state.emitted_vertex_var, nir_imm_int(&b, 0), 0x1);
   nir_store_var(&b, state.vertex_flags_out, nir_imm_int(&b, 4), 0x1);

   nir_foreach_block_safe (block, impl)
      lower_gs_block(block, &b, &state);

   set_foreach(impl->end_block->predecessors, block_entry) {
      struct nir_block *block = (void *)block_entry->key;
      b.cursor = nir_after_block_before_jump(block);

      nir_intrinsic_instr *discard_if =
         nir_intrinsic_instr_create(b.shader, nir_intrinsic_discard_if);

      nir_ssa_def *cond = nir_ieq(&b, nir_load_var(&b, state.emitted_vertex_var), nir_imm_int(&b, 0));

      discard_if->src[0] = nir_src_for_ssa(cond);

      nir_builder_instr_insert(&b, &discard_if->instr);

      foreach_two_lists(dest_node, &shader->outputs, src_node, &state.emit_outputs) {
         nir_variable *dest = exec_node_data(nir_variable, dest_node, node);
         nir_variable *src = exec_node_data(nir_variable, src_node, node);
         nir_copy_var(&b, dest, src);
      }
   }

   exec_list_append(&shader->globals, &state.old_outputs);
   exec_list_append(&shader->globals, &state.emit_outputs);

   nir_metadata_preserve(impl, 0);

   nir_lower_global_vars_to_local(shader);
   nir_split_var_copies(shader);
   nir_lower_var_copies(shader);

   nir_fixup_deref_modes(shader);

   if (shader_debug_enabled(shader->info.stage)) {
      fprintf(stderr, "NIR (after gs lowering):\n");
      nir_print_shader(shader, stderr);
   }
}
uint32_t
ir3_link_geometry_stages(const struct ir3_shader_variant *producer,
      const struct ir3_shader_variant *consumer,
      uint32_t *locs)
{
   uint32_t num_loc = 0, factor;

   switch (consumer->type) {
   case MESA_SHADER_TESS_CTRL:
   case MESA_SHADER_GEOMETRY:
      /* These stages load with ldlw, which expects byte offsets. */
      factor = 4;
      break;
   case MESA_SHADER_TESS_EVAL:
      /* The tess eval shader uses ldg, which takes dword offsets. */
      factor = 1;
      break;
   default:
      unreachable("bad shader stage");
   }

   nir_foreach_variable(in_var, &consumer->shader->nir->inputs) {
      nir_foreach_variable(out_var, &producer->shader->nir->outputs) {
         if (in_var->data.location == out_var->data.location) {
            locs[in_var->data.driver_location] =
               producer->output_loc[out_var->data.driver_location] * factor;

            debug_assert(num_loc <= in_var->data.driver_location + 1);
            num_loc = in_var->data.driver_location + 1;
         }
      }
   }

   return num_loc;
}