/*
 * Copyright © 2019 Google, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
25 #include "ir3_compiler.h"
26 #include "compiler/nir/nir_builder.h"
31 struct primitive_map
{
39 nir_variable
*vertex_count_var
;
40 nir_variable
*emitted_vertex_var
;
41 nir_variable
*vertex_flags_out
;
43 struct exec_list old_outputs
;
44 struct exec_list emit_outputs
;
46 /* tess ctrl shader on a650 gets the local primitive id at different bits: */
47 unsigned local_primitive_id_start
;
51 bitfield_extract(nir_builder
*b
, nir_ssa_def
*v
, uint32_t start
, uint32_t mask
)
53 return nir_iand(b
, nir_ushr(b
, v
, nir_imm_int(b
, start
)),
54 nir_imm_int(b
, mask
));
58 build_invocation_id(nir_builder
*b
, struct state
*state
)
60 return bitfield_extract(b
, state
->header
, 11, 31);
64 build_vertex_id(nir_builder
*b
, struct state
*state
)
66 return bitfield_extract(b
, state
->header
, 6, 31);
70 build_local_primitive_id(nir_builder
*b
, struct state
*state
)
72 return bitfield_extract(b
, state
->header
, state
->local_primitive_id_start
, 63);
/* Find the variable in 'list' with the given driver_location, or NULL.
 * NOTE(review): the return statements were dropped from the extracted
 * text; restored here per the obvious intent of the lookup loop.
 */
static nir_variable *
get_var(struct exec_list *list, int driver_location)
{
	nir_foreach_variable (v, list) {
		if (v->data.driver_location == driver_location) {
			return v;
		}
	}

	return NULL;
}
88 is_tess_levels(nir_variable
*var
)
90 return (var
->data
.location
== VARYING_SLOT_TESS_LEVEL_OUTER
||
91 var
->data
.location
== VARYING_SLOT_TESS_LEVEL_INNER
);
95 build_local_offset(nir_builder
*b
, struct state
*state
,
96 nir_ssa_def
*vertex
, uint32_t base
, nir_ssa_def
*offset
)
98 nir_ssa_def
*primitive_stride
= nir_load_vs_primitive_stride_ir3(b
);
99 nir_ssa_def
*primitive_offset
=
100 nir_imul24(b
, build_local_primitive_id(b
, state
), primitive_stride
);
101 nir_ssa_def
*attr_offset
;
102 nir_ssa_def
*vertex_stride
;
104 switch (b
->shader
->info
.stage
) {
105 case MESA_SHADER_VERTEX
:
106 case MESA_SHADER_TESS_EVAL
:
107 vertex_stride
= nir_imm_int(b
, state
->map
.stride
* 4);
108 attr_offset
= nir_imm_int(b
, state
->map
.loc
[base
] * 4);
110 case MESA_SHADER_TESS_CTRL
:
111 case MESA_SHADER_GEOMETRY
:
112 vertex_stride
= nir_load_vs_vertex_stride_ir3(b
);
113 attr_offset
= nir_load_primitive_location_ir3(b
, base
);
116 unreachable("bad shader stage");
119 nir_ssa_def
*vertex_offset
= nir_imul24(b
, vertex
, vertex_stride
);
121 return nir_iadd(b
, nir_iadd(b
, primitive_offset
, vertex_offset
),
122 nir_iadd(b
, attr_offset
, offset
));
125 static nir_intrinsic_instr
*
126 replace_intrinsic(nir_builder
*b
, nir_intrinsic_instr
*intr
,
127 nir_intrinsic_op op
, nir_ssa_def
*src0
, nir_ssa_def
*src1
, nir_ssa_def
*src2
)
129 nir_intrinsic_instr
*new_intr
=
130 nir_intrinsic_instr_create(b
->shader
, op
);
132 new_intr
->src
[0] = nir_src_for_ssa(src0
);
134 new_intr
->src
[1] = nir_src_for_ssa(src1
);
136 new_intr
->src
[2] = nir_src_for_ssa(src2
);
138 new_intr
->num_components
= intr
->num_components
;
140 if (nir_intrinsic_infos
[op
].has_dest
)
141 nir_ssa_dest_init(&new_intr
->instr
, &new_intr
->dest
,
142 intr
->num_components
, 32, NULL
);
144 nir_builder_instr_insert(b
, &new_intr
->instr
);
146 if (nir_intrinsic_infos
[op
].has_dest
)
147 nir_ssa_def_rewrite_uses(&intr
->dest
.ssa
, nir_src_for_ssa(&new_intr
->dest
.ssa
));
149 nir_instr_remove(&intr
->instr
);
155 build_primitive_map(nir_shader
*shader
, struct primitive_map
*map
, struct exec_list
*list
)
157 nir_foreach_variable (var
, list
) {
158 switch (var
->data
.location
) {
159 case VARYING_SLOT_TESS_LEVEL_OUTER
:
160 case VARYING_SLOT_TESS_LEVEL_INNER
:
164 unsigned size
= glsl_count_attribute_slots(var
->type
, false) * 4;
166 assert(var
->data
.driver_location
< ARRAY_SIZE(map
->size
));
167 map
->size
[var
->data
.driver_location
] =
168 MAX2(map
->size
[var
->data
.driver_location
], size
);
172 for (uint32_t i
= 0; i
< ARRAY_SIZE(map
->size
); i
++) {
173 if (map
->size
[i
] == 0)
175 nir_variable
*var
= get_var(list
, i
);
182 map
->size
[i
] = map
->size
[i
] / glsl_get_length(var
->type
);
189 lower_block_to_explicit_output(nir_block
*block
, nir_builder
*b
, struct state
*state
)
191 nir_foreach_instr_safe (instr
, block
) {
192 if (instr
->type
!= nir_instr_type_intrinsic
)
195 nir_intrinsic_instr
*intr
= nir_instr_as_intrinsic(instr
);
197 switch (intr
->intrinsic
) {
198 case nir_intrinsic_store_output
: {
199 // src[] = { value, offset }.
201 /* nir_lower_io_to_temporaries replaces all access to output
202 * variables with temp variables and then emits a nir_copy_var at
203 * the end of the shader. Thus, we should always get a full wrmask
206 assert(util_is_power_of_two_nonzero(nir_intrinsic_write_mask(intr
) + 1));
208 b
->cursor
= nir_instr_remove(&intr
->instr
);
210 nir_ssa_def
*vertex_id
= build_vertex_id(b
, state
);
211 nir_ssa_def
*offset
= build_local_offset(b
, state
, vertex_id
, nir_intrinsic_base(intr
),
213 nir_intrinsic_instr
*store
=
214 nir_intrinsic_instr_create(b
->shader
, nir_intrinsic_store_shared_ir3
);
216 store
->src
[0] = nir_src_for_ssa(intr
->src
[0].ssa
);
217 store
->src
[1] = nir_src_for_ssa(offset
);
218 store
->num_components
= intr
->num_components
;
220 nir_builder_instr_insert(b
, &store
->instr
);
231 local_thread_id(nir_builder
*b
)
233 return bitfield_extract(b
, nir_load_gs_header_ir3(b
), 16, 1023);
237 ir3_nir_lower_to_explicit_output(nir_shader
*shader
, struct ir3_shader_variant
*v
,
240 struct state state
= { };
242 build_primitive_map(shader
, &state
.map
, &shader
->outputs
);
243 memcpy(v
->output_loc
, state
.map
.loc
, sizeof(v
->output_loc
));
245 nir_function_impl
*impl
= nir_shader_get_entrypoint(shader
);
249 nir_builder_init(&b
, impl
);
250 b
.cursor
= nir_before_cf_list(&impl
->body
);
252 if (v
->type
== MESA_SHADER_VERTEX
&& topology
!= IR3_TESS_NONE
)
253 state
.header
= nir_load_tcs_header_ir3(&b
);
255 state
.header
= nir_load_gs_header_ir3(&b
);
257 nir_foreach_block_safe (block
, impl
)
258 lower_block_to_explicit_output(block
, &b
, &state
);
260 nir_metadata_preserve(impl
, nir_metadata_block_index
|
261 nir_metadata_dominance
);
263 v
->output_size
= state
.map
.stride
;
268 lower_block_to_explicit_input(nir_block
*block
, nir_builder
*b
, struct state
*state
)
270 nir_foreach_instr_safe (instr
, block
) {
271 if (instr
->type
!= nir_instr_type_intrinsic
)
274 nir_intrinsic_instr
*intr
= nir_instr_as_intrinsic(instr
);
276 switch (intr
->intrinsic
) {
277 case nir_intrinsic_load_per_vertex_input
: {
278 // src[] = { vertex, offset }.
280 b
->cursor
= nir_before_instr(&intr
->instr
);
282 nir_ssa_def
*offset
= build_local_offset(b
, state
,
283 intr
->src
[0].ssa
, // this is typically gl_InvocationID
284 nir_intrinsic_base(intr
),
287 replace_intrinsic(b
, intr
, nir_intrinsic_load_shared_ir3
, offset
, NULL
, NULL
);
291 case nir_intrinsic_load_invocation_id
: {
292 b
->cursor
= nir_before_instr(&intr
->instr
);
294 nir_ssa_def
*iid
= build_invocation_id(b
, state
);
295 nir_ssa_def_rewrite_uses(&intr
->dest
.ssa
, nir_src_for_ssa(iid
));
296 nir_instr_remove(&intr
->instr
);
307 ir3_nir_lower_to_explicit_input(nir_shader
*shader
, struct ir3_compiler
*compiler
)
309 struct state state
= { };
311 /* when using stl/ldl (instead of stlw/ldlw) for linking VS and HS,
312 * HS uses a different primitive id, which starts at bit 16 in the header
314 if (shader
->info
.stage
== MESA_SHADER_TESS_CTRL
&& compiler
->tess_use_shared
)
315 state
.local_primitive_id_start
= 16;
317 nir_function_impl
*impl
= nir_shader_get_entrypoint(shader
);
321 nir_builder_init(&b
, impl
);
322 b
.cursor
= nir_before_cf_list(&impl
->body
);
324 if (shader
->info
.stage
== MESA_SHADER_GEOMETRY
)
325 state
.header
= nir_load_gs_header_ir3(&b
);
327 state
.header
= nir_load_tcs_header_ir3(&b
);
329 nir_foreach_block_safe (block
, impl
)
330 lower_block_to_explicit_input(block
, &b
, &state
);
335 build_per_vertex_offset(nir_builder
*b
, struct state
*state
,
336 nir_ssa_def
*vertex
, nir_ssa_def
*offset
, nir_variable
*var
)
338 nir_ssa_def
*primitive_id
= nir_load_primitive_id(b
);
339 nir_ssa_def
*patch_stride
= nir_load_hs_patch_stride_ir3(b
);
340 nir_ssa_def
*patch_offset
= nir_imul24(b
, primitive_id
, patch_stride
);
341 nir_ssa_def
*attr_offset
;
342 int loc
= var
->data
.driver_location
;
344 switch (b
->shader
->info
.stage
) {
345 case MESA_SHADER_TESS_CTRL
:
346 attr_offset
= nir_imm_int(b
, state
->map
.loc
[loc
]);
348 case MESA_SHADER_TESS_EVAL
:
349 attr_offset
= nir_load_primitive_location_ir3(b
, loc
);
352 unreachable("bad shader state");
355 nir_ssa_def
*attr_stride
= nir_imm_int(b
, state
->map
.size
[loc
]);
356 nir_ssa_def
*vertex_offset
= nir_imul24(b
, vertex
, attr_stride
);
358 return nir_iadd(b
, nir_iadd(b
, patch_offset
, attr_offset
),
359 nir_iadd(b
, vertex_offset
, nir_ishl(b
, offset
, nir_imm_int(b
, 2))));
363 build_patch_offset(nir_builder
*b
, struct state
*state
, nir_ssa_def
*offset
, nir_variable
*var
)
365 debug_assert(var
&& var
->data
.patch
);
367 return build_per_vertex_offset(b
, state
, nir_imm_int(b
, 0), offset
, var
);
371 tess_level_components(struct state
*state
, uint32_t *inner
, uint32_t *outer
)
373 switch (state
->topology
) {
374 case IR3_TESS_TRIANGLES
:
382 case IR3_TESS_ISOLINES
:
392 build_tessfactor_base(nir_builder
*b
, gl_varying_slot slot
, struct state
*state
)
394 uint32_t inner_levels
, outer_levels
;
395 tess_level_components(state
, &inner_levels
, &outer_levels
);
397 const uint32_t patch_stride
= 1 + inner_levels
+ outer_levels
;
399 nir_ssa_def
*primitive_id
= nir_load_primitive_id(b
);
401 nir_ssa_def
*patch_offset
= nir_imul24(b
, primitive_id
, nir_imm_int(b
, patch_stride
));
405 case VARYING_SLOT_TESS_LEVEL_OUTER
:
406 /* There's some kind of header dword, tess levels start at index 1. */
409 case VARYING_SLOT_TESS_LEVEL_INNER
:
410 offset
= 1 + outer_levels
;
416 return nir_iadd(b
, patch_offset
, nir_imm_int(b
, offset
));
420 lower_tess_ctrl_block(nir_block
*block
, nir_builder
*b
, struct state
*state
)
422 nir_foreach_instr_safe (instr
, block
) {
423 if (instr
->type
!= nir_instr_type_intrinsic
)
426 nir_intrinsic_instr
*intr
= nir_instr_as_intrinsic(instr
);
428 switch (intr
->intrinsic
) {
429 case nir_intrinsic_control_barrier
:
430 case nir_intrinsic_memory_barrier_tcs_patch
:
431 /* Hull shaders dispatch 32 wide so an entire patch will always
432 * fit in a single warp and execute in lock-step. Consequently,
433 * we don't need to do anything for TCS barriers so just remove
434 * the intrinsic. Otherwise we'll emit an actual barrier
435 * instructions, which will deadlock.
437 nir_instr_remove(&intr
->instr
);
440 case nir_intrinsic_load_per_vertex_output
: {
441 // src[] = { vertex, offset }.
443 b
->cursor
= nir_before_instr(&intr
->instr
);
445 nir_ssa_def
*address
= nir_load_tess_param_base_ir3(b
);
446 nir_variable
*var
= get_var(&b
->shader
->outputs
, nir_intrinsic_base(intr
));
447 nir_ssa_def
*offset
= build_per_vertex_offset(b
, state
,
448 intr
->src
[0].ssa
, intr
->src
[1].ssa
, var
);
450 replace_intrinsic(b
, intr
, nir_intrinsic_load_global_ir3
, address
, offset
, NULL
);
454 case nir_intrinsic_store_per_vertex_output
: {
455 // src[] = { value, vertex, offset }.
457 b
->cursor
= nir_before_instr(&intr
->instr
);
459 /* sparse writemask not supported */
460 assert(util_is_power_of_two_nonzero(nir_intrinsic_write_mask(intr
) + 1));
462 nir_ssa_def
*value
= intr
->src
[0].ssa
;
463 nir_ssa_def
*address
= nir_load_tess_param_base_ir3(b
);
464 nir_variable
*var
= get_var(&b
->shader
->outputs
, nir_intrinsic_base(intr
));
465 nir_ssa_def
*offset
= build_per_vertex_offset(b
, state
,
466 intr
->src
[1].ssa
, intr
->src
[2].ssa
, var
);
468 replace_intrinsic(b
, intr
, nir_intrinsic_store_global_ir3
, value
, address
,
469 nir_iadd(b
, offset
, nir_imm_int(b
, nir_intrinsic_component(intr
))));
474 case nir_intrinsic_load_output
: {
475 // src[] = { offset }.
477 nir_variable
*var
= get_var(&b
->shader
->outputs
, nir_intrinsic_base(intr
));
479 b
->cursor
= nir_before_instr(&intr
->instr
);
481 nir_ssa_def
*address
, *offset
;
483 /* note if vectorization of the tess level loads ever happens:
484 * "ldg" across 16-byte boundaries can behave incorrectly if results
485 * are never used. most likely some issue with (sy) not properly
486 * syncing with values coming from a second memory transaction.
488 if (is_tess_levels(var
)) {
489 assert(intr
->dest
.ssa
.num_components
== 1);
490 address
= nir_load_tess_factor_base_ir3(b
);
491 offset
= build_tessfactor_base(b
, var
->data
.location
, state
);
493 address
= nir_load_tess_param_base_ir3(b
);
494 offset
= build_patch_offset(b
, state
, intr
->src
[0].ssa
, var
);
497 replace_intrinsic(b
, intr
, nir_intrinsic_load_global_ir3
, address
, offset
, NULL
);
501 case nir_intrinsic_store_output
: {
502 // src[] = { value, offset }.
504 /* write patch output to bo */
506 nir_variable
*var
= get_var(&b
->shader
->outputs
, nir_intrinsic_base(intr
));
508 b
->cursor
= nir_before_instr(&intr
->instr
);
510 /* sparse writemask not supported */
511 assert(util_is_power_of_two_nonzero(nir_intrinsic_write_mask(intr
) + 1));
513 if (is_tess_levels(var
)) {
514 /* with tess levels are defined as float[4] and float[2],
515 * but tess factor BO has smaller sizes for tris/isolines,
516 * so we have to discard any writes beyond the number of
517 * components for inner/outer levels */
518 uint32_t inner_levels
, outer_levels
, levels
;
519 tess_level_components(state
, &inner_levels
, &outer_levels
);
521 if (var
->data
.location
== VARYING_SLOT_TESS_LEVEL_OUTER
)
522 levels
= outer_levels
;
524 levels
= inner_levels
;
526 assert(intr
->src
[0].ssa
->num_components
== 1);
528 nir_ssa_def
*offset
=
529 nir_iadd_imm(b
, intr
->src
[1].ssa
, nir_intrinsic_component(intr
));
531 nir_if
*nif
= nir_push_if(b
, nir_ult(b
, offset
, nir_imm_int(b
, levels
)));
533 replace_intrinsic(b
, intr
, nir_intrinsic_store_global_ir3
,
535 nir_load_tess_factor_base_ir3(b
),
536 nir_iadd(b
, offset
, build_tessfactor_base(b
, var
->data
.location
, state
)));
540 nir_ssa_def
*address
= nir_load_tess_param_base_ir3(b
);
541 nir_ssa_def
*offset
= build_patch_offset(b
, state
, intr
->src
[1].ssa
, var
);
543 debug_assert(nir_intrinsic_component(intr
) == 0);
545 replace_intrinsic(b
, intr
, nir_intrinsic_store_global_ir3
,
546 intr
->src
[0].ssa
, address
, offset
);
558 emit_tess_epilouge(nir_builder
*b
, struct state
*state
)
560 /* Insert endpatch instruction:
562 * TODO we should re-work this to use normal flow control.
565 nir_intrinsic_instr
*end_patch
=
566 nir_intrinsic_instr_create(b
->shader
, nir_intrinsic_end_patch_ir3
);
567 nir_builder_instr_insert(b
, &end_patch
->instr
);
571 ir3_nir_lower_tess_ctrl(nir_shader
*shader
, struct ir3_shader_variant
*v
,
574 struct state state
= { .topology
= topology
};
576 if (shader_debug_enabled(shader
->info
.stage
)) {
577 fprintf(stderr
, "NIR (before tess lowering) for %s shader:\n",
578 _mesa_shader_stage_to_string(shader
->info
.stage
));
579 nir_print_shader(shader
, stderr
);
582 build_primitive_map(shader
, &state
.map
, &shader
->outputs
);
583 memcpy(v
->output_loc
, state
.map
.loc
, sizeof(v
->output_loc
));
584 v
->output_size
= state
.map
.stride
;
586 nir_function_impl
*impl
= nir_shader_get_entrypoint(shader
);
590 nir_builder_init(&b
, impl
);
591 b
.cursor
= nir_before_cf_list(&impl
->body
);
593 state
.header
= nir_load_tcs_header_ir3(&b
);
595 nir_foreach_block_safe (block
, impl
)
596 lower_tess_ctrl_block(block
, &b
, &state
);
598 /* Now move the body of the TCS into a conditional:
600 * if (gl_InvocationID < num_vertices)
606 nir_cf_extract(&body
, nir_before_cf_list(&impl
->body
),
607 nir_after_cf_list(&impl
->body
));
609 b
.cursor
= nir_after_cf_list(&impl
->body
);
611 /* Re-emit the header, since the old one got moved into the if branch */
612 state
.header
= nir_load_tcs_header_ir3(&b
);
613 nir_ssa_def
*iid
= build_invocation_id(&b
, &state
);
615 const uint32_t nvertices
= shader
->info
.tess
.tcs_vertices_out
;
616 nir_ssa_def
*cond
= nir_ult(&b
, iid
, nir_imm_int(&b
, nvertices
));
618 nir_if
*nif
= nir_push_if(&b
, cond
);
620 nir_cf_reinsert(&body
, b
.cursor
);
622 b
.cursor
= nir_after_cf_list(&nif
->then_list
);
624 /* Insert conditional exit for threads invocation id != 0 */
625 nir_ssa_def
*iid0_cond
= nir_ieq(&b
, iid
, nir_imm_int(&b
, 0));
626 nir_intrinsic_instr
*cond_end
=
627 nir_intrinsic_instr_create(shader
, nir_intrinsic_cond_end_ir3
);
628 cond_end
->src
[0] = nir_src_for_ssa(iid0_cond
);
629 nir_builder_instr_insert(&b
, &cond_end
->instr
);
631 emit_tess_epilouge(&b
, &state
);
635 nir_metadata_preserve(impl
, 0);
640 lower_tess_eval_block(nir_block
*block
, nir_builder
*b
, struct state
*state
)
642 nir_foreach_instr_safe (instr
, block
) {
643 if (instr
->type
!= nir_instr_type_intrinsic
)
646 nir_intrinsic_instr
*intr
= nir_instr_as_intrinsic(instr
);
648 switch (intr
->intrinsic
) {
649 case nir_intrinsic_load_tess_coord
: {
650 b
->cursor
= nir_after_instr(&intr
->instr
);
651 nir_ssa_def
*x
= nir_channel(b
, &intr
->dest
.ssa
, 0);
652 nir_ssa_def
*y
= nir_channel(b
, &intr
->dest
.ssa
, 1);
655 if (state
->topology
== IR3_TESS_TRIANGLES
)
656 z
= nir_fsub(b
, nir_fsub(b
, nir_imm_float(b
, 1.0f
), y
), x
);
658 z
= nir_imm_float(b
, 0.0f
);
660 nir_ssa_def
*coord
= nir_vec3(b
, x
, y
, z
);
662 nir_ssa_def_rewrite_uses_after(&intr
->dest
.ssa
,
663 nir_src_for_ssa(coord
),
668 case nir_intrinsic_load_per_vertex_input
: {
669 // src[] = { vertex, offset }.
671 b
->cursor
= nir_before_instr(&intr
->instr
);
673 nir_ssa_def
*address
= nir_load_tess_param_base_ir3(b
);
674 nir_variable
*var
= get_var(&b
->shader
->inputs
, nir_intrinsic_base(intr
));
675 nir_ssa_def
*offset
= build_per_vertex_offset(b
, state
,
676 intr
->src
[0].ssa
, intr
->src
[1].ssa
, var
);
678 replace_intrinsic(b
, intr
, nir_intrinsic_load_global_ir3
, address
, offset
, NULL
);
682 case nir_intrinsic_load_input
: {
683 // src[] = { offset }.
685 nir_variable
*var
= get_var(&b
->shader
->inputs
, nir_intrinsic_base(intr
));
687 debug_assert(var
->data
.patch
);
689 b
->cursor
= nir_before_instr(&intr
->instr
);
691 nir_ssa_def
*address
, *offset
;
693 /* note if vectorization of the tess level loads ever happens:
694 * "ldg" across 16-byte boundaries can behave incorrectly if results
695 * are never used. most likely some issue with (sy) not properly
696 * syncing with values coming from a second memory transaction.
698 if (is_tess_levels(var
)) {
699 assert(intr
->dest
.ssa
.num_components
== 1);
700 address
= nir_load_tess_factor_base_ir3(b
);
701 offset
= build_tessfactor_base(b
, var
->data
.location
, state
);
703 address
= nir_load_tess_param_base_ir3(b
);
704 offset
= build_patch_offset(b
, state
, intr
->src
[0].ssa
, var
);
707 offset
= nir_iadd(b
, offset
, nir_imm_int(b
, nir_intrinsic_component(intr
)));
709 replace_intrinsic(b
, intr
, nir_intrinsic_load_global_ir3
, address
, offset
, NULL
);
720 ir3_nir_lower_tess_eval(nir_shader
*shader
, unsigned topology
)
722 struct state state
= { .topology
= topology
};
724 if (shader_debug_enabled(shader
->info
.stage
)) {
725 fprintf(stderr
, "NIR (before tess lowering) for %s shader:\n",
726 _mesa_shader_stage_to_string(shader
->info
.stage
));
727 nir_print_shader(shader
, stderr
);
730 /* Build map of inputs so we have the sizes. */
731 build_primitive_map(shader
, &state
.map
, &shader
->inputs
);
733 nir_function_impl
*impl
= nir_shader_get_entrypoint(shader
);
737 nir_builder_init(&b
, impl
);
739 nir_foreach_block_safe (block
, impl
)
740 lower_tess_eval_block(block
, &b
, &state
);
742 nir_metadata_preserve(impl
, 0);
746 lower_gs_block(nir_block
*block
, nir_builder
*b
, struct state
*state
)
748 nir_foreach_instr_safe (instr
, block
) {
749 if (instr
->type
!= nir_instr_type_intrinsic
)
752 nir_intrinsic_instr
*intr
= nir_instr_as_intrinsic(instr
);
754 switch (intr
->intrinsic
) {
755 case nir_intrinsic_end_primitive
: {
756 b
->cursor
= nir_before_instr(&intr
->instr
);
757 nir_store_var(b
, state
->vertex_flags_out
, nir_imm_int(b
, 4), 0x1);
758 nir_instr_remove(&intr
->instr
);
762 case nir_intrinsic_emit_vertex
: {
763 /* Load the vertex count */
764 b
->cursor
= nir_before_instr(&intr
->instr
);
765 nir_ssa_def
*count
= nir_load_var(b
, state
->vertex_count_var
);
767 nir_push_if(b
, nir_ieq(b
, count
, local_thread_id(b
)));
769 foreach_two_lists(dest_node
, &state
->emit_outputs
, src_node
, &state
->old_outputs
) {
770 nir_variable
*dest
= exec_node_data(nir_variable
, dest_node
, node
);
771 nir_variable
*src
= exec_node_data(nir_variable
, src_node
, node
);
772 nir_copy_var(b
, dest
, src
);
775 nir_instr_remove(&intr
->instr
);
777 nir_store_var(b
, state
->emitted_vertex_var
,
778 nir_iadd(b
, nir_load_var(b
, state
->emitted_vertex_var
), nir_imm_int(b
, 1)), 0x1);
782 /* Increment the vertex count by 1 */
783 nir_store_var(b
, state
->vertex_count_var
,
784 nir_iadd(b
, count
, nir_imm_int(b
, 1)), 0x1); /* .x */
785 nir_store_var(b
, state
->vertex_flags_out
, nir_imm_int(b
, 0), 0x1);
797 ir3_nir_lower_gs(nir_shader
*shader
)
799 struct state state
= { };
801 if (shader_debug_enabled(shader
->info
.stage
)) {
802 fprintf(stderr
, "NIR (before gs lowering):\n");
803 nir_print_shader(shader
, stderr
);
806 build_primitive_map(shader
, &state
.map
, &shader
->inputs
);
808 /* Create an output var for vertex_flags. This will be shadowed below,
809 * same way regular outputs get shadowed, and this variable will become a
812 state
.vertex_flags_out
= nir_variable_create(shader
, nir_var_shader_out
,
813 glsl_uint_type(), "vertex_flags");
814 state
.vertex_flags_out
->data
.driver_location
= shader
->num_outputs
++;
815 state
.vertex_flags_out
->data
.location
= VARYING_SLOT_GS_VERTEX_FLAGS_IR3
;
816 state
.vertex_flags_out
->data
.interpolation
= INTERP_MODE_NONE
;
818 nir_function_impl
*impl
= nir_shader_get_entrypoint(shader
);
822 nir_builder_init(&b
, impl
);
823 b
.cursor
= nir_before_cf_list(&impl
->body
);
825 state
.header
= nir_load_gs_header_ir3(&b
);
827 /* Generate two set of shadow vars for the output variables. The first
828 * set replaces the real outputs and the second set (emit_outputs) we'll
829 * assign in the emit_vertex conditionals. Then at the end of the shader
830 * we copy the emit_outputs to the real outputs, so that we get
831 * store_output in uniform control flow.
833 exec_list_move_nodes_to(&shader
->outputs
, &state
.old_outputs
);
834 exec_list_make_empty(&state
.emit_outputs
);
835 nir_foreach_variable(var
, &state
.old_outputs
) {
836 /* Create a new output var by cloning the original output var and
839 nir_variable
*output
= nir_variable_clone(var
, shader
);
840 exec_list_push_tail(&shader
->outputs
, &output
->node
);
842 /* Rewrite the original output to be a shadow variable. */
843 var
->name
= ralloc_asprintf(var
, "%s@gs-temp", output
->name
);
844 var
->data
.mode
= nir_var_shader_temp
;
846 /* Clone the shadow variable to create the emit shadow variable that
847 * we'll assign in the emit conditionals.
849 nir_variable
*emit_output
= nir_variable_clone(var
, shader
);
850 emit_output
->name
= ralloc_asprintf(var
, "%s@emit-temp", output
->name
);
851 exec_list_push_tail(&state
.emit_outputs
, &emit_output
->node
);
854 /* During the shader we'll keep track of which vertex we're currently
855 * emitting for the EmitVertex test and how many vertices we emitted so we
856 * know to discard if didn't emit any. In most simple shaders, this can
857 * all be statically determined and gets optimized away.
859 state
.vertex_count_var
=
860 nir_local_variable_create(impl
, glsl_uint_type(), "vertex_count");
861 state
.emitted_vertex_var
=
862 nir_local_variable_create(impl
, glsl_uint_type(), "emitted_vertex");
864 /* Initialize to 0. */
865 b
.cursor
= nir_before_cf_list(&impl
->body
);
866 nir_store_var(&b
, state
.vertex_count_var
, nir_imm_int(&b
, 0), 0x1);
867 nir_store_var(&b
, state
.emitted_vertex_var
, nir_imm_int(&b
, 0), 0x1);
868 nir_store_var(&b
, state
.vertex_flags_out
, nir_imm_int(&b
, 4), 0x1);
870 nir_foreach_block_safe (block
, impl
)
871 lower_gs_block(block
, &b
, &state
);
873 set_foreach(impl
->end_block
->predecessors
, block_entry
) {
874 struct nir_block
*block
= (void *)block_entry
->key
;
875 b
.cursor
= nir_after_block_before_jump(block
);
877 nir_intrinsic_instr
*discard_if
=
878 nir_intrinsic_instr_create(b
.shader
, nir_intrinsic_discard_if
);
880 nir_ssa_def
*cond
= nir_ieq(&b
, nir_load_var(&b
, state
.emitted_vertex_var
), nir_imm_int(&b
, 0));
882 discard_if
->src
[0] = nir_src_for_ssa(cond
);
884 nir_builder_instr_insert(&b
, &discard_if
->instr
);
886 foreach_two_lists(dest_node
, &shader
->outputs
, src_node
, &state
.emit_outputs
) {
887 nir_variable
*dest
= exec_node_data(nir_variable
, dest_node
, node
);
888 nir_variable
*src
= exec_node_data(nir_variable
, src_node
, node
);
889 nir_copy_var(&b
, dest
, src
);
893 exec_list_append(&shader
->globals
, &state
.old_outputs
);
894 exec_list_append(&shader
->globals
, &state
.emit_outputs
);
896 nir_metadata_preserve(impl
, 0);
898 nir_lower_global_vars_to_local(shader
);
899 nir_split_var_copies(shader
);
900 nir_lower_var_copies(shader
);
902 nir_fixup_deref_modes(shader
);
904 if (shader_debug_enabled(shader
->info
.stage
)) {
905 fprintf(stderr
, "NIR (after gs lowering):\n");
906 nir_print_shader(shader
, stderr
);
911 ir3_link_geometry_stages(const struct ir3_shader_variant
*producer
,
912 const struct ir3_shader_variant
*consumer
,
915 uint32_t num_loc
= 0, factor
;
917 switch (consumer
->type
) {
918 case MESA_SHADER_TESS_CTRL
:
919 case MESA_SHADER_GEOMETRY
:
920 /* These stages load with ldlw, which expects byte offsets. */
923 case MESA_SHADER_TESS_EVAL
:
924 /* The tess eval shader uses ldg, which takes dword offsets. */
928 unreachable("bad shader stage");
931 nir_foreach_variable(in_var
, &consumer
->shader
->nir
->inputs
) {
932 nir_foreach_variable(out_var
, &producer
->shader
->nir
->outputs
) {
933 if (in_var
->data
.location
== out_var
->data
.location
) {
934 locs
[in_var
->data
.driver_location
] =
935 producer
->output_loc
[out_var
->data
.driver_location
] * factor
;
937 debug_assert(num_loc
<= in_var
->data
.driver_location
+ 1);
938 num_loc
= in_var
->data
.driver_location
+ 1;