 * Copyright © 2019 Google, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
25 #include "ir3_compiler.h"
26 #include "compiler/nir/nir_builder.h"
31 struct primitive_map
{
39 nir_variable
*vertex_count_var
;
40 nir_variable
*emitted_vertex_var
;
41 nir_variable
*vertex_flags_out
;
43 struct exec_list old_outputs
;
44 struct exec_list new_outputs
;
45 struct exec_list emit_outputs
;
47 /* tess ctrl shader on a650 gets the local primitive id at different bits: */
48 unsigned local_primitive_id_start
;
52 bitfield_extract(nir_builder
*b
, nir_ssa_def
*v
, uint32_t start
, uint32_t mask
)
54 return nir_iand(b
, nir_ushr(b
, v
, nir_imm_int(b
, start
)),
55 nir_imm_int(b
, mask
));
59 build_invocation_id(nir_builder
*b
, struct state
*state
)
61 return bitfield_extract(b
, state
->header
, 11, 31);
65 build_vertex_id(nir_builder
*b
, struct state
*state
)
67 return bitfield_extract(b
, state
->header
, 6, 31);
71 build_local_primitive_id(nir_builder
*b
, struct state
*state
)
73 return bitfield_extract(b
, state
->header
, state
->local_primitive_id_start
, 63);
77 get_var(nir_shader
*shader
, nir_variable_mode mode
, int driver_location
)
79 nir_foreach_variable_with_modes (v
, shader
, mode
) {
80 if (v
->data
.driver_location
== driver_location
) {
89 is_tess_levels(nir_variable
*var
)
91 return (var
->data
.location
== VARYING_SLOT_TESS_LEVEL_OUTER
||
92 var
->data
.location
== VARYING_SLOT_TESS_LEVEL_INNER
);
96 build_local_offset(nir_builder
*b
, struct state
*state
,
97 nir_ssa_def
*vertex
, uint32_t base
, nir_ssa_def
*offset
)
99 nir_ssa_def
*primitive_stride
= nir_load_vs_primitive_stride_ir3(b
);
100 nir_ssa_def
*primitive_offset
=
101 nir_imul24(b
, build_local_primitive_id(b
, state
), primitive_stride
);
102 nir_ssa_def
*attr_offset
;
103 nir_ssa_def
*vertex_stride
;
105 switch (b
->shader
->info
.stage
) {
106 case MESA_SHADER_VERTEX
:
107 case MESA_SHADER_TESS_EVAL
:
108 vertex_stride
= nir_imm_int(b
, state
->map
.stride
* 4);
109 attr_offset
= nir_imm_int(b
, state
->map
.loc
[base
] * 4);
111 case MESA_SHADER_TESS_CTRL
:
112 case MESA_SHADER_GEOMETRY
:
113 vertex_stride
= nir_load_vs_vertex_stride_ir3(b
);
114 attr_offset
= nir_load_primitive_location_ir3(b
, base
);
117 unreachable("bad shader stage");
120 nir_ssa_def
*vertex_offset
= nir_imul24(b
, vertex
, vertex_stride
);
122 return nir_iadd(b
, nir_iadd(b
, primitive_offset
, vertex_offset
),
123 nir_iadd(b
, attr_offset
, offset
));
126 static nir_intrinsic_instr
*
127 replace_intrinsic(nir_builder
*b
, nir_intrinsic_instr
*intr
,
128 nir_intrinsic_op op
, nir_ssa_def
*src0
, nir_ssa_def
*src1
, nir_ssa_def
*src2
)
130 nir_intrinsic_instr
*new_intr
=
131 nir_intrinsic_instr_create(b
->shader
, op
);
133 new_intr
->src
[0] = nir_src_for_ssa(src0
);
135 new_intr
->src
[1] = nir_src_for_ssa(src1
);
137 new_intr
->src
[2] = nir_src_for_ssa(src2
);
139 new_intr
->num_components
= intr
->num_components
;
141 if (nir_intrinsic_infos
[op
].has_dest
)
142 nir_ssa_dest_init(&new_intr
->instr
, &new_intr
->dest
,
143 intr
->num_components
, 32, NULL
);
145 nir_builder_instr_insert(b
, &new_intr
->instr
);
147 if (nir_intrinsic_infos
[op
].has_dest
)
148 nir_ssa_def_rewrite_uses(&intr
->dest
.ssa
, nir_src_for_ssa(&new_intr
->dest
.ssa
));
150 nir_instr_remove(&intr
->instr
);
156 build_primitive_map(nir_shader
*shader
, nir_variable_mode mode
, struct primitive_map
*map
)
158 nir_foreach_variable_with_modes (var
, shader
, mode
) {
159 switch (var
->data
.location
) {
160 case VARYING_SLOT_TESS_LEVEL_OUTER
:
161 case VARYING_SLOT_TESS_LEVEL_INNER
:
165 unsigned size
= glsl_count_attribute_slots(var
->type
, false) * 4;
167 assert(var
->data
.driver_location
< ARRAY_SIZE(map
->size
));
168 map
->size
[var
->data
.driver_location
] =
169 MAX2(map
->size
[var
->data
.driver_location
], size
);
173 for (uint32_t i
= 0; i
< ARRAY_SIZE(map
->size
); i
++) {
174 if (map
->size
[i
] == 0)
176 nir_variable
*var
= get_var(shader
, mode
, i
);
183 map
->size
[i
] = map
->size
[i
] / glsl_get_length(var
->type
);
190 lower_block_to_explicit_output(nir_block
*block
, nir_builder
*b
, struct state
*state
)
192 nir_foreach_instr_safe (instr
, block
) {
193 if (instr
->type
!= nir_instr_type_intrinsic
)
196 nir_intrinsic_instr
*intr
= nir_instr_as_intrinsic(instr
);
198 switch (intr
->intrinsic
) {
199 case nir_intrinsic_store_output
: {
200 // src[] = { value, offset }.
202 /* nir_lower_io_to_temporaries replaces all access to output
203 * variables with temp variables and then emits a nir_copy_var at
204 * the end of the shader. Thus, we should always get a full wrmask
207 assert(util_is_power_of_two_nonzero(nir_intrinsic_write_mask(intr
) + 1));
209 b
->cursor
= nir_instr_remove(&intr
->instr
);
211 nir_ssa_def
*vertex_id
= build_vertex_id(b
, state
);
212 nir_ssa_def
*offset
= build_local_offset(b
, state
, vertex_id
, nir_intrinsic_base(intr
),
214 nir_intrinsic_instr
*store
=
215 nir_intrinsic_instr_create(b
->shader
, nir_intrinsic_store_shared_ir3
);
217 store
->src
[0] = nir_src_for_ssa(intr
->src
[0].ssa
);
218 store
->src
[1] = nir_src_for_ssa(offset
);
219 store
->num_components
= intr
->num_components
;
221 nir_builder_instr_insert(b
, &store
->instr
);
232 local_thread_id(nir_builder
*b
)
234 return bitfield_extract(b
, nir_load_gs_header_ir3(b
), 16, 1023);
238 ir3_nir_lower_to_explicit_output(nir_shader
*shader
, struct ir3_shader_variant
*v
,
241 struct state state
= { };
243 build_primitive_map(shader
, nir_var_shader_out
, &state
.map
);
244 memcpy(v
->output_loc
, state
.map
.loc
, sizeof(v
->output_loc
));
246 nir_function_impl
*impl
= nir_shader_get_entrypoint(shader
);
250 nir_builder_init(&b
, impl
);
251 b
.cursor
= nir_before_cf_list(&impl
->body
);
253 if (v
->type
== MESA_SHADER_VERTEX
&& topology
!= IR3_TESS_NONE
)
254 state
.header
= nir_load_tcs_header_ir3(&b
);
256 state
.header
= nir_load_gs_header_ir3(&b
);
258 nir_foreach_block_safe (block
, impl
)
259 lower_block_to_explicit_output(block
, &b
, &state
);
261 nir_metadata_preserve(impl
, nir_metadata_block_index
|
262 nir_metadata_dominance
);
264 v
->output_size
= state
.map
.stride
;
269 lower_block_to_explicit_input(nir_block
*block
, nir_builder
*b
, struct state
*state
)
271 nir_foreach_instr_safe (instr
, block
) {
272 if (instr
->type
!= nir_instr_type_intrinsic
)
275 nir_intrinsic_instr
*intr
= nir_instr_as_intrinsic(instr
);
277 switch (intr
->intrinsic
) {
278 case nir_intrinsic_load_per_vertex_input
: {
279 // src[] = { vertex, offset }.
281 b
->cursor
= nir_before_instr(&intr
->instr
);
283 nir_ssa_def
*offset
= build_local_offset(b
, state
,
284 intr
->src
[0].ssa
, // this is typically gl_InvocationID
285 nir_intrinsic_base(intr
),
288 replace_intrinsic(b
, intr
, nir_intrinsic_load_shared_ir3
, offset
, NULL
, NULL
);
292 case nir_intrinsic_load_invocation_id
: {
293 b
->cursor
= nir_before_instr(&intr
->instr
);
295 nir_ssa_def
*iid
= build_invocation_id(b
, state
);
296 nir_ssa_def_rewrite_uses(&intr
->dest
.ssa
, nir_src_for_ssa(iid
));
297 nir_instr_remove(&intr
->instr
);
308 ir3_nir_lower_to_explicit_input(nir_shader
*shader
, struct ir3_compiler
*compiler
)
310 struct state state
= { };
312 /* when using stl/ldl (instead of stlw/ldlw) for linking VS and HS,
313 * HS uses a different primitive id, which starts at bit 16 in the header
315 if (shader
->info
.stage
== MESA_SHADER_TESS_CTRL
&& compiler
->tess_use_shared
)
316 state
.local_primitive_id_start
= 16;
318 nir_function_impl
*impl
= nir_shader_get_entrypoint(shader
);
322 nir_builder_init(&b
, impl
);
323 b
.cursor
= nir_before_cf_list(&impl
->body
);
325 if (shader
->info
.stage
== MESA_SHADER_GEOMETRY
)
326 state
.header
= nir_load_gs_header_ir3(&b
);
328 state
.header
= nir_load_tcs_header_ir3(&b
);
330 nir_foreach_block_safe (block
, impl
)
331 lower_block_to_explicit_input(block
, &b
, &state
);
336 build_per_vertex_offset(nir_builder
*b
, struct state
*state
,
337 nir_ssa_def
*vertex
, nir_ssa_def
*offset
, nir_variable
*var
)
339 nir_ssa_def
*primitive_id
= nir_load_primitive_id(b
);
340 nir_ssa_def
*patch_stride
= nir_load_hs_patch_stride_ir3(b
);
341 nir_ssa_def
*patch_offset
= nir_imul24(b
, primitive_id
, patch_stride
);
342 nir_ssa_def
*attr_offset
;
343 int loc
= var
->data
.driver_location
;
345 switch (b
->shader
->info
.stage
) {
346 case MESA_SHADER_TESS_CTRL
:
347 attr_offset
= nir_imm_int(b
, state
->map
.loc
[loc
]);
349 case MESA_SHADER_TESS_EVAL
:
350 attr_offset
= nir_load_primitive_location_ir3(b
, loc
);
353 unreachable("bad shader state");
356 nir_ssa_def
*attr_stride
= nir_imm_int(b
, state
->map
.size
[loc
]);
357 nir_ssa_def
*vertex_offset
= nir_imul24(b
, vertex
, attr_stride
);
359 return nir_iadd(b
, nir_iadd(b
, patch_offset
, attr_offset
),
360 nir_iadd(b
, vertex_offset
, nir_ishl(b
, offset
, nir_imm_int(b
, 2))));
364 build_patch_offset(nir_builder
*b
, struct state
*state
, nir_ssa_def
*offset
, nir_variable
*var
)
366 debug_assert(var
&& var
->data
.patch
);
368 return build_per_vertex_offset(b
, state
, nir_imm_int(b
, 0), offset
, var
);
372 tess_level_components(struct state
*state
, uint32_t *inner
, uint32_t *outer
)
374 switch (state
->topology
) {
375 case IR3_TESS_TRIANGLES
:
383 case IR3_TESS_ISOLINES
:
393 build_tessfactor_base(nir_builder
*b
, gl_varying_slot slot
, struct state
*state
)
395 uint32_t inner_levels
, outer_levels
;
396 tess_level_components(state
, &inner_levels
, &outer_levels
);
398 const uint32_t patch_stride
= 1 + inner_levels
+ outer_levels
;
400 nir_ssa_def
*primitive_id
= nir_load_primitive_id(b
);
402 nir_ssa_def
*patch_offset
= nir_imul24(b
, primitive_id
, nir_imm_int(b
, patch_stride
));
406 case VARYING_SLOT_TESS_LEVEL_OUTER
:
407 /* There's some kind of header dword, tess levels start at index 1. */
410 case VARYING_SLOT_TESS_LEVEL_INNER
:
411 offset
= 1 + outer_levels
;
417 return nir_iadd(b
, patch_offset
, nir_imm_int(b
, offset
));
421 lower_tess_ctrl_block(nir_block
*block
, nir_builder
*b
, struct state
*state
)
423 nir_foreach_instr_safe (instr
, block
) {
424 if (instr
->type
!= nir_instr_type_intrinsic
)
427 nir_intrinsic_instr
*intr
= nir_instr_as_intrinsic(instr
);
429 switch (intr
->intrinsic
) {
430 case nir_intrinsic_control_barrier
:
431 case nir_intrinsic_memory_barrier_tcs_patch
:
432 /* Hull shaders dispatch 32 wide so an entire patch will always
433 * fit in a single warp and execute in lock-step. Consequently,
434 * we don't need to do anything for TCS barriers so just remove
435 * the intrinsic. Otherwise we'll emit an actual barrier
436 * instructions, which will deadlock.
438 nir_instr_remove(&intr
->instr
);
441 case nir_intrinsic_load_per_vertex_output
: {
442 // src[] = { vertex, offset }.
444 b
->cursor
= nir_before_instr(&intr
->instr
);
446 nir_ssa_def
*address
= nir_load_tess_param_base_ir3(b
);
447 nir_variable
*var
= get_var(b
->shader
, nir_var_shader_out
, nir_intrinsic_base(intr
));
448 nir_ssa_def
*offset
= build_per_vertex_offset(b
, state
,
449 intr
->src
[0].ssa
, intr
->src
[1].ssa
, var
);
451 replace_intrinsic(b
, intr
, nir_intrinsic_load_global_ir3
, address
, offset
, NULL
);
455 case nir_intrinsic_store_per_vertex_output
: {
456 // src[] = { value, vertex, offset }.
458 b
->cursor
= nir_before_instr(&intr
->instr
);
460 /* sparse writemask not supported */
461 assert(util_is_power_of_two_nonzero(nir_intrinsic_write_mask(intr
) + 1));
463 nir_ssa_def
*value
= intr
->src
[0].ssa
;
464 nir_ssa_def
*address
= nir_load_tess_param_base_ir3(b
);
465 nir_variable
*var
= get_var(b
->shader
, nir_var_shader_out
, nir_intrinsic_base(intr
));
466 nir_ssa_def
*offset
= build_per_vertex_offset(b
, state
,
467 intr
->src
[1].ssa
, intr
->src
[2].ssa
, var
);
469 replace_intrinsic(b
, intr
, nir_intrinsic_store_global_ir3
, value
, address
,
470 nir_iadd(b
, offset
, nir_imm_int(b
, nir_intrinsic_component(intr
))));
475 case nir_intrinsic_load_output
: {
476 // src[] = { offset }.
478 nir_variable
*var
= get_var(b
->shader
, nir_var_shader_out
, nir_intrinsic_base(intr
));
480 b
->cursor
= nir_before_instr(&intr
->instr
);
482 nir_ssa_def
*address
, *offset
;
484 /* note if vectorization of the tess level loads ever happens:
485 * "ldg" across 16-byte boundaries can behave incorrectly if results
486 * are never used. most likely some issue with (sy) not properly
487 * syncing with values coming from a second memory transaction.
489 if (is_tess_levels(var
)) {
490 assert(intr
->dest
.ssa
.num_components
== 1);
491 address
= nir_load_tess_factor_base_ir3(b
);
492 offset
= build_tessfactor_base(b
, var
->data
.location
, state
);
494 address
= nir_load_tess_param_base_ir3(b
);
495 offset
= build_patch_offset(b
, state
, intr
->src
[0].ssa
, var
);
498 replace_intrinsic(b
, intr
, nir_intrinsic_load_global_ir3
, address
, offset
, NULL
);
502 case nir_intrinsic_store_output
: {
503 // src[] = { value, offset }.
505 /* write patch output to bo */
507 nir_variable
*var
= get_var(b
->shader
, nir_var_shader_out
, nir_intrinsic_base(intr
));
509 b
->cursor
= nir_before_instr(&intr
->instr
);
511 /* sparse writemask not supported */
512 assert(util_is_power_of_two_nonzero(nir_intrinsic_write_mask(intr
) + 1));
514 if (is_tess_levels(var
)) {
515 /* with tess levels are defined as float[4] and float[2],
516 * but tess factor BO has smaller sizes for tris/isolines,
517 * so we have to discard any writes beyond the number of
518 * components for inner/outer levels */
519 uint32_t inner_levels
, outer_levels
, levels
;
520 tess_level_components(state
, &inner_levels
, &outer_levels
);
522 if (var
->data
.location
== VARYING_SLOT_TESS_LEVEL_OUTER
)
523 levels
= outer_levels
;
525 levels
= inner_levels
;
527 assert(intr
->src
[0].ssa
->num_components
== 1);
529 nir_ssa_def
*offset
=
530 nir_iadd_imm(b
, intr
->src
[1].ssa
, nir_intrinsic_component(intr
));
532 nir_if
*nif
= nir_push_if(b
, nir_ult(b
, offset
, nir_imm_int(b
, levels
)));
534 replace_intrinsic(b
, intr
, nir_intrinsic_store_global_ir3
,
536 nir_load_tess_factor_base_ir3(b
),
537 nir_iadd(b
, offset
, build_tessfactor_base(b
, var
->data
.location
, state
)));
541 nir_ssa_def
*address
= nir_load_tess_param_base_ir3(b
);
542 nir_ssa_def
*offset
= build_patch_offset(b
, state
, intr
->src
[1].ssa
, var
);
544 debug_assert(nir_intrinsic_component(intr
) == 0);
546 replace_intrinsic(b
, intr
, nir_intrinsic_store_global_ir3
,
547 intr
->src
[0].ssa
, address
, offset
);
559 emit_tess_epilouge(nir_builder
*b
, struct state
*state
)
561 /* Insert endpatch instruction:
563 * TODO we should re-work this to use normal flow control.
566 nir_intrinsic_instr
*end_patch
=
567 nir_intrinsic_instr_create(b
->shader
, nir_intrinsic_end_patch_ir3
);
568 nir_builder_instr_insert(b
, &end_patch
->instr
);
572 ir3_nir_lower_tess_ctrl(nir_shader
*shader
, struct ir3_shader_variant
*v
,
575 struct state state
= { .topology
= topology
};
577 if (shader_debug_enabled(shader
->info
.stage
)) {
578 fprintf(stderr
, "NIR (before tess lowering) for %s shader:\n",
579 _mesa_shader_stage_to_string(shader
->info
.stage
));
580 nir_print_shader(shader
, stderr
);
583 build_primitive_map(shader
, nir_var_shader_out
, &state
.map
);
584 memcpy(v
->output_loc
, state
.map
.loc
, sizeof(v
->output_loc
));
585 v
->output_size
= state
.map
.stride
;
587 nir_function_impl
*impl
= nir_shader_get_entrypoint(shader
);
591 nir_builder_init(&b
, impl
);
592 b
.cursor
= nir_before_cf_list(&impl
->body
);
594 state
.header
= nir_load_tcs_header_ir3(&b
);
596 nir_foreach_block_safe (block
, impl
)
597 lower_tess_ctrl_block(block
, &b
, &state
);
599 /* Now move the body of the TCS into a conditional:
601 * if (gl_InvocationID < num_vertices)
607 nir_cf_extract(&body
, nir_before_cf_list(&impl
->body
),
608 nir_after_cf_list(&impl
->body
));
610 b
.cursor
= nir_after_cf_list(&impl
->body
);
612 /* Re-emit the header, since the old one got moved into the if branch */
613 state
.header
= nir_load_tcs_header_ir3(&b
);
614 nir_ssa_def
*iid
= build_invocation_id(&b
, &state
);
616 const uint32_t nvertices
= shader
->info
.tess
.tcs_vertices_out
;
617 nir_ssa_def
*cond
= nir_ult(&b
, iid
, nir_imm_int(&b
, nvertices
));
619 nir_if
*nif
= nir_push_if(&b
, cond
);
621 nir_cf_reinsert(&body
, b
.cursor
);
623 b
.cursor
= nir_after_cf_list(&nif
->then_list
);
625 /* Insert conditional exit for threads invocation id != 0 */
626 nir_ssa_def
*iid0_cond
= nir_ieq(&b
, iid
, nir_imm_int(&b
, 0));
627 nir_intrinsic_instr
*cond_end
=
628 nir_intrinsic_instr_create(shader
, nir_intrinsic_cond_end_ir3
);
629 cond_end
->src
[0] = nir_src_for_ssa(iid0_cond
);
630 nir_builder_instr_insert(&b
, &cond_end
->instr
);
632 emit_tess_epilouge(&b
, &state
);
636 nir_metadata_preserve(impl
, 0);
641 lower_tess_eval_block(nir_block
*block
, nir_builder
*b
, struct state
*state
)
643 nir_foreach_instr_safe (instr
, block
) {
644 if (instr
->type
!= nir_instr_type_intrinsic
)
647 nir_intrinsic_instr
*intr
= nir_instr_as_intrinsic(instr
);
649 switch (intr
->intrinsic
) {
650 case nir_intrinsic_load_tess_coord
: {
651 b
->cursor
= nir_after_instr(&intr
->instr
);
652 nir_ssa_def
*x
= nir_channel(b
, &intr
->dest
.ssa
, 0);
653 nir_ssa_def
*y
= nir_channel(b
, &intr
->dest
.ssa
, 1);
656 if (state
->topology
== IR3_TESS_TRIANGLES
)
657 z
= nir_fsub(b
, nir_fsub(b
, nir_imm_float(b
, 1.0f
), y
), x
);
659 z
= nir_imm_float(b
, 0.0f
);
661 nir_ssa_def
*coord
= nir_vec3(b
, x
, y
, z
);
663 nir_ssa_def_rewrite_uses_after(&intr
->dest
.ssa
,
664 nir_src_for_ssa(coord
),
669 case nir_intrinsic_load_per_vertex_input
: {
670 // src[] = { vertex, offset }.
672 b
->cursor
= nir_before_instr(&intr
->instr
);
674 nir_ssa_def
*address
= nir_load_tess_param_base_ir3(b
);
675 nir_variable
*var
= get_var(b
->shader
, nir_var_shader_in
, nir_intrinsic_base(intr
));
676 nir_ssa_def
*offset
= build_per_vertex_offset(b
, state
,
677 intr
->src
[0].ssa
, intr
->src
[1].ssa
, var
);
679 replace_intrinsic(b
, intr
, nir_intrinsic_load_global_ir3
, address
, offset
, NULL
);
683 case nir_intrinsic_load_input
: {
684 // src[] = { offset }.
686 nir_variable
*var
= get_var(b
->shader
, nir_var_shader_in
, nir_intrinsic_base(intr
));
688 debug_assert(var
->data
.patch
);
690 b
->cursor
= nir_before_instr(&intr
->instr
);
692 nir_ssa_def
*address
, *offset
;
694 /* note if vectorization of the tess level loads ever happens:
695 * "ldg" across 16-byte boundaries can behave incorrectly if results
696 * are never used. most likely some issue with (sy) not properly
697 * syncing with values coming from a second memory transaction.
699 if (is_tess_levels(var
)) {
700 assert(intr
->dest
.ssa
.num_components
== 1);
701 address
= nir_load_tess_factor_base_ir3(b
);
702 offset
= build_tessfactor_base(b
, var
->data
.location
, state
);
704 address
= nir_load_tess_param_base_ir3(b
);
705 offset
= build_patch_offset(b
, state
, intr
->src
[0].ssa
, var
);
708 offset
= nir_iadd(b
, offset
, nir_imm_int(b
, nir_intrinsic_component(intr
)));
710 replace_intrinsic(b
, intr
, nir_intrinsic_load_global_ir3
, address
, offset
, NULL
);
721 ir3_nir_lower_tess_eval(nir_shader
*shader
, unsigned topology
)
723 struct state state
= { .topology
= topology
};
725 if (shader_debug_enabled(shader
->info
.stage
)) {
726 fprintf(stderr
, "NIR (before tess lowering) for %s shader:\n",
727 _mesa_shader_stage_to_string(shader
->info
.stage
));
728 nir_print_shader(shader
, stderr
);
731 /* Build map of inputs so we have the sizes. */
732 build_primitive_map(shader
, nir_var_shader_in
, &state
.map
);
734 nir_function_impl
*impl
= nir_shader_get_entrypoint(shader
);
738 nir_builder_init(&b
, impl
);
740 nir_foreach_block_safe (block
, impl
)
741 lower_tess_eval_block(block
, &b
, &state
);
743 nir_metadata_preserve(impl
, 0);
747 lower_gs_block(nir_block
*block
, nir_builder
*b
, struct state
*state
)
749 nir_foreach_instr_safe (instr
, block
) {
750 if (instr
->type
!= nir_instr_type_intrinsic
)
753 nir_intrinsic_instr
*intr
= nir_instr_as_intrinsic(instr
);
755 switch (intr
->intrinsic
) {
756 case nir_intrinsic_end_primitive
: {
757 b
->cursor
= nir_before_instr(&intr
->instr
);
758 nir_store_var(b
, state
->vertex_flags_out
, nir_imm_int(b
, 4), 0x1);
759 nir_instr_remove(&intr
->instr
);
763 case nir_intrinsic_emit_vertex
: {
764 /* Load the vertex count */
765 b
->cursor
= nir_before_instr(&intr
->instr
);
766 nir_ssa_def
*count
= nir_load_var(b
, state
->vertex_count_var
);
768 nir_push_if(b
, nir_ieq(b
, count
, local_thread_id(b
)));
770 foreach_two_lists(dest_node
, &state
->emit_outputs
, src_node
, &state
->old_outputs
) {
771 nir_variable
*dest
= exec_node_data(nir_variable
, dest_node
, node
);
772 nir_variable
*src
= exec_node_data(nir_variable
, src_node
, node
);
773 nir_copy_var(b
, dest
, src
);
776 nir_instr_remove(&intr
->instr
);
778 nir_store_var(b
, state
->emitted_vertex_var
,
779 nir_iadd(b
, nir_load_var(b
, state
->emitted_vertex_var
), nir_imm_int(b
, 1)), 0x1);
783 /* Increment the vertex count by 1 */
784 nir_store_var(b
, state
->vertex_count_var
,
785 nir_iadd(b
, count
, nir_imm_int(b
, 1)), 0x1); /* .x */
786 nir_store_var(b
, state
->vertex_flags_out
, nir_imm_int(b
, 0), 0x1);
798 ir3_nir_lower_gs(nir_shader
*shader
)
800 struct state state
= { };
802 if (shader_debug_enabled(shader
->info
.stage
)) {
803 fprintf(stderr
, "NIR (before gs lowering):\n");
804 nir_print_shader(shader
, stderr
);
807 build_primitive_map(shader
, nir_var_shader_in
, &state
.map
);
809 /* Create an output var for vertex_flags. This will be shadowed below,
810 * same way regular outputs get shadowed, and this variable will become a
813 state
.vertex_flags_out
= nir_variable_create(shader
, nir_var_shader_out
,
814 glsl_uint_type(), "vertex_flags");
815 state
.vertex_flags_out
->data
.driver_location
= shader
->num_outputs
++;
816 state
.vertex_flags_out
->data
.location
= VARYING_SLOT_GS_VERTEX_FLAGS_IR3
;
817 state
.vertex_flags_out
->data
.interpolation
= INTERP_MODE_NONE
;
819 nir_function_impl
*impl
= nir_shader_get_entrypoint(shader
);
823 nir_builder_init(&b
, impl
);
824 b
.cursor
= nir_before_cf_list(&impl
->body
);
826 state
.header
= nir_load_gs_header_ir3(&b
);
828 /* Generate two set of shadow vars for the output variables. The first
829 * set replaces the real outputs and the second set (emit_outputs) we'll
830 * assign in the emit_vertex conditionals. Then at the end of the shader
831 * we copy the emit_outputs to the real outputs, so that we get
832 * store_output in uniform control flow.
834 exec_list_make_empty(&state
.old_outputs
);
835 nir_foreach_shader_out_variable_safe(var
, shader
) {
836 exec_node_remove(&var
->node
);
837 exec_list_push_tail(&state
.old_outputs
, &var
->node
);
839 exec_list_make_empty(&state
.new_outputs
);
840 exec_list_make_empty(&state
.emit_outputs
);
841 nir_foreach_variable_in_list(var
, &state
.old_outputs
) {
842 /* Create a new output var by cloning the original output var and
845 nir_variable
*output
= nir_variable_clone(var
, shader
);
846 exec_list_push_tail(&state
.new_outputs
, &output
->node
);
848 /* Rewrite the original output to be a shadow variable. */
849 var
->name
= ralloc_asprintf(var
, "%s@gs-temp", output
->name
);
850 var
->data
.mode
= nir_var_shader_temp
;
852 /* Clone the shadow variable to create the emit shadow variable that
853 * we'll assign in the emit conditionals.
855 nir_variable
*emit_output
= nir_variable_clone(var
, shader
);
856 emit_output
->name
= ralloc_asprintf(var
, "%s@emit-temp", output
->name
);
857 exec_list_push_tail(&state
.emit_outputs
, &emit_output
->node
);
860 /* During the shader we'll keep track of which vertex we're currently
861 * emitting for the EmitVertex test and how many vertices we emitted so we
862 * know to discard if didn't emit any. In most simple shaders, this can
863 * all be statically determined and gets optimized away.
865 state
.vertex_count_var
=
866 nir_local_variable_create(impl
, glsl_uint_type(), "vertex_count");
867 state
.emitted_vertex_var
=
868 nir_local_variable_create(impl
, glsl_uint_type(), "emitted_vertex");
870 /* Initialize to 0. */
871 b
.cursor
= nir_before_cf_list(&impl
->body
);
872 nir_store_var(&b
, state
.vertex_count_var
, nir_imm_int(&b
, 0), 0x1);
873 nir_store_var(&b
, state
.emitted_vertex_var
, nir_imm_int(&b
, 0), 0x1);
874 nir_store_var(&b
, state
.vertex_flags_out
, nir_imm_int(&b
, 4), 0x1);
876 nir_foreach_block_safe (block
, impl
)
877 lower_gs_block(block
, &b
, &state
);
879 set_foreach(impl
->end_block
->predecessors
, block_entry
) {
880 struct nir_block
*block
= (void *)block_entry
->key
;
881 b
.cursor
= nir_after_block_before_jump(block
);
883 nir_intrinsic_instr
*discard_if
=
884 nir_intrinsic_instr_create(b
.shader
, nir_intrinsic_discard_if
);
886 nir_ssa_def
*cond
= nir_ieq(&b
, nir_load_var(&b
, state
.emitted_vertex_var
), nir_imm_int(&b
, 0));
888 discard_if
->src
[0] = nir_src_for_ssa(cond
);
890 nir_builder_instr_insert(&b
, &discard_if
->instr
);
892 foreach_two_lists(dest_node
, &state
.new_outputs
, src_node
, &state
.emit_outputs
) {
893 nir_variable
*dest
= exec_node_data(nir_variable
, dest_node
, node
);
894 nir_variable
*src
= exec_node_data(nir_variable
, src_node
, node
);
895 nir_copy_var(&b
, dest
, src
);
899 exec_list_append(&shader
->variables
, &state
.old_outputs
);
900 exec_list_append(&shader
->variables
, &state
.emit_outputs
);
901 exec_list_append(&shader
->variables
, &state
.new_outputs
);
903 nir_metadata_preserve(impl
, 0);
905 nir_lower_global_vars_to_local(shader
);
906 nir_split_var_copies(shader
);
907 nir_lower_var_copies(shader
);
909 nir_fixup_deref_modes(shader
);
911 if (shader_debug_enabled(shader
->info
.stage
)) {
912 fprintf(stderr
, "NIR (after gs lowering):\n");
913 nir_print_shader(shader
, stderr
);
918 ir3_link_geometry_stages(const struct ir3_shader_variant
*producer
,
919 const struct ir3_shader_variant
*consumer
,
922 uint32_t num_loc
= 0, factor
;
924 switch (consumer
->type
) {
925 case MESA_SHADER_TESS_CTRL
:
926 case MESA_SHADER_GEOMETRY
:
927 /* These stages load with ldlw, which expects byte offsets. */
930 case MESA_SHADER_TESS_EVAL
:
931 /* The tess eval shader uses ldg, which takes dword offsets. */
935 unreachable("bad shader stage");
938 nir_foreach_shader_in_variable(in_var
, consumer
->shader
->nir
) {
939 nir_foreach_shader_out_variable(out_var
, producer
->shader
->nir
) {
940 if (in_var
->data
.location
== out_var
->data
.location
) {
941 locs
[in_var
->data
.driver_location
] =
942 producer
->output_loc
[out_var
->data
.driver_location
] * factor
;
944 debug_assert(num_loc
<= in_var
->data
.driver_location
+ 1);
945 num_loc
= in_var
->data
.driver_location
+ 1;