fossil-db (LLVM, Navi):
Totals from 843 (0.62% of 135820) affected shaders:
SGPRs: 40456 -> 40480 (+0.06%); split: -0.10%, +0.16%
VGPRs: 39648 -> 39688 (+0.10%); split: -0.01%, +0.11%
CodeSize: 2936164 -> 2932508 (-0.12%); split: -0.21%, +0.09%
MaxWaves: 10828 -> 10827 (-0.01%)
The fossil-db changes seem to be due to SPIR-V -> NIR emitting a
workgroup-scope shared memory barrier instead of a group_memory_barrier.
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5980>
nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
nir_builder_instr_insert(&b, &tex->instr);
nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
nir_builder_instr_insert(&b, &tex->instr);
- nir_intrinsic_instr *membar = nir_intrinsic_instr_create(b.shader, nir_intrinsic_memory_barrier);
- nir_builder_instr_insert(&b, &membar->instr);
-
- nir_intrinsic_instr *bar = nir_intrinsic_instr_create(b.shader, nir_intrinsic_control_barrier);
- nir_builder_instr_insert(&b, &bar->instr);
+ nir_scoped_barrier(&b, NIR_SCOPE_WORKGROUP, NIR_SCOPE_WORKGROUP,
+ NIR_MEMORY_ACQ_REL, nir_var_mem_ssbo);
nir_ssa_def *outval = &tex->dest.ssa;
nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_image_deref_store);
nir_ssa_def *outval = &tex->dest.ssa;
nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_image_deref_store);
.lower_fpow = true,
.lower_mul_2x32_64 = true,
.lower_rotate = true,
.lower_fpow = true,
.lower_mul_2x32_64 = true,
.lower_rotate = true,
+ .use_scoped_barrier = true,
.max_unroll_iterations = 32,
.use_interpolated_input_intrinsics = true,
/* nir_lower_int64() isn't actually called for the LLVM backend, but
.max_unroll_iterations = 32,
.use_interpolated_input_intrinsics = true,
/* nir_lower_int64() isn't actually called for the LLVM backend, but
.lower_fpow = true,
.lower_mul_2x32_64 = true,
.lower_rotate = true,
.lower_fpow = true,
.lower_mul_2x32_64 = true,
.lower_rotate = true,
+ .use_scoped_barrier = true,
.max_unroll_iterations = 32,
.use_interpolated_input_intrinsics = true,
.lower_int64_options = nir_lower_imul64 |
.max_unroll_iterations = 32,
.use_interpolated_input_intrinsics = true,
.lower_int64_options = nir_lower_imul64 |
nir_lower_dsqrt |
nir_lower_drsq |
nir_lower_ddiv,
nir_lower_dsqrt |
nir_lower_drsq |
nir_lower_ddiv,
- .use_scoped_barrier = true,