From: Caio Marcelo de Oliveira Filho Date: Fri, 21 Feb 2020 18:58:48 +0000 (-0800) Subject: intel/fs: Combine adjacent memory barriers X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=7d54b84d49de991188a6a91bbadf00e89654f2c0;p=mesa.git intel/fs: Combine adjacent memory barriers This will avoid generating multiple identical fences in a row. For Gen11+ we have multiple types of fences (affecting different variable modes), but it is still better to combine them into a single scoped barrier so that the translation to backend IR has the option of dispatching both fences in parallel. This will clean up redundant barriers from various dEQP-VK.memory_model.* tests. Reviewed-by: Jason Ekstrand Tested-by: Marge Bot Part-of: --- diff --git a/src/intel/compiler/brw_nir.c b/src/intel/compiler/brw_nir.c index 6c6e72208ac..9e8ae3b52a1 100644 --- a/src/intel/compiler/brw_nir.c +++ b/src/intel/compiler/brw_nir.c @@ -824,6 +824,26 @@ brw_nir_link_shaders(const struct brw_compiler *compiler, } } +static +bool combine_all_barriers(nir_intrinsic_instr *a, + nir_intrinsic_instr *b, + void *data) +{ + /* Translation to backend IR will get rid of modes we don't care about, so + * no harm in always combining them. + * + * TODO: While HW has only ACQUIRE|RELEASE fences, we could improve the + * scheduling so that it can take advantage of the different semantics. 
+ */ + nir_intrinsic_set_memory_modes(a, nir_intrinsic_memory_modes(a) | + nir_intrinsic_memory_modes(b)); + nir_intrinsic_set_memory_semantics(a, nir_intrinsic_memory_semantics(a) | + nir_intrinsic_memory_semantics(b)); + nir_intrinsic_set_memory_scope(a, MAX2(nir_intrinsic_memory_scope(a), + nir_intrinsic_memory_scope(b))); + return true; +} + /* Prepare the given shader for codegen * * This function is intended to be called right before going into the actual @@ -843,6 +863,8 @@ brw_postprocess_nir(nir_shader *nir, const struct brw_compiler *compiler, OPT(brw_nir_lower_mem_access_bit_sizes, devinfo); + OPT(nir_opt_combine_memory_barriers, combine_all_barriers, NULL); + do { progress = false; OPT(nir_opt_algebraic_before_ffma);