From 65e8761474ca8c9c0cce167cb32b720c3cc25a90 Mon Sep 17 00:00:00 2001 From: Caio Marcelo de Oliveira Filho Date: Mon, 11 Mar 2019 09:43:04 -0700 Subject: [PATCH] intel/nir: Combine store_derefs to improve code from SPIR-V Due to lack of write mask in SPIR-V store, generators may produce multiple stores to the same vector but using different array derefs. Use the combining store pass to clean this up. For example, layout(binding = 3) buffer block { vec4 v; }; void main() { v.x = 11; v.y = 22; } after going to SPIR-V and NIR, ends up with in two store_derefs to v[0] and v[1] vec2 32 ssa_4 = deref_struct &ssa_3->field0 (ssbo vec4) /* &((block *)ssa_2)->field0 */ vec2 32 ssa_6 = deref_array &(*ssa_4)[0] (ssbo float) /* &((block *)ssa_2)->field0[0] */ intrinsic store_deref (ssa_6, ssa_7) (1, 0) /* wrmask=x */ /* access=0 */ vec1 32 ssa_13 = load_const (0x00000001 /* 0.000000 */) vec2 32 ssa_14 = deref_array &(*ssa_4)[1] (ssbo float) /* &((block *)ssa_2)->field0[1] */ intrinsic store_deref (ssa_14, ssa_15) (1, 0) /* wrmask=x */ /* access=0 */ producing two different sends instructions in skl. The combining pass transform the snippet above into vec2 32 ssa_4 = deref_struct &ssa_3->field0 (ssbo vec4) /* &((block *)ssa_2)->field0 */ vec4 32 ssa_18 = vec4 ssa_7, ssa_15, ssa_16, ssa_17 intrinsic store_deref (ssa_4, ssa_18) (3, 0) /* wrmask=xy */ /* access=0 */ producing a single sends instruction. v2: Move this from spirv_to_nir into the general optimization pass for intel compiler. (Jason) Reviewed-by: Jason Ekstrand --- src/intel/compiler/brw_nir.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/intel/compiler/brw_nir.c b/src/intel/compiler/brw_nir.c index 1b99a3b2316..5734987a964 100644 --- a/src/intel/compiler/brw_nir.c +++ b/src/intel/compiler/brw_nir.c @@ -570,6 +570,7 @@ brw_nir_optimize(nir_shader *nir, const struct brw_compiler *compiler, OPT(nir_copy_prop); OPT(nir_opt_dce); OPT(nir_opt_cse); + OPT(nir_opt_combine_stores, nir_var_all); /* Passing 0 to the peephole select pass causes it to convert * if-statements that contain only move instructions in the branches -- 2.30.2