nir: Shrink store intrinsic num_components to the size used by the writemask.
author: Eric Anholt <eric@anholt.net>
Thu, 23 Jul 2020 21:32:13 +0000 (14:32 -0700)
committer: Marge Bot <eric+marge@anholt.net>
Thu, 20 Aug 2020 16:44:08 +0000 (16:44 +0000)
This cuts a bunch of vector setup for undef components in the i965 vec4
backend.  Noticed while looking into codegen regressions in nir-to-tgsi.

brw results:
total instructions in shared programs: 3893221 -> 3881461 (-0.30%)
total cycles in shared programs: 113792154 -> 113810288 (0.02%)

Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6054>

src/compiler/nir/nir_opt_shrink_vectors.c

index 8b01f9f4817e157208cdef796d3b3cd9db2f0cc5..0790138a74934f771e24fe9fa11258a25322d875 100644 (file)
@@ -110,7 +110,7 @@ opt_shrink_vectors_alu(nir_builder *b, nir_alu_instr *instr)
 }
 
 static bool
-opt_shrink_vectors_intrinsic(nir_intrinsic_instr *instr)
+opt_shrink_vectors_intrinsic(nir_builder *b, nir_intrinsic_instr *instr)
 {
    switch (instr->intrinsic) {
    case nir_intrinsic_load_uniform:
@@ -125,18 +125,43 @@ opt_shrink_vectors_intrinsic(nir_intrinsic_instr *instr)
    case nir_intrinsic_load_global:
    case nir_intrinsic_load_kernel_input:
    case nir_intrinsic_load_scratch:
+   case nir_intrinsic_store_output:
+   case nir_intrinsic_store_per_vertex_output:
+   case nir_intrinsic_store_ssbo:
+   case nir_intrinsic_store_shared:
+   case nir_intrinsic_store_global:
+   case nir_intrinsic_store_scratch:
       break;
    default:
       return false;
    }
 
-   assert(nir_intrinsic_infos[instr->intrinsic].has_dest);
    /* Must be a vectorized intrinsic that we can resize. */
    assert(instr->num_components != 0);
 
-   if (shrink_dest_to_read_mask(&instr->dest.ssa)) {
-      instr->num_components = instr->dest.ssa.num_components;
-      return true;
+   if (nir_intrinsic_infos[instr->intrinsic].has_dest) {
+      /* loads: Trim the dest to the used channels */
+
+      if (shrink_dest_to_read_mask(&instr->dest.ssa)) {
+         instr->num_components = instr->dest.ssa.num_components;
+         return true;
+      }
+   } else {
+      /* Stores: trim the num_components stored according to the write
+       * mask.
+       */
+      unsigned write_mask = nir_intrinsic_write_mask(instr);
+      unsigned last_bit = util_last_bit(write_mask);
+      if (last_bit < instr->num_components && instr->src[0].is_ssa) {
+         nir_ssa_def *def = nir_channels(b, instr->src[0].ssa,
+                                         BITSET_MASK(last_bit));
+         nir_instr_rewrite_src(&instr->instr,
+                               &instr->src[0],
+                               nir_src_for_ssa(def));
+         instr->num_components = last_bit;
+
+         return true;
+      }
    }
 
    return false;
@@ -164,7 +189,7 @@ opt_shrink_vectors_instr(nir_builder *b, nir_instr *instr)
       return opt_shrink_vectors_alu(b, nir_instr_as_alu(instr));
 
    case nir_instr_type_intrinsic:
-      return opt_shrink_vectors_intrinsic(nir_instr_as_intrinsic(instr));
+      return opt_shrink_vectors_intrinsic(b, nir_instr_as_intrinsic(instr));
 
    case nir_instr_type_load_const:
       return opt_shrink_vectors_load_const(nir_instr_as_load_const(instr));