aco: improve vectorization of 8/16-bit loads/stores
author: Rhys Perry <pendingchaos02@gmail.com>
Mon, 25 May 2020 14:36:12 +0000 (15:36 +0100)
committer: Marge Bot <eric+marge@anholt.net>
Wed, 24 Jun 2020 10:52:28 +0000 (10:52 +0000)
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5207>

src/amd/compiler/aco_instruction_selection_setup.cpp

index d60cd69445db1a239cbeeda8a2457cfe1c8fd79c..0cc4a558850b67e350938890f6c09f1e6dbb5fcd 100644 (file)
@@ -896,7 +896,7 @@ mem_vectorize_callback(unsigned align, unsigned bit_size,
                        unsigned num_components, unsigned high_offset,
                        nir_intrinsic_instr *low, nir_intrinsic_instr *high)
 {
-   if ((bit_size != 32 && bit_size != 64) || num_components > 4)
+   if (num_components > 4)
       return false;
 
    /* >128 bit loads are split except with SMEM */
@@ -906,17 +906,11 @@ mem_vectorize_callback(unsigned align, unsigned bit_size,
    switch (low->intrinsic) {
    case nir_intrinsic_load_global:
    case nir_intrinsic_store_global:
-      return align % 4 == 0;
    case nir_intrinsic_store_ssbo:
-      if (low->src[0].ssa->bit_size < 32 || high->src[0].ssa->bit_size < 32)
-         return false;
-      return align % 4 == 0;
    case nir_intrinsic_load_ssbo:
-      if (low->dest.ssa.bit_size < 32 || high->dest.ssa.bit_size < 32)
-         return false;
    case nir_intrinsic_load_ubo:
    case nir_intrinsic_load_push_constant:
-      return align % 4 == 0;
+      return align % (bit_size == 8 ? 2 : 4) == 0;
    case nir_intrinsic_load_deref:
    case nir_intrinsic_store_deref:
       assert(nir_src_as_deref(low->src[0])->mode == nir_var_mem_shared);
@@ -926,7 +920,7 @@ mem_vectorize_callback(unsigned align, unsigned bit_size,
       if (bit_size * num_components > 64) /* 96 and 128 bit loads require 128 bit alignment and are split otherwise */
          return align % 16 == 0;
       else
-         return align % 4 == 0;
+         return align % (bit_size == 8 ? 2 : 4) == 0;
    default:
       return false;
    }