intel/nir: Allow splitting a single load into up to 32 loads
[mesa.git] / src / intel / compiler / brw_nir_lower_mem_access_bit_sizes.c
index ef9aa206b445e585da8c59738e8a903f5bd8848f..c26ea0bb7783c3633d22cdd1b322dc64e26ab9ce 100644 (file)
@@ -81,15 +81,15 @@ lower_mem_load_bit_size(nir_builder *b, nir_intrinsic_instr *intrin,
       intrin->intrinsic == nir_intrinsic_load_scratch;
 
    assert(intrin->dest.is_ssa);
-   if (intrin->dest.ssa.bit_size == 32 &&
-       (!needs_scalar || intrin->num_components == 1))
-      return false;
-
    const unsigned bit_size = intrin->dest.ssa.bit_size;
    const unsigned num_components = intrin->dest.ssa.num_components;
    const unsigned bytes_read = num_components * (bit_size / 8);
    const unsigned align = nir_intrinsic_align(intrin);
 
+   if (bit_size == 32 && align >= 32 &&
+       (!needs_scalar || intrin->num_components == 1))
+      return false;
+
    nir_ssa_def *result;
    nir_src *offset_src = nir_get_io_offset_src(intrin);
    if (bit_size < 32 && nir_src_is_const(*offset_src)) {
@@ -109,8 +109,10 @@ lower_mem_load_bit_size(nir_builder *b, nir_intrinsic_instr *intrin,
       result = nir_extract_bits(b, &load, 1, load_offset * 8,
                                 num_components, bit_size);
    } else {
-      /* Otherwise, we have to break it into smaller loads */
-      nir_ssa_def *loads[8];
+      /* Otherwise, we have to break it into smaller loads.  We could end up
+       * with as many as 32 loads if we're loading a u64vec16 from scratch.
+       */
+      nir_ssa_def *loads[32];
       unsigned num_loads = 0;
       int load_offset = 0;
       while (load_offset < bytes_read) {
@@ -167,7 +169,7 @@ lower_mem_store_bit_size(nir_builder *b, nir_intrinsic_instr *intrin,
    assert(writemask < (1 << num_components));
 
    if ((value->bit_size <= 32 && num_components == 1) ||
-       (value->bit_size == 32 &&
+       (value->bit_size == 32 && align >= 32 &&
         writemask == (1 << num_components) - 1 &&
         !needs_scalar))
       return false;
@@ -275,6 +277,8 @@ lower_mem_access_bit_sizes_impl(nir_function_impl *impl,
    if (progress) {
       nir_metadata_preserve(impl, nir_metadata_block_index |
                                   nir_metadata_dominance);
+   } else {
+      nir_metadata_preserve(impl, nir_metadata_all);
    }
 
    return progress;