Added a few more stubs so that control reaches DestroyDevice().
[mesa.git] / src / intel / compiler / brw_nir_lower_mem_access_bit_sizes.c
index 19abc16a9c58dc16e94f28fed0299aec7ef6e6a4..4ea20fe5b18002ba4b27a60323463d6935242009 100644 (file)
@@ -86,7 +86,7 @@ lower_mem_load_bit_size(nir_builder *b, nir_intrinsic_instr *intrin,
    const unsigned bytes_read = num_components * (bit_size / 8);
    const unsigned align = nir_intrinsic_align(intrin);
 
-   if (bit_size == 32 && align >= 32 &&
+   if (bit_size == 32 && align >= 32 && intrin->num_components <= 4 &&
        (!needs_scalar || intrin->num_components == 1))
       return false;
 
@@ -109,8 +109,10 @@ lower_mem_load_bit_size(nir_builder *b, nir_intrinsic_instr *intrin,
       result = nir_extract_bits(b, &load, 1, load_offset * 8,
                                 num_components, bit_size);
    } else {
-      /* Otherwise, we have to break it into smaller loads */
-      nir_ssa_def *loads[8];
+      /* Otherwise, we have to break it into smaller loads.  We could end up
+       * with as many as 32 loads if we're loading a u64vec16 from scratch.
+       */
+      nir_ssa_def *loads[32];
       unsigned num_loads = 0;
       int load_offset = 0;
       while (load_offset < bytes_read) {
@@ -167,7 +169,7 @@ lower_mem_store_bit_size(nir_builder *b, nir_intrinsic_instr *intrin,
    assert(writemask < (1 << num_components));
 
    if ((value->bit_size <= 32 && num_components == 1) ||
-       (value->bit_size == 32 && align >= 32 &&
+       (value->bit_size == 32 && num_components <= 4 && align >= 32 &&
         writemask == (1 << num_components) - 1 &&
         !needs_scalar))
       return false;
@@ -275,6 +277,8 @@ lower_mem_access_bit_sizes_impl(nir_function_impl *impl,
    if (progress) {
       nir_metadata_preserve(impl, nir_metadata_block_index |
                                   nir_metadata_dominance);
+   } else {
+      nir_metadata_preserve(impl, nir_metadata_all);
    }
 
    return progress;