nir: do not vectorize load/store if offset can overflow and robustness enabled
author Samuel Pitoiset <samuel.pitoiset@gmail.com>
Mon, 4 May 2020 14:02:38 +0000 (16:02 +0200)
committer Marge Bot <eric+marge@anholt.net>
Mon, 11 May 2020 07:25:15 +0000 (07:25 +0000)
This prevents vectorization of loads/stores whose offset can overflow:
the case where the low offset is negative and the end of the access
(offset + size) is greater than or equal to 0.
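As a minimal sketch of the failure mode (not part of the patch; sizes
in bytes for clarity): a 32-bit offset of 0xfffffffc is -4 when
interpreted as signed, so a 4-byte access ends exactly at offset 0 and
the unsigned offset wraps. Merging such an access with one at offset 0
would fold an out-of-bounds access, which robust access must
bounds-check, into a single combined access with different behaviour:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
       /* 0xfffffffc reinterpreted as a signed 32-bit value is -4. */
       int64_t offset_signed = (int32_t)0xfffffffcu;
       unsigned size = 4; /* bytes covered by the low access */

       /* Overflow case: the access starts below 0 and ends at or
        * above 0, mirroring the check_for_robustness() condition
        * added below. */
       bool can_overflow = offset_signed < 0 &&
                           offset_signed + size >= 0;
       printf("can_overflow = %d\n", can_overflow); /* prints 1 */
       return 0;
    }

The new ssbo_offset_overflow_robust test exercises exactly this pair
of offsets (0xfffffffc and 0x0).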

The caller can now pass the set of variable modes for which robust
access must be preserved.
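For illustration, a sketch of how a caller might use the new
parameter; the wrapper and the robust_buffer_access flag are
hypothetical, only the nir_opt_load_store_vectorize() signature comes
from this patch (existing callers simply pass (nir_variable_mode)0 to
keep the previous behaviour):

    #include "nir.h"

    /* Hypothetical wrapper: enable the robustness check for SSBOs
     * only when the API-level robust buffer access feature is on. */
    static bool
    vectorize_mem_access(nir_shader *shader,
                         nir_should_vectorize_mem_func cb,
                         bool robust_buffer_access)
    {
       nir_variable_mode modes = (nir_variable_mode)
          (nir_var_mem_ubo | nir_var_mem_ssbo);
       nir_variable_mode robust_modes = robust_buffer_access
          ? nir_var_mem_ssbo : (nir_variable_mode)0;

       return nir_opt_load_store_vectorize(shader, modes, cb,
                                           robust_modes);
    }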

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4881>

src/amd/compiler/aco_instruction_selection_setup.cpp
src/compiler/nir/nir.h
src/compiler/nir/nir_opt_load_store_vectorize.c
src/compiler/nir/tests/load_store_vectorizer_tests.cpp
src/intel/compiler/brw_nir.c

src/amd/compiler/aco_instruction_selection_setup.cpp
index 28645017b770d4b869c1013628633e71ad450512..f1fde789e7b2787707f2d08c26ce05f361307960 100644
@@ -998,7 +998,8 @@ setup_nir(isel_context *ctx, nir_shader *nir)
                                     (nir_variable_mode)(nir_var_mem_ssbo | nir_var_mem_ubo |
                                                         nir_var_mem_push_const | nir_var_mem_shared |
                                                         nir_var_mem_global),
-                                    mem_vectorize_callback)) {
+                                    mem_vectorize_callback,
+                                    (nir_variable_mode)0)) {
       lower_to_scalar = true;
       lower_pack = true;
    }
src/compiler/nir/nir.h
index 04080b5e1dfdb0c3e54e3716f9faa1b823b4688e..25caa370f13277b32d06f237cbb2133a5f3db69d 100644
@@ -4429,7 +4429,8 @@ typedef bool (*nir_should_vectorize_mem_func)(unsigned align, unsigned bit_size,
                                               nir_intrinsic_instr *low, nir_intrinsic_instr *high);
 
 bool nir_opt_load_store_vectorize(nir_shader *shader, nir_variable_mode modes,
-                                  nir_should_vectorize_mem_func callback);
+                                  nir_should_vectorize_mem_func callback,
+                                  nir_variable_mode robust_modes);
 
 void nir_schedule(nir_shader *shader, int threshold);
 
src/compiler/nir/nir_opt_load_store_vectorize.c
index c31c8d293bf76598133485d1d5ca0e122481b5b4..814c5a552641edeb539dcb57f2660701b156c2a2 100644
@@ -187,6 +187,7 @@ struct entry {
 struct vectorize_ctx {
    nir_variable_mode modes;
    nir_should_vectorize_mem_func callback;
+   nir_variable_mode robust_modes;
    struct list_head entries[nir_num_variable_modes];
    struct hash_table *loads[nir_num_variable_modes];
    struct hash_table *stores[nir_num_variable_modes];
@@ -1053,6 +1054,22 @@ check_for_aliasing(struct vectorize_ctx *ctx, struct entry *first, struct entry
    return false;
 }
 
+static bool
+check_for_robustness(struct vectorize_ctx *ctx, struct entry *low)
+{
+   nir_variable_mode mode = get_variable_mode(low);
+   if (mode & ctx->robust_modes) {
+      unsigned low_bit_size = get_bit_size(low);
+      unsigned low_size = low->intrin->num_components * low_bit_size;
+
+      /* don't attempt to vectorize accesses if the offset can overflow. */
+      /* TODO: handle indirect accesses. */
+      return low->offset_signed < 0 && low->offset_signed + low_size >= 0;
+   }
+
+   return false;
+}
+
 static bool
 is_strided_vector(const struct glsl_type *type)
 {
@@ -1077,6 +1094,9 @@ try_vectorize(nir_function_impl *impl, struct vectorize_ctx *ctx,
    if (check_for_aliasing(ctx, first, second))
       return false;
 
+   if (check_for_robustness(ctx, low))
+      return false;
+
    /* we can only vectorize non-volatile loads/stores of the same type and with
     * the same access */
    if (first->info != second->info || first->access != second->access ||
@@ -1327,13 +1347,15 @@ process_block(nir_function_impl *impl, struct vectorize_ctx *ctx, nir_block *blo
 
 bool
 nir_opt_load_store_vectorize(nir_shader *shader, nir_variable_mode modes,
-                             nir_should_vectorize_mem_func callback)
+                             nir_should_vectorize_mem_func callback,
+                             nir_variable_mode robust_modes)
 {
    bool progress = false;
 
    struct vectorize_ctx *ctx = rzalloc(NULL, struct vectorize_ctx);
    ctx->modes = modes;
    ctx->callback = callback;
+   ctx->robust_modes = robust_modes;
 
    nir_index_vars(shader, NULL, modes);
 
src/compiler/nir/tests/load_store_vectorizer_tests.cpp
index 710a2da6f72852052a9bfe95aa7bd4f2653e0a8b..4001b8d192c9897f6346a89b24bafe0fa833395b 100644
@@ -38,7 +38,8 @@ protected:
    nir_intrinsic_instr *get_intrinsic(nir_intrinsic_op intrinsic,
                                       unsigned index);
 
-   bool run_vectorizer(nir_variable_mode modes, bool cse=false);
+   bool run_vectorizer(nir_variable_mode modes, bool cse=false,
+                       nir_variable_mode robust_modes = (nir_variable_mode)0);
 
    nir_ssa_def *get_resource(uint32_t binding, bool ssbo);
 
@@ -134,11 +135,13 @@ nir_load_store_vectorize_test::get_intrinsic(nir_intrinsic_op intrinsic,
 }
 
 bool
-nir_load_store_vectorize_test::run_vectorizer(nir_variable_mode modes, bool cse)
+nir_load_store_vectorize_test::run_vectorizer(nir_variable_mode modes,
+                                              bool cse,
+                                              nir_variable_mode robust_modes)
 {
    if (modes & nir_var_mem_shared)
       nir_lower_vars_to_explicit_types(b->shader, nir_var_mem_shared, shared_type_info);
-   bool progress = nir_opt_load_store_vectorize(b->shader, modes, mem_vectorize_callback);
+   bool progress = nir_opt_load_store_vectorize(b->shader, modes, mem_vectorize_callback, robust_modes);
    if (progress) {
       nir_validate_shader(b->shader, NULL);
       if (cse)
@@ -1773,3 +1776,16 @@ TEST_F(nir_load_store_vectorize_test, ssbo_load_distant_indirect_64bit)
 
    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
 }
+
+TEST_F(nir_load_store_vectorize_test, ssbo_offset_overflow_robust)
+{
+   create_load(nir_var_mem_ssbo, 0, 0xfffffffc, 0x1);
+   create_load(nir_var_mem_ssbo, 0, 0x0, 0x2);
+
+   nir_validate_shader(b->shader, NULL);
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
+
+   EXPECT_FALSE(run_vectorizer(nir_var_mem_ssbo, false, nir_var_mem_ssbo));
+
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
+}
src/intel/compiler/brw_nir.c
index e59b3ac4269b608cec2ba0ff14943565352a75a6..7a1fea10aa2e7eed5ac797f2535e1a18446cb48b 100644
@@ -881,7 +881,8 @@ brw_vectorize_lower_mem_access(nir_shader *nir,
       OPT(nir_opt_load_store_vectorize,
           nir_var_mem_ubo | nir_var_mem_ssbo |
           nir_var_mem_global | nir_var_mem_shared,
-          brw_nir_should_vectorize_mem);
+          brw_nir_should_vectorize_mem,
+          (nir_variable_mode)0);
    }
 
    OPT(brw_nir_lower_mem_access_bit_sizes, devinfo);