nir/opt_vectorize: Add a callback for filtering vectorization.
author	Eric Anholt <eric@anholt.net>
Thu, 27 Aug 2020 19:49:13 +0000 (12:49 -0700)
committer	Eric Anholt <eric@anholt.net>
Wed, 2 Sep 2020 16:59:17 +0000 (09:59 -0700)
For NIR-to-TGSI, we don't want the 64-bit ops that we have already
split to scalar to be revectorized beyond vec2 width.  We also have
some ops that we would rather keep scalar, either because the
corresponding TGSI opcodes are scalar or because they have more
unusual requirements.

This callback could also handle the vectorize_vec2_16bit filtering,
but that shader compiler option is also used in the algebraic
optimizations, so leave it in place for now.

Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6567>

src/compiler/nir/nir.h
src/compiler/nir/nir_opt_vectorize.c
src/gallium/drivers/etnaviv/etnaviv_compiler_nir.c
src/gallium/drivers/lima/lima_program.c
src/gallium/drivers/r600/r600_shader.c
src/gallium/drivers/r600/sfn/sfn_nir.cpp
src/panfrost/midgard/midgard_compile.c

index cbf5c0746c351fe8e8ad6b2b1765db51bcd45344..1d1bb6c958470db3b21d30c5dc50ad8a3c55b2ad 100644
@@ -4780,7 +4780,10 @@ bool nir_opt_trivial_continues(nir_shader *shader);
 
 bool nir_opt_undef(nir_shader *shader);
 
-bool nir_opt_vectorize(nir_shader *shader);
+typedef bool (*nir_opt_vectorize_cb)(const nir_instr *a, const nir_instr *b,
+                                     void *data);
+bool nir_opt_vectorize(nir_shader *shader, nir_opt_vectorize_cb filter,
+                       void *data);
 
 bool nir_opt_conditional_discard(nir_shader *shader);
 
index 3bf65151e0d978ad70ae4e8b63765a9c598c1cda..1d372ead9fbcbdc7011d00f1fe05f36bbe07eee7 100644
@@ -162,7 +162,8 @@ instr_can_rewrite(nir_instr *instr)
  */
 
 static nir_instr *
-instr_try_combine(struct nir_shader *nir, nir_instr *instr1, nir_instr *instr2)
+instr_try_combine(struct nir_shader *nir, nir_instr *instr1, nir_instr *instr2,
+                  nir_opt_vectorize_cb filter, void *data)
 {
    assert(instr1->type == nir_instr_type_alu);
    assert(instr2->type == nir_instr_type_alu);
@@ -181,6 +182,9 @@ instr_try_combine(struct nir_shader *nir, nir_instr *instr1, nir_instr *instr2)
        (total_components > 2 || alu1->dest.dest.ssa.bit_size != 16))
       return NULL;
 
+   if (filter && !filter(&alu1->instr, &alu2->instr, data))
+      return NULL;
+
    nir_builder b;
    nir_builder_init(&b, nir_cf_node_get_function(&instr1->block->cf_node));
    b.cursor = nir_after_instr(instr1);
@@ -320,13 +324,15 @@ vec_instr_stack_create(void *mem_ctx)
 
 static bool
 vec_instr_stack_push(struct nir_shader *nir, struct util_dynarray *stack,
-                     nir_instr *instr)
+                     nir_instr *instr,
+                     nir_opt_vectorize_cb filter, void *data)
 {
    /* Walk the stack from child to parent to make live ranges shorter by
     * matching the closest thing we can
     */
    util_dynarray_foreach_reverse(stack, nir_instr *, stack_instr) {
-      nir_instr *new_instr = instr_try_combine(nir, *stack_instr, instr);
+      nir_instr *new_instr = instr_try_combine(nir, *stack_instr, instr,
+                                               filter, data);
       if (new_instr) {
          *stack_instr = new_instr;
          return true;
@@ -378,20 +384,21 @@ vec_instr_set_destroy(struct set *instr_set)
 
 static bool
 vec_instr_set_add_or_rewrite(struct nir_shader *nir, struct set *instr_set,
-                             nir_instr *instr)
+                             nir_instr *instr,
+                             nir_opt_vectorize_cb filter, void *data)
 {
    if (!instr_can_rewrite(instr))
       return false;
 
    struct util_dynarray *new_stack = vec_instr_stack_create(instr_set);
-   vec_instr_stack_push(nir, new_stack, instr);
+   vec_instr_stack_push(nir, new_stack, instr, filter, data);
 
    struct set_entry *entry = _mesa_set_search(instr_set, new_stack);
 
    if (entry) {
       ralloc_free(new_stack);
       struct util_dynarray *stack = (struct util_dynarray *) entry->key;
-      return vec_instr_stack_push(nir, stack, instr);
+      return vec_instr_stack_push(nir, stack, instr, filter, data);
    }
 
    _mesa_set_add(instr_set, new_stack);
@@ -400,7 +407,7 @@ vec_instr_set_add_or_rewrite(struct nir_shader *nir, struct set *instr_set,
 
 static void
 vec_instr_set_remove(struct nir_shader *nir, struct set *instr_set,
-                     nir_instr *instr)
+                     nir_instr *instr, nir_opt_vectorize_cb filter, void *data)
 {
    if (!instr_can_rewrite(instr))
       return;
@@ -417,7 +424,7 @@ vec_instr_set_remove(struct nir_shader *nir, struct set *instr_set,
     * comparison function as well.
     */
    struct util_dynarray *temp = vec_instr_stack_create(instr_set);
-   vec_instr_stack_push(nir, temp, instr);
+   vec_instr_stack_push(nir, temp, instr, filter, data);
    struct set_entry *entry = _mesa_set_search(instr_set, temp);
    ralloc_free(temp);
 
@@ -433,34 +440,37 @@ vec_instr_set_remove(struct nir_shader *nir, struct set *instr_set,
 
 static bool
 vectorize_block(struct nir_shader *nir, nir_block *block,
-                struct set *instr_set)
+                struct set *instr_set,
+                nir_opt_vectorize_cb filter, void *data)
 {
    bool progress = false;
 
    nir_foreach_instr_safe(instr, block) {
-      if (vec_instr_set_add_or_rewrite(nir, instr_set, instr))
+      if (vec_instr_set_add_or_rewrite(nir, instr_set, instr, filter, data))
          progress = true;
    }
 
    for (unsigned i = 0; i < block->num_dom_children; i++) {
       nir_block *child = block->dom_children[i];
-      progress |= vectorize_block(nir, child, instr_set);
+      progress |= vectorize_block(nir, child, instr_set, filter, data);
    }
 
    nir_foreach_instr_reverse(instr, block)
-      vec_instr_set_remove(nir, instr_set, instr);
+      vec_instr_set_remove(nir, instr_set, instr, filter, data);
 
    return progress;
 }
 
 static bool
-nir_opt_vectorize_impl(struct nir_shader *nir, nir_function_impl *impl)
+nir_opt_vectorize_impl(struct nir_shader *nir, nir_function_impl *impl,
+                       nir_opt_vectorize_cb filter, void *data)
 {
    struct set *instr_set = vec_instr_set_create();
 
    nir_metadata_require(impl, nir_metadata_dominance);
 
-   bool progress = vectorize_block(nir, nir_start_block(impl), instr_set);
+   bool progress = vectorize_block(nir, nir_start_block(impl), instr_set,
+                                   filter, data);
 
    if (progress)
       nir_metadata_preserve(impl, nir_metadata_block_index |
@@ -471,13 +481,14 @@ nir_opt_vectorize_impl(struct nir_shader *nir, nir_function_impl *impl)
 }
 
 bool
-nir_opt_vectorize(nir_shader *shader)
+nir_opt_vectorize(nir_shader *shader, nir_opt_vectorize_cb filter,
+                  void *data)
 {
    bool progress = false;
 
    nir_foreach_function(function, shader) {
       if (function->impl)
-         progress |= nir_opt_vectorize_impl(shader, function->impl);
+         progress |= nir_opt_vectorize_impl(shader, function->impl, filter, data);
    }
 
    return progress;
index 79ef97cdbde4ab10524622bb372b288a21cc1e25..b09a720e43cb4992f13bec6977f870de5d8cf736 100644
@@ -1122,7 +1122,7 @@ etna_compile_shader_nir(struct etna_shader_variant *v)
    if (DBG_ENABLED(ETNA_DBG_DUMP_SHADERS))
       nir_print_shader(s, stdout);
 
-   while( OPT(s, nir_opt_vectorize) );
+   while( OPT(s, nir_opt_vectorize, NULL, NULL) );
    NIR_PASS_V(s, nir_lower_alu_to_scalar, etna_alu_to_scalar_filter_cb, specs);
 
    NIR_PASS_V(s, nir_remove_dead_variables, nir_var_function_temp, NULL);
index 1bb1bd88622eb280f9a9fec0114dd1060a0c1aa9..87029d3140a8d4ce7ffde72f4f5c6dd79f3a37fe 100644
@@ -201,7 +201,7 @@ lima_program_optimize_fs_nir(struct nir_shader *s,
 
    do {
       progress = false;
-      NIR_PASS(progress, s, nir_opt_vectorize);
+      NIR_PASS(progress, s, nir_opt_vectorize, NULL, NULL);
    } while (progress);
 
    do {
index b7dab54daab55a795b84a6a2727a392015173017..9ac0efad7e466532d60a7c6fa824586a2e2e200e 100644
@@ -201,7 +201,7 @@ int r600_pipe_shader_create(struct pipe_context *ctx,
                                NIR_PASS_V(sel->nir, nir_lower_regs_to_ssa);
                                NIR_PASS_V(sel->nir, nir_lower_alu_to_scalar, NULL, NULL);
                                NIR_PASS_V(sel->nir, nir_lower_int64);
-                               NIR_PASS_V(sel->nir, nir_opt_vectorize);
+                               NIR_PASS_V(sel->nir, nir_opt_vectorize, NULL, NULL);
                        }
                        NIR_PASS_V(sel->nir, nir_lower_flrp, ~0, false, false);
                }
index cbaaa7eb147910feef657cd392c7f498be03a7e0..9f75726f2aca35331f720aaa57fb464a0daa8ca6 100644
@@ -768,7 +768,7 @@ optimize_once(nir_shader *shader)
    NIR_PASS(progress, shader, nir_opt_algebraic);
    NIR_PASS(progress, shader, nir_opt_constant_folding);
    NIR_PASS(progress, shader, nir_opt_copy_prop_vars);
-   NIR_PASS(progress, shader, nir_opt_vectorize);
+   NIR_PASS(progress, shader, nir_opt_vectorize, NULL, NULL);
 
    NIR_PASS(progress, shader, nir_opt_remove_phis);
 
index a58701de75fbd9ac552da32afeab8ded29625ed1..bd13efd0ecad2618a94323fe0736f6e902dfa1c9 100644
@@ -558,7 +558,7 @@ optimise_nir(nir_shader *nir, unsigned quirks, bool is_blend)
                          nir_var_shader_out |
                          nir_var_function_temp);
 
-                NIR_PASS(progress, nir, nir_opt_vectorize);
+                NIR_PASS(progress, nir, nir_opt_vectorize, NULL, NULL);
         } while (progress);
 
         NIR_PASS_V(nir, nir_lower_alu_to_scalar, mdg_is_64, NULL);