For NIR-to-TGSI, we don't want to revectorize 64-bit ops that we split to
scalar beyond vec2 width. We even have some ops that we would rather
retain as scalar due to TGSI opcodes being scalar, or having more unusual
requirements.
This callback could also be used to implement the vectorize_vec2_16bit filtering, but that
shader-compiler option is consulted by the algebraic pass as well, so leave the option in
place for now.
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6567>
bool nir_opt_undef(nir_shader *shader);
bool nir_opt_undef(nir_shader *shader);
-bool nir_opt_vectorize(nir_shader *shader);
+typedef bool (*nir_opt_vectorize_cb)(const nir_instr *a, const nir_instr *b,
+ void *data);
+bool nir_opt_vectorize(nir_shader *shader, nir_opt_vectorize_cb filter,
+ void *data);
bool nir_opt_conditional_discard(nir_shader *shader);
bool nir_opt_conditional_discard(nir_shader *shader);
-instr_try_combine(struct nir_shader *nir, nir_instr *instr1, nir_instr *instr2)
+instr_try_combine(struct nir_shader *nir, nir_instr *instr1, nir_instr *instr2,
+ nir_opt_vectorize_cb filter, void *data)
{
assert(instr1->type == nir_instr_type_alu);
assert(instr2->type == nir_instr_type_alu);
{
assert(instr1->type == nir_instr_type_alu);
assert(instr2->type == nir_instr_type_alu);
(total_components > 2 || alu1->dest.dest.ssa.bit_size != 16))
return NULL;
(total_components > 2 || alu1->dest.dest.ssa.bit_size != 16))
return NULL;
+ if (filter && !filter(&alu1->instr, &alu2->instr, data))
+ return NULL;
+
nir_builder b;
nir_builder_init(&b, nir_cf_node_get_function(&instr1->block->cf_node));
b.cursor = nir_after_instr(instr1);
nir_builder b;
nir_builder_init(&b, nir_cf_node_get_function(&instr1->block->cf_node));
b.cursor = nir_after_instr(instr1);
static bool
vec_instr_stack_push(struct nir_shader *nir, struct util_dynarray *stack,
static bool
vec_instr_stack_push(struct nir_shader *nir, struct util_dynarray *stack,
+ nir_instr *instr,
+ nir_opt_vectorize_cb filter, void *data)
{
/* Walk the stack from child to parent to make live ranges shorter by
* matching the closest thing we can
*/
util_dynarray_foreach_reverse(stack, nir_instr *, stack_instr) {
{
/* Walk the stack from child to parent to make live ranges shorter by
* matching the closest thing we can
*/
util_dynarray_foreach_reverse(stack, nir_instr *, stack_instr) {
- nir_instr *new_instr = instr_try_combine(nir, *stack_instr, instr);
+ nir_instr *new_instr = instr_try_combine(nir, *stack_instr, instr,
+ filter, data);
if (new_instr) {
*stack_instr = new_instr;
return true;
if (new_instr) {
*stack_instr = new_instr;
return true;
static bool
vec_instr_set_add_or_rewrite(struct nir_shader *nir, struct set *instr_set,
static bool
vec_instr_set_add_or_rewrite(struct nir_shader *nir, struct set *instr_set,
+ nir_instr *instr,
+ nir_opt_vectorize_cb filter, void *data)
{
if (!instr_can_rewrite(instr))
return false;
struct util_dynarray *new_stack = vec_instr_stack_create(instr_set);
{
if (!instr_can_rewrite(instr))
return false;
struct util_dynarray *new_stack = vec_instr_stack_create(instr_set);
- vec_instr_stack_push(nir, new_stack, instr);
+ vec_instr_stack_push(nir, new_stack, instr, filter, data);
struct set_entry *entry = _mesa_set_search(instr_set, new_stack);
if (entry) {
ralloc_free(new_stack);
struct util_dynarray *stack = (struct util_dynarray *) entry->key;
struct set_entry *entry = _mesa_set_search(instr_set, new_stack);
if (entry) {
ralloc_free(new_stack);
struct util_dynarray *stack = (struct util_dynarray *) entry->key;
- return vec_instr_stack_push(nir, stack, instr);
+ return vec_instr_stack_push(nir, stack, instr, filter, data);
}
_mesa_set_add(instr_set, new_stack);
}
_mesa_set_add(instr_set, new_stack);
static void
vec_instr_set_remove(struct nir_shader *nir, struct set *instr_set,
static void
vec_instr_set_remove(struct nir_shader *nir, struct set *instr_set,
+ nir_instr *instr, nir_opt_vectorize_cb filter, void *data)
{
if (!instr_can_rewrite(instr))
return;
{
if (!instr_can_rewrite(instr))
return;
* comparison function as well.
*/
struct util_dynarray *temp = vec_instr_stack_create(instr_set);
* comparison function as well.
*/
struct util_dynarray *temp = vec_instr_stack_create(instr_set);
- vec_instr_stack_push(nir, temp, instr);
+ vec_instr_stack_push(nir, temp, instr, filter, data);
struct set_entry *entry = _mesa_set_search(instr_set, temp);
ralloc_free(temp);
struct set_entry *entry = _mesa_set_search(instr_set, temp);
ralloc_free(temp);
static bool
vectorize_block(struct nir_shader *nir, nir_block *block,
static bool
vectorize_block(struct nir_shader *nir, nir_block *block,
+ struct set *instr_set,
+ nir_opt_vectorize_cb filter, void *data)
{
bool progress = false;
nir_foreach_instr_safe(instr, block) {
{
bool progress = false;
nir_foreach_instr_safe(instr, block) {
- if (vec_instr_set_add_or_rewrite(nir, instr_set, instr))
+ if (vec_instr_set_add_or_rewrite(nir, instr_set, instr, filter, data))
progress = true;
}
for (unsigned i = 0; i < block->num_dom_children; i++) {
nir_block *child = block->dom_children[i];
progress = true;
}
for (unsigned i = 0; i < block->num_dom_children; i++) {
nir_block *child = block->dom_children[i];
- progress |= vectorize_block(nir, child, instr_set);
+ progress |= vectorize_block(nir, child, instr_set, filter, data);
}
nir_foreach_instr_reverse(instr, block)
}
nir_foreach_instr_reverse(instr, block)
- vec_instr_set_remove(nir, instr_set, instr);
+ vec_instr_set_remove(nir, instr_set, instr, filter, data);
return progress;
}
static bool
return progress;
}
static bool
-nir_opt_vectorize_impl(struct nir_shader *nir, nir_function_impl *impl)
+nir_opt_vectorize_impl(struct nir_shader *nir, nir_function_impl *impl,
+ nir_opt_vectorize_cb filter, void *data)
{
struct set *instr_set = vec_instr_set_create();
nir_metadata_require(impl, nir_metadata_dominance);
{
struct set *instr_set = vec_instr_set_create();
nir_metadata_require(impl, nir_metadata_dominance);
- bool progress = vectorize_block(nir, nir_start_block(impl), instr_set);
+ bool progress = vectorize_block(nir, nir_start_block(impl), instr_set,
+ filter, data);
if (progress)
nir_metadata_preserve(impl, nir_metadata_block_index |
if (progress)
nir_metadata_preserve(impl, nir_metadata_block_index |
-nir_opt_vectorize(nir_shader *shader)
+nir_opt_vectorize(nir_shader *shader, nir_opt_vectorize_cb filter,
+ void *data)
{
bool progress = false;
nir_foreach_function(function, shader) {
if (function->impl)
{
bool progress = false;
nir_foreach_function(function, shader) {
if (function->impl)
- progress |= nir_opt_vectorize_impl(shader, function->impl);
+ progress |= nir_opt_vectorize_impl(shader, function->impl, filter, data);
if (DBG_ENABLED(ETNA_DBG_DUMP_SHADERS))
nir_print_shader(s, stdout);
if (DBG_ENABLED(ETNA_DBG_DUMP_SHADERS))
nir_print_shader(s, stdout);
- while( OPT(s, nir_opt_vectorize) );
+ while( OPT(s, nir_opt_vectorize, NULL, NULL) );
NIR_PASS_V(s, nir_lower_alu_to_scalar, etna_alu_to_scalar_filter_cb, specs);
NIR_PASS_V(s, nir_remove_dead_variables, nir_var_function_temp, NULL);
NIR_PASS_V(s, nir_lower_alu_to_scalar, etna_alu_to_scalar_filter_cb, specs);
NIR_PASS_V(s, nir_remove_dead_variables, nir_var_function_temp, NULL);
- NIR_PASS(progress, s, nir_opt_vectorize);
+ NIR_PASS(progress, s, nir_opt_vectorize, NULL, NULL);
NIR_PASS_V(sel->nir, nir_lower_regs_to_ssa);
NIR_PASS_V(sel->nir, nir_lower_alu_to_scalar, NULL, NULL);
NIR_PASS_V(sel->nir, nir_lower_int64);
NIR_PASS_V(sel->nir, nir_lower_regs_to_ssa);
NIR_PASS_V(sel->nir, nir_lower_alu_to_scalar, NULL, NULL);
NIR_PASS_V(sel->nir, nir_lower_int64);
- NIR_PASS_V(sel->nir, nir_opt_vectorize);
+ NIR_PASS_V(sel->nir, nir_opt_vectorize, NULL, NULL);
}
NIR_PASS_V(sel->nir, nir_lower_flrp, ~0, false, false);
}
}
NIR_PASS_V(sel->nir, nir_lower_flrp, ~0, false, false);
}
NIR_PASS(progress, shader, nir_opt_algebraic);
NIR_PASS(progress, shader, nir_opt_constant_folding);
NIR_PASS(progress, shader, nir_opt_copy_prop_vars);
NIR_PASS(progress, shader, nir_opt_algebraic);
NIR_PASS(progress, shader, nir_opt_constant_folding);
NIR_PASS(progress, shader, nir_opt_copy_prop_vars);
- NIR_PASS(progress, shader, nir_opt_vectorize);
+ NIR_PASS(progress, shader, nir_opt_vectorize, NULL, NULL);
NIR_PASS(progress, shader, nir_opt_remove_phis);
NIR_PASS(progress, shader, nir_opt_remove_phis);
nir_var_shader_out |
nir_var_function_temp);
nir_var_shader_out |
nir_var_function_temp);
- NIR_PASS(progress, nir, nir_opt_vectorize);
+ NIR_PASS(progress, nir, nir_opt_vectorize, NULL, NULL);
} while (progress);
NIR_PASS_V(nir, nir_lower_alu_to_scalar, mdg_is_64, NULL);
} while (progress);
NIR_PASS_V(nir, nir_lower_alu_to_scalar, mdg_is_64, NULL);