From 9367d2ca37a3b8108ecb74e2875a600b5543c163 Mon Sep 17 00:00:00 2001 From: Vasily Khoruzhick Date: Thu, 29 Aug 2019 21:14:54 -0700 Subject: [PATCH] nir: allow specifying filter callback in lower_alu_to_scalar A fixed set of opcodes isn't flexible enough in certain cases. E.g. the Utgard PP has a vector conditional select operation, but its condition is always scalar. Lowering all the vector selects to scalar increases the instruction count, so we need a way to filter only those ops that can't be handled in hardware. Reviewed-by: Qiang Yu Reviewed-by: Eric Anholt Reviewed-by: Jason Ekstrand Signed-off-by: Vasily Khoruzhick --- src/amd/vulkan/radv_shader.c | 2 +- src/broadcom/compiler/nir_to_vir.c | 2 +- src/compiler/nir/nir.h | 2 +- src/compiler/nir/nir_lower_alu_to_scalar.c | 20 ++++++-- src/freedreno/ir3/ir3_nir.c | 2 +- src/gallium/auxiliary/nir/tgsi_to_nir.c | 2 +- .../drivers/etnaviv/etnaviv_compiler_nir.c | 49 +++++++++++-------- src/gallium/drivers/freedreno/a2xx/ir2_nir.c | 35 ++++++++----- src/gallium/drivers/lima/lima_program.c | 46 +++++++++++------ .../nouveau/codegen/nv50_ir_from_nir.cpp | 2 +- src/gallium/drivers/radeonsi/si_shader_nir.c | 2 +- src/gallium/drivers/vc4/vc4_program.c | 2 +- src/intel/compiler/brw_nir.c | 6 +-- src/mesa/state_tracker/st_glsl_to_nir.cpp | 4 +- src/panfrost/bifrost/bifrost_compile.c | 2 +- src/panfrost/bifrost/cmdline.c | 2 +- 16 files changed, 113 insertions(+), 67 deletions(-) diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c index 1ab64a6e328..f90689e85b5 100644 --- a/src/amd/vulkan/radv_shader.c +++ b/src/amd/vulkan/radv_shader.c @@ -200,7 +200,7 @@ radv_optimize_nir(struct nir_shader *shader, bool optimize_conservatively, NIR_PASS(progress, shader, nir_remove_dead_variables, nir_var_function_temp); - NIR_PASS_V(shader, nir_lower_alu_to_scalar, NULL); + NIR_PASS_V(shader, nir_lower_alu_to_scalar, NULL, NULL); NIR_PASS_V(shader, nir_lower_phis_to_scalar); NIR_PASS(progress, shader, nir_copy_prop); diff 
--git a/src/broadcom/compiler/nir_to_vir.c b/src/broadcom/compiler/nir_to_vir.c index 91e95f9ee5a..b640dcc341b 100644 --- a/src/broadcom/compiler/nir_to_vir.c +++ b/src/broadcom/compiler/nir_to_vir.c @@ -1382,7 +1382,7 @@ v3d_optimize_nir(struct nir_shader *s) progress = false; NIR_PASS_V(s, nir_lower_vars_to_ssa); - NIR_PASS(progress, s, nir_lower_alu_to_scalar, NULL); + NIR_PASS(progress, s, nir_lower_alu_to_scalar, NULL, NULL); NIR_PASS(progress, s, nir_lower_phis_to_scalar); NIR_PASS(progress, s, nir_copy_prop); NIR_PASS(progress, s, nir_opt_remove_phis); diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index 5149a0e8c01..bad1d6af212 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -3606,7 +3606,7 @@ bool nir_lower_alu(nir_shader *shader); bool nir_lower_flrp(nir_shader *shader, unsigned lowering_mask, bool always_precise, bool have_ffma); -bool nir_lower_alu_to_scalar(nir_shader *shader, BITSET_WORD *lower_set); +bool nir_lower_alu_to_scalar(nir_shader *shader, nir_instr_filter_cb cb, const void *data); bool nir_lower_bool_to_float(nir_shader *shader); bool nir_lower_bool_to_int32(nir_shader *shader); bool nir_lower_int_to_float(nir_shader *shader); diff --git a/src/compiler/nir/nir_lower_alu_to_scalar.c b/src/compiler/nir/nir_lower_alu_to_scalar.c index b16624bd8aa..bcd92908253 100644 --- a/src/compiler/nir/nir_lower_alu_to_scalar.c +++ b/src/compiler/nir/nir_lower_alu_to_scalar.c @@ -24,6 +24,11 @@ #include "nir.h" #include "nir_builder.h" +struct alu_to_scalar_data { + nir_instr_filter_cb cb; + const void *data; +}; + /** @file nir_lower_alu_to_scalar.c * * Replaces nir_alu_instr operations with more than one channel used in the @@ -89,9 +94,9 @@ lower_reduction(nir_alu_instr *alu, nir_op chan_op, nir_op merge_op, } static nir_ssa_def * -lower_alu_instr_scalar(nir_builder *b, nir_instr *instr, void *_state) +lower_alu_instr_scalar(nir_builder *b, nir_instr *instr, void *_data) { - BITSET_WORD *lower_set = _state; + struct 
alu_to_scalar_data *data = _data; nir_alu_instr *alu = nir_instr_as_alu(instr); unsigned num_src = nir_op_infos[alu->op].num_inputs; unsigned i, chan; @@ -102,7 +107,7 @@ lower_alu_instr_scalar(nir_builder *b, nir_instr *instr, void *_state) b->cursor = nir_before_instr(&alu->instr); b->exact = alu->exact; - if (lower_set && !BITSET_TEST(lower_set, alu->op)) + if (data->cb && !data->cb(instr, data->data)) return NULL; #define LOWER_REDUCTION(name, chan, merge) \ @@ -246,10 +251,15 @@ lower_alu_instr_scalar(nir_builder *b, nir_instr *instr, void *_state) } bool -nir_lower_alu_to_scalar(nir_shader *shader, BITSET_WORD *lower_set) +nir_lower_alu_to_scalar(nir_shader *shader, nir_instr_filter_cb cb, const void *_data) { + struct alu_to_scalar_data data = { + .cb = cb, + .data = _data, + }; + return nir_shader_lower_instructions(shader, inst_is_vector_alu, lower_alu_instr_scalar, - lower_set); + &data); } diff --git a/src/freedreno/ir3/ir3_nir.c b/src/freedreno/ir3/ir3_nir.c index 05426bc7a0e..50a961f2bad 100644 --- a/src/freedreno/ir3/ir3_nir.c +++ b/src/freedreno/ir3/ir3_nir.c @@ -124,7 +124,7 @@ ir3_optimize_loop(nir_shader *s) OPT_V(s, nir_lower_vars_to_ssa); progress |= OPT(s, nir_opt_copy_prop_vars); progress |= OPT(s, nir_opt_dead_write_vars); - progress |= OPT(s, nir_lower_alu_to_scalar, NULL); + progress |= OPT(s, nir_lower_alu_to_scalar, NULL, NULL); progress |= OPT(s, nir_lower_phis_to_scalar); progress |= OPT(s, nir_copy_prop); diff --git a/src/gallium/auxiliary/nir/tgsi_to_nir.c b/src/gallium/auxiliary/nir/tgsi_to_nir.c index 20d6c0bfb29..eae2ef058d2 100644 --- a/src/gallium/auxiliary/nir/tgsi_to_nir.c +++ b/src/gallium/auxiliary/nir/tgsi_to_nir.c @@ -2559,7 +2559,7 @@ ttn_optimize_nir(nir_shader *nir, bool scalar) NIR_PASS_V(nir, nir_lower_vars_to_ssa); if (scalar) { - NIR_PASS_V(nir, nir_lower_alu_to_scalar, NULL); + NIR_PASS_V(nir, nir_lower_alu_to_scalar, NULL, NULL); NIR_PASS_V(nir, nir_lower_phis_to_scalar); } diff --git 
a/src/gallium/drivers/etnaviv/etnaviv_compiler_nir.c b/src/gallium/drivers/etnaviv/etnaviv_compiler_nir.c index dc6756af257..c2bfec14ed2 100644 --- a/src/gallium/drivers/etnaviv/etnaviv_compiler_nir.c +++ b/src/gallium/drivers/etnaviv/etnaviv_compiler_nir.c @@ -208,25 +208,34 @@ etna_lower_io(nir_shader *shader, struct etna_shader_variant *v) } } -static void -etna_lower_alu_to_scalar(nir_shader *shader, const struct etna_specs *specs) +static bool +etna_alu_to_scalar_filter_cb(const nir_instr *instr, const void *data) { - BITSET_DECLARE(scalar_ops, nir_num_opcodes); - BITSET_ZERO(scalar_ops); - - BITSET_SET(scalar_ops, nir_op_frsq); - BITSET_SET(scalar_ops, nir_op_frcp); - BITSET_SET(scalar_ops, nir_op_flog2); - BITSET_SET(scalar_ops, nir_op_fexp2); - BITSET_SET(scalar_ops, nir_op_fsqrt); - BITSET_SET(scalar_ops, nir_op_fcos); - BITSET_SET(scalar_ops, nir_op_fsin); - BITSET_SET(scalar_ops, nir_op_fdiv); - - if (!specs->has_halti2_instructions) - BITSET_SET(scalar_ops, nir_op_fdot2); - - nir_lower_alu_to_scalar(shader, scalar_ops); + const struct etna_specs *specs = data; + + if (instr->type != nir_instr_type_alu) + return false; + + nir_alu_instr *alu = nir_instr_as_alu(instr); + switch (alu->op) { + case nir_op_frsq: + case nir_op_frcp: + case nir_op_flog2: + case nir_op_fexp2: + case nir_op_fsqrt: + case nir_op_fcos: + case nir_op_fsin: + case nir_op_fdiv: + return true; + case nir_op_fdot2: + if (!specs->has_halti2_instructions) + return true; + break; + default: + break; + } + + return false; } static void @@ -607,7 +616,7 @@ etna_compile_shader_nir(struct etna_shader_variant *v) OPT_V(s, nir_lower_vars_to_ssa); OPT_V(s, nir_lower_indirect_derefs, nir_var_all); OPT_V(s, nir_lower_tex, &(struct nir_lower_tex_options) { .lower_txp = ~0u }); - OPT_V(s, etna_lower_alu_to_scalar, specs); + OPT_V(s, nir_lower_alu_to_scalar, etna_alu_to_scalar_filter_cb, specs); etna_optimize_loop(s); @@ -627,7 +636,7 @@ etna_compile_shader_nir(struct etna_shader_variant *v) 
nir_print_shader(s, stdout); while( OPT(s, nir_opt_vectorize) ); - OPT_V(s, etna_lower_alu_to_scalar, specs); + OPT_V(s, nir_lower_alu_to_scalar, etna_alu_to_scalar_filter_cb, specs); NIR_PASS_V(s, nir_remove_dead_variables, nir_var_function_temp); NIR_PASS_V(s, nir_opt_algebraic_late); diff --git a/src/gallium/drivers/freedreno/a2xx/ir2_nir.c b/src/gallium/drivers/freedreno/a2xx/ir2_nir.c index 9d36f7092ef..6915194234d 100644 --- a/src/gallium/drivers/freedreno/a2xx/ir2_nir.c +++ b/src/gallium/drivers/freedreno/a2xx/ir2_nir.c @@ -1062,6 +1062,29 @@ static void cleanup_binning(struct ir2_context *ctx) ir2_optimize_nir(ctx->nir, false); } +static bool +ir2_alu_to_scalar_filter_cb(const nir_instr *instr, const void *data) +{ + if (instr->type != nir_instr_type_alu) + return false; + + nir_alu_instr *alu = nir_instr_as_alu(instr); + switch (alu->op) { + case nir_op_frsq: + case nir_op_frcp: + case nir_op_flog2: + case nir_op_fexp2: + case nir_op_fsqrt: + case nir_op_fcos: + case nir_op_fsin: + return true; + default: + break; + } + + return false; +} + void ir2_nir_compile(struct ir2_context *ctx, bool binning) { @@ -1085,17 +1108,7 @@ ir2_nir_compile(struct ir2_context *ctx, bool binning) OPT_V(ctx->nir, nir_lower_bool_to_float); OPT_V(ctx->nir, nir_lower_to_source_mods, nir_lower_all_source_mods); - /* TODO: static bitset ? 
*/ - BITSET_DECLARE(scalar_ops, nir_num_opcodes); - BITSET_ZERO(scalar_ops); - BITSET_SET(scalar_ops, nir_op_frsq); - BITSET_SET(scalar_ops, nir_op_frcp); - BITSET_SET(scalar_ops, nir_op_flog2); - BITSET_SET(scalar_ops, nir_op_fexp2); - BITSET_SET(scalar_ops, nir_op_fsqrt); - BITSET_SET(scalar_ops, nir_op_fcos); - BITSET_SET(scalar_ops, nir_op_fsin); - OPT_V(ctx->nir, nir_lower_alu_to_scalar, scalar_ops); + OPT_V(ctx->nir, nir_lower_alu_to_scalar, ir2_alu_to_scalar_filter_cb, NULL); OPT_V(ctx->nir, nir_lower_locals_to_regs); diff --git a/src/gallium/drivers/lima/lima_program.c b/src/gallium/drivers/lima/lima_program.c index b9c4cbc4d5f..c0683b88600 100644 --- a/src/gallium/drivers/lima/lima_program.c +++ b/src/gallium/drivers/lima/lima_program.c @@ -110,7 +110,7 @@ lima_program_optimize_vs_nir(struct nir_shader *s) progress = false; NIR_PASS_V(s, nir_lower_vars_to_ssa); - NIR_PASS(progress, s, nir_lower_alu_to_scalar, NULL); + NIR_PASS(progress, s, nir_lower_alu_to_scalar, NULL, NULL); NIR_PASS(progress, s, nir_lower_phis_to_scalar); NIR_PASS(progress, s, nir_copy_prop); NIR_PASS(progress, s, nir_opt_remove_phis); @@ -145,24 +145,38 @@ lima_program_optimize_vs_nir(struct nir_shader *s) nir_sweep(s); } -void -lima_program_optimize_fs_nir(struct nir_shader *s) +static bool +lima_alu_to_scalar_filter_cb(const nir_instr *instr, const void *data) { - BITSET_DECLARE(alu_lower, nir_num_opcodes) = {0}; - bool progress; - - BITSET_SET(alu_lower, nir_op_frcp); - BITSET_SET(alu_lower, nir_op_frsq); - BITSET_SET(alu_lower, nir_op_flog2); - BITSET_SET(alu_lower, nir_op_fexp2); - BITSET_SET(alu_lower, nir_op_fsqrt); - BITSET_SET(alu_lower, nir_op_fsin); - BITSET_SET(alu_lower, nir_op_fcos); + if (instr->type != nir_instr_type_alu) + return false; + + nir_alu_instr *alu = nir_instr_as_alu(instr); + switch (alu->op) { + case nir_op_frcp: + case nir_op_frsq: + case nir_op_flog2: + case nir_op_fexp2: + case nir_op_fsqrt: + case nir_op_fsin: + case nir_op_fcos: /* nir vec4 fcsel 
assumes that each component of the condition will be * used to select the same component from the two options, but lima * can't implement that since we only have 1 component condition */ - BITSET_SET(alu_lower, nir_op_fcsel); - BITSET_SET(alu_lower, nir_op_bcsel); + case nir_op_fcsel: + case nir_op_bcsel: + return true; + default: + break; + } + + return false; +} + +void +lima_program_optimize_fs_nir(struct nir_shader *s) +{ + bool progress; NIR_PASS_V(s, nir_lower_fragcoord_wtrans); NIR_PASS_V(s, nir_lower_io, nir_var_all, type_size, 0); @@ -178,7 +192,7 @@ lima_program_optimize_fs_nir(struct nir_shader *s) progress = false; NIR_PASS_V(s, nir_lower_vars_to_ssa); - NIR_PASS(progress, s, nir_lower_alu_to_scalar, alu_lower); + NIR_PASS(progress, s, nir_lower_alu_to_scalar, lima_alu_to_scalar_filter_cb, NULL); NIR_PASS(progress, s, nir_lower_phis_to_scalar); NIR_PASS(progress, s, nir_copy_prop); NIR_PASS(progress, s, nir_opt_remove_phis); diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp index 378638bf3a4..4e86ab8f8cc 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp @@ -3500,7 +3500,7 @@ Converter::run() NIR_PASS_V(nir, nir_lower_regs_to_ssa); NIR_PASS_V(nir, nir_lower_load_const_to_scalar); NIR_PASS_V(nir, nir_lower_vars_to_ssa); - NIR_PASS_V(nir, nir_lower_alu_to_scalar, NULL); + NIR_PASS_V(nir, nir_lower_alu_to_scalar, NULL, NULL); NIR_PASS_V(nir, nir_lower_phis_to_scalar); do { diff --git a/src/gallium/drivers/radeonsi/si_shader_nir.c b/src/gallium/drivers/radeonsi/si_shader_nir.c index fdd139141e2..4970b01fd73 100644 --- a/src/gallium/drivers/radeonsi/si_shader_nir.c +++ b/src/gallium/drivers/radeonsi/si_shader_nir.c @@ -817,7 +817,7 @@ si_nir_opts(struct nir_shader *nir) NIR_PASS(progress, nir, nir_opt_copy_prop_vars); NIR_PASS(progress, nir, nir_opt_dead_write_vars); - NIR_PASS_V(nir, 
nir_lower_alu_to_scalar, NULL); + NIR_PASS_V(nir, nir_lower_alu_to_scalar, NULL, NULL); NIR_PASS_V(nir, nir_lower_phis_to_scalar); /* (Constant) copy propagation is needed for txf with offsets. */ diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c index 1d55b87e1ef..e5f7aa31b0e 100644 --- a/src/gallium/drivers/vc4/vc4_program.c +++ b/src/gallium/drivers/vc4/vc4_program.c @@ -1530,7 +1530,7 @@ vc4_optimize_nir(struct nir_shader *s) progress = false; NIR_PASS_V(s, nir_lower_vars_to_ssa); - NIR_PASS(progress, s, nir_lower_alu_to_scalar, NULL); + NIR_PASS(progress, s, nir_lower_alu_to_scalar, NULL, NULL); NIR_PASS(progress, s, nir_lower_phis_to_scalar); NIR_PASS(progress, s, nir_copy_prop); NIR_PASS(progress, s, nir_opt_remove_phis); diff --git a/src/intel/compiler/brw_nir.c b/src/intel/compiler/brw_nir.c index bd5c5016550..c710fe46e5d 100644 --- a/src/intel/compiler/brw_nir.c +++ b/src/intel/compiler/brw_nir.c @@ -518,7 +518,7 @@ brw_nir_optimize(nir_shader *nir, const struct brw_compiler *compiler, OPT(nir_opt_combine_stores, nir_var_all); if (is_scalar) { - OPT(nir_lower_alu_to_scalar, NULL); + OPT(nir_lower_alu_to_scalar, NULL, NULL); } OPT(nir_copy_prop); @@ -654,7 +654,7 @@ brw_preprocess_nir(const struct brw_compiler *compiler, nir_shader *nir, const bool is_scalar = compiler->scalar_stage[nir->info.stage]; if (is_scalar) { - OPT(nir_lower_alu_to_scalar, NULL); + OPT(nir_lower_alu_to_scalar, NULL, NULL); } if (nir->info.stage == MESA_SHADER_GEOMETRY) @@ -871,7 +871,7 @@ brw_postprocess_nir(nir_shader *nir, const struct brw_compiler *compiler, OPT(brw_nir_lower_conversions); if (is_scalar) - OPT(nir_lower_alu_to_scalar, NULL); + OPT(nir_lower_alu_to_scalar, NULL, NULL); OPT(nir_lower_to_source_mods, nir_lower_all_source_mods); OPT(nir_copy_prop); OPT(nir_opt_dce); diff --git a/src/mesa/state_tracker/st_glsl_to_nir.cpp b/src/mesa/state_tracker/st_glsl_to_nir.cpp index 4585455c856..b9ce22c4a6e 100644 --- 
a/src/mesa/state_tracker/st_glsl_to_nir.cpp +++ b/src/mesa/state_tracker/st_glsl_to_nir.cpp @@ -247,7 +247,7 @@ st_nir_opts(nir_shader *nir, bool scalar) NIR_PASS(progress, nir, nir_opt_dead_write_vars); if (scalar) { - NIR_PASS_V(nir, nir_lower_alu_to_scalar, NULL); + NIR_PASS_V(nir, nir_lower_alu_to_scalar, NULL, NULL); NIR_PASS_V(nir, nir_lower_phis_to_scalar); } @@ -363,7 +363,7 @@ st_glsl_to_nir(struct st_context *st, struct gl_program *prog, NIR_PASS_V(nir, nir_lower_var_copies); if (is_scalar) { - NIR_PASS_V(nir, nir_lower_alu_to_scalar, NULL); + NIR_PASS_V(nir, nir_lower_alu_to_scalar, NULL, NULL); } /* before buffers and vars_to_ssa */ diff --git a/src/panfrost/bifrost/bifrost_compile.c b/src/panfrost/bifrost/bifrost_compile.c index 5e34b95d308..2af36ee8668 100644 --- a/src/panfrost/bifrost/bifrost_compile.c +++ b/src/panfrost/bifrost/bifrost_compile.c @@ -57,7 +57,7 @@ optimize_nir(nir_shader *nir) NIR_PASS(progress, nir, nir_opt_constant_folding); NIR_PASS(progress, nir, nir_lower_vars_to_ssa); - NIR_PASS(progress, nir, nir_lower_alu_to_scalar, NULL); + NIR_PASS(progress, nir, nir_lower_alu_to_scalar, NULL, NULL); NIR_PASS(progress, nir, nir_opt_if, true); } while (progress); diff --git a/src/panfrost/bifrost/cmdline.c b/src/panfrost/bifrost/cmdline.c index 6dec34af0e5..0c495d25d54 100644 --- a/src/panfrost/bifrost/cmdline.c +++ b/src/panfrost/bifrost/cmdline.c @@ -59,7 +59,7 @@ compile_shader(char **argv) NIR_PASS_V(nir[i], nir_split_var_copies); NIR_PASS_V(nir[i], nir_lower_var_copies); - NIR_PASS_V(nir[i], nir_lower_alu_to_scalar, NULL); + NIR_PASS_V(nir[i], nir_lower_alu_to_scalar, NULL, NULL); /* before buffers and vars_to_ssa */ NIR_PASS_V(nir[i], gl_nir_lower_bindless_images); -- 2.30.2