From: Alyssa Rosenzweig Date: Wed, 24 Jul 2019 22:37:24 +0000 (-0700) Subject: pan/midgard: Optimize varying projection X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=9ce75826cb00a252c6012d74046fa15bf0998080;p=mesa.git pan/midgard: Optimize varying projection We add a new opt pass fusing perspective projection with varyings. Minor win..? We don't combine non-varying projections, since if we're too agressive, the extra load/store traffic will hurt us so it's not really a win in practice. total instructions in shared programs: 3915 -> 3913 (-0.05%) instructions in affected programs: 76 -> 74 (-2.63%) helped: 1 HURT: 0 total bundles in shared programs: 2520 -> 2519 (-0.04%) bundles in affected programs: 46 -> 45 (-2.17%) helped: 1 HURT: 0 total quadwords in shared programs: 4027 -> 4025 (-0.05%) quadwords in affected programs: 80 -> 78 (-2.50%) helped: 1 HURT: 0 Signed-off-by: Alyssa Rosenzweig --- diff --git a/src/panfrost/midgard/compiler.h b/src/panfrost/midgard/compiler.h index 1daf876a0f4..da3539e62bd 100644 --- a/src/panfrost/midgard/compiler.h +++ b/src/panfrost/midgard/compiler.h @@ -376,6 +376,7 @@ void mir_rewrite_index_src_single(midgard_instruction *ins, unsigned old, unsign void mir_rewrite_index_src_tag(compiler_context *ctx, unsigned old, unsigned new, unsigned tag); bool mir_single_use(compiler_context *ctx, unsigned value); bool mir_special_index(compiler_context *ctx, unsigned idx); +unsigned mir_use_count(compiler_context *ctx, unsigned value); /* MIR printing */ @@ -499,5 +500,6 @@ nir_clamp_psiz(nir_shader *shader, float min_size, float max_size); bool midgard_opt_copy_prop(compiler_context *ctx, midgard_block *block); bool midgard_opt_combine_projection(compiler_context *ctx, midgard_block *block); +bool midgard_opt_varying_projection(compiler_context *ctx, midgard_block *block); #endif diff --git a/src/panfrost/midgard/helpers.h b/src/panfrost/midgard/helpers.h index 8b3e417a56d..76bda286f7e 100644 --- a/src/panfrost/midgard/helpers.h +++ b/src/panfrost/midgard/helpers.h @@ -25,6 +25,11 @@ #include "util/macros.h" #include +#define OP_IS_LOAD_VARY_F(op) (\ + op == midgard_op_ld_vary_16 || \ + op == midgard_op_ld_vary_32 \ + ) + #define OP_IS_STORE_VARY(op) (\ op == midgard_op_st_vary_16 || \ op == midgard_op_st_vary_32 || \ @@ -46,11 +51,15 @@ op == midgard_op_st_cubemap_coords \ ) -#define OP_IS_R27_ONLY(op) ( \ +#define OP_IS_PROJECTION(op) ( \ op == midgard_op_ldst_perspective_division_z || \ op == midgard_op_ldst_perspective_division_w \ ) +#define OP_IS_R27_ONLY(op) ( \ + OP_IS_PROJECTION(op) \ + ) + #define OP_IS_MOVE(op) ( \ op == midgard_alu_op_fmov || \ op == midgard_alu_op_imov \ diff --git a/src/panfrost/midgard/midgard_compile.c b/src/panfrost/midgard/midgard_compile.c index 63191cec0e4..6f50e345baa 100644 --- a/src/panfrost/midgard/midgard_compile.c +++ b/src/panfrost/midgard/midgard_compile.c @@ -2421,6 +2421,8 @@ midgard_compile_shader_nir(struct midgard_screen *screen, nir_shader *nir, midga progress |= midgard_opt_pos_propagate(ctx, block); progress |= midgard_opt_copy_prop(ctx, block); progress |= midgard_opt_dead_code_eliminate(ctx, block); + progress |= midgard_opt_combine_projection(ctx, block); + progress |= midgard_opt_varying_projection(ctx, block); } } while (progress); diff --git a/src/panfrost/midgard/midgard_opt_perspective.c b/src/panfrost/midgard/midgard_opt_perspective.c index 7abfd1544e9..df826bc30c1 100644 --- a/src/panfrost/midgard/midgard_opt_perspective.c +++ b/src/panfrost/midgard/midgard_opt_perspective.c @@ -25,10 +25,14 @@ * load/store pipes. So the first perspective projection pass looks for * lowered/open-coded perspective projection of the form "fmul (A.xyz, * frcp(A.w))" or "fmul (A.xy, frcp(A.z))" and rewrite with a native - * perspective division opcode (on the load/store pipe). + * perspective division opcode (on the load/store pipe). Caveats apply: the + * frcp should be used only once to make this optimization worthwhile. And the + * source of the frcp ought to be a varying to make it worthwhile... * - * Caveats apply: the frcp should be used only once to make this optimization - * worthwhile. + * The second pass in this file is a step #2 of sorts: fusing that load/store + * projection into a varying load instruction (they can be done together + * implicitly). This depends on the combination pass. Again caveat: the vary + * should only be used once to make this worthwhile. */ #include "compiler.h" @@ -86,6 +90,25 @@ midgard_opt_combine_projection(compiler_context *ctx, midgard_block *block) if (frcp_component != COMPONENT_W && frcp_component != COMPONENT_Z) continue; if (!mir_single_use(ctx, frcp)) continue; + /* Heuristic: check if the frcp is from a single-use varying */ + + bool ok = false; + + /* One for frcp and one for fmul */ + if (mir_use_count(ctx, frcp_from) > 2) continue; + + mir_foreach_instr_in_block_safe(block, v) { + if (v->ssa_args.dest != frcp_from) continue; + if (v->type != TAG_LOAD_STORE_4) break; + if (!OP_IS_LOAD_VARY_F(v->load_store.op)) break; + + ok = true; + break; + } + + if (!ok) + continue; + /* Nice, we got the form spot on. Let's convert! */ midgard_instruction accel = { @@ -113,3 +136,66 @@ midgard_opt_combine_projection(compiler_context *ctx, midgard_block *block) return progress; } + +bool +midgard_opt_varying_projection(compiler_context *ctx, midgard_block *block) +{ + bool progress = false; + + mir_foreach_instr_in_block_safe(block, ins) { + /* Search for a projection */ + if (ins->type != TAG_LOAD_STORE_4) continue; + if (!OP_IS_PROJECTION(ins->load_store.op)) continue; + + unsigned vary = ins->ssa_args.src0; + unsigned to = ins->ssa_args.dest; + + if (vary >= ctx->func->impl->ssa_alloc) continue; + if (to >= ctx->func->impl->ssa_alloc) continue; + if (!mir_single_use(ctx, vary)) continue; + + /* Check for a varying source. If we find it, we rewrite */ + + bool rewritten = false; + + mir_foreach_instr_in_block_safe(block, v) { + if (v->ssa_args.dest != vary) continue; + if (v->type != TAG_LOAD_STORE_4) break; + if (!OP_IS_LOAD_VARY_F(v->load_store.op)) break; + + /* We found it, so rewrite it to project. Grab the + * modifier */ + + unsigned param = v->load_store.varying_parameters; + midgard_varying_parameter p; + memcpy(&p, ¶m, sizeof(p)); + + if (p.modifier != midgard_varying_mod_none) + break; + + bool projects_w = + ins->load_store.op == midgard_op_ldst_perspective_division_w; + + p.modifier = projects_w ? + midgard_varying_mod_perspective_w : + midgard_varying_mod_perspective_z; + + /* Aliasing rules are annoying */ + memcpy(¶m, &p, sizeof(p)); + v->load_store.varying_parameters = param; + + /* Use the new destination */ + v->ssa_args.dest = to; + + rewritten = true; + break; + } + + if (rewritten) + mir_remove_instruction(ins); + + progress |= rewritten; + } + + return progress; +} diff --git a/src/panfrost/midgard/mir.c b/src/panfrost/midgard/mir.c index 1be224b8464..2c449e0684e 100644 --- a/src/panfrost/midgard/mir.c +++ b/src/panfrost/midgard/mir.c @@ -72,25 +72,26 @@ mir_rewrite_index(compiler_context *ctx, unsigned old, unsigned new) mir_rewrite_index_dst(ctx, old, new); } -/* Checks if a value is used only once (or totally dead), which is an important - * heuristic to figure out if certain optimizations are Worth It (TM) */ - -bool -mir_single_use(compiler_context *ctx, unsigned value) +unsigned +mir_use_count(compiler_context *ctx, unsigned value) { unsigned used_count = 0; mir_foreach_instr_global(ctx, ins) { if (mir_has_arg(ins, value)) ++used_count; - - /* Short circuit for speed */ - if (used_count > 1) - return false; } - return used_count <= 1; + return used_count; +} +/* Checks if a value is used only once (or totally dead), which is an important + * heuristic to figure out if certain optimizations are Worth It (TM) */ + +bool +mir_single_use(compiler_context *ctx, unsigned value) +{ + return mir_use_count(ctx, value) <= 1; } bool