From f351e1d137603213b5daacece5ff67ad0786d982 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Mon, 4 May 2020 15:13:20 -0700 Subject: [PATCH] freedreno/ir3: limit # of tex prefetch by shader size It seems for short frag shaders, too much prefetch can be detrimental. I think what we *really* want to do is decide after pre-RA sched, when we also know about nops and what the actual ir3 instruction count is. But that will require re-working how prefetch lowering works. For now this is a super crude heuristic to attempt to approximate a good solution. Signed-off-by: Rob Clark Part-of: --- src/freedreno/ir3/ir3_compiler_nir.c | 2 +- src/freedreno/ir3/ir3_context.c | 37 ++++++++++++++++++++++++++++ src/freedreno/ir3/ir3_context.h | 2 ++ 3 files changed, 40 insertions(+), 1 deletion(-) diff --git a/src/freedreno/ir3/ir3_compiler_nir.c b/src/freedreno/ir3/ir3_compiler_nir.c index dc2492e242a..76f2f7525bf 100644 --- a/src/freedreno/ir3/ir3_compiler_nir.c +++ b/src/freedreno/ir3/ir3_compiler_nir.c @@ -2261,7 +2261,7 @@ emit_tex(struct ir3_context *ctx, nir_tex_instr *tex) compile_assert(ctx, nir_tex_instr_src_index(tex, nir_tex_src_texture_offset) < 0); compile_assert(ctx, nir_tex_instr_src_index(tex, nir_tex_src_sampler_offset) < 0); - if (ctx->so->num_sampler_prefetch < IR3_MAX_SAMPLER_PREFETCH) { + if (ctx->so->num_sampler_prefetch < ctx->prefetch_limit) { opc = OPC_META_TEX_PREFETCH; ctx->so->num_sampler_prefetch++; break; diff --git a/src/freedreno/ir3/ir3_context.c b/src/freedreno/ir3/ir3_context.c index 38f870321a7..6b6f485af05 100644 --- a/src/freedreno/ir3/ir3_context.c +++ b/src/freedreno/ir3/ir3_context.c @@ -114,6 +114,43 @@ ir3_context_init(struct ir3_compiler *compiler, NIR_PASS_V(ctx->s, nir_convert_from_ssa, true); + /* Super crude heuristic to limit # of tex prefetch in small + * shaders. This completely ignores loops.. but that's really + * not the worst of its problems. 
(A frag shader that has + * loops is probably going to be big enough to not trigger a + * lower threshold.) + * + * 1) probably want to do this in terms of ir3 instructions + * 2) probably really want to decide this after scheduling + * (or at least pre-RA sched) so we have a rough idea about + * nops, and don't count things that get cp'd away + * 3) blob seems to use higher thresholds with a mix of more + * SFU instructions. Which partly makes sense, more SFU + * instructions probably means you want to get the real + * shader started sooner, but that considers where in the + * shader the SFU instructions are, which blob doesn't seem + * to do. + * + * This uses more conservative thresholds assuming a more alu + * than sfu heavy instruction mix. + */ + if (so->type == MESA_SHADER_FRAGMENT) { + nir_function_impl *fxn = nir_shader_get_entrypoint(ctx->s); + + unsigned instruction_count = 0; + nir_foreach_block (block, fxn) { + instruction_count += exec_list_length(&block->instr_list); + } + + if (instruction_count < 50) { + ctx->prefetch_limit = 2; + } else if (instruction_count < 70) { + ctx->prefetch_limit = 3; + } else { + ctx->prefetch_limit = IR3_MAX_SAMPLER_PREFETCH; + } + } + if (shader_debug_enabled(so->type)) { fprintf(stdout, "NIR (final form) for %s shader %s:\n", ir3_shader_stage(so), so->shader->nir->info.name); diff --git a/src/freedreno/ir3/ir3_context.h b/src/freedreno/ir3/ir3_context.h index d1de1a67a26..659303de9e1 100644 --- a/src/freedreno/ir3/ir3_context.h +++ b/src/freedreno/ir3/ir3_context.h @@ -146,6 +146,8 @@ struct ir3_context { unsigned max_texture_index; + unsigned prefetch_limit; + /* set if we encounter something we can't handle yet, so we * can bail cleanly and fallback to TGSI compiler f/e */ -- 2.30.2