From d699a17475b5d123e6a22778e8ac6e005774ce92 Mon Sep 17 00:00:00 2001
From: Alyssa Rosenzweig
Date: Fri, 30 Aug 2019 12:56:55 -0700
Subject: [PATCH] pan/midgard: Schedule before RA

This is a tradeoff. Scheduling before RA means we don't do RA on
what-will-become pipeline registers. Importantly, it means the
scheduler is able to reorder instructions, as registers have not been
decided yet.

Unfortunately, it also complicates register spilling, since the spills
themselves won't get bundled optimally and we can only spill once per
ALU bundle (using r27 as well would allow two spills per bundle, but
that is not implemented here). It also prevents us from eliminating
dead moves introduced by register allocation, as they are not dead
before RA.

The shader-db regressions are from poor spilling choices introduced by
the new bundling requirements. These could be solved by combining a
post-scheduler (to pack adjacent spills into bundles) with a VLIW-aware
spill cost calculation. Nevertheless, the change is small enough that I
feel it's worth it to eat a tiny shader-db regression for the sake of
flexibility.

Signed-off-by: Alyssa Rosenzweig
---
 src/panfrost/midgard/midgard_schedule.c | 56 +++++++++++++------------
 1 file changed, 29 insertions(+), 27 deletions(-)

diff --git a/src/panfrost/midgard/midgard_schedule.c b/src/panfrost/midgard/midgard_schedule.c
index 6693a1b725b..8f86701e33f 100644
--- a/src/panfrost/midgard/midgard_schedule.c
+++ b/src/panfrost/midgard/midgard_schedule.c
@@ -798,11 +798,26 @@ static void mir_spill_register(
                 ra_set_node_spill_cost(g, i, 1.0);
         }
 
-        mir_foreach_instr_global(ctx, ins) {
-                if (ins->no_spill &&
-                    ins->dest >= 0 &&
-                    ins->dest < ctx->temp_count)
-                        ra_set_node_spill_cost(g, ins->dest, -1.0);
+        /* We can't spill any bundles that contain unspills. This could be
+         * optimized to allow use of r27 to spill twice per bundle, but if
+         * you're at the point of optimizing spilling, it's too late. */
+
+        mir_foreach_block(ctx, block) {
+                mir_foreach_bundle_in_block(block, bun) {
+                        bool no_spill = false;
+
+                        for (unsigned i = 0; i < bun->instruction_count; ++i)
+                                no_spill |= bun->instructions[i]->no_spill;
+
+                        if (!no_spill)
+                                continue;
+
+                        for (unsigned i = 0; i < bun->instruction_count; ++i) {
+                                unsigned dest = bun->instructions[i]->dest;
+                                if (dest < ctx->temp_count)
+                                        ra_set_node_spill_cost(g, dest, -1.0);
+                        }
+                }
         }
 
         int spill_node = ra_get_best_spill_node(g);
@@ -831,7 +846,8 @@ static void mir_spill_register(
         if (is_special_w)
                 spill_slot = spill_index++;
 
-        mir_foreach_instr_global_safe(ctx, ins) {
+        mir_foreach_block(ctx, block) {
+        mir_foreach_instr_in_block_safe(block, ins) {
                 if (ins->dest != spill_node) continue;
 
                 midgard_instruction st;
@@ -841,17 +857,19 @@ static void mir_spill_register(
                         st.no_spill = true;
                 } else {
                         ins->dest = SSA_FIXED_REGISTER(26);
+                        ins->no_spill = true;
                         st = v_load_store_scratch(ins->dest, spill_slot, true, ins->mask);
                 }
 
                 /* Hint: don't rewrite this node */
                 st.hint = true;
 
-                mir_insert_instruction_before(ctx, mir_next_op(ins), st);
+                mir_insert_instruction_after_scheduled(ctx, block, ins, st);
 
                 if (!is_special)
                         ctx->spills++;
         }
+        }
 }
 
 /* For special reads, figure out how many components we need */
@@ -915,7 +933,7 @@ static void mir_spill_register(
                         st.mask = read_mask;
 
-                        mir_insert_instruction_before(ctx, before, st);
+                        mir_insert_instruction_before_scheduled(ctx, block, before, st);
 
                         // consecutive_skip = true;
                 } else {
                         /* Special writes already have their move spilled in */
@@ -962,8 +980,11 @@ schedule_program(compiler_context *ctx)
 
         mir_foreach_block(ctx, block) {
                 midgard_opt_dead_move_eliminate(ctx, block);
+                schedule_block(ctx, block);
         }
 
+        mir_create_pipeline_registers(ctx);
+
         do {
                 if (spilled)
                         mir_spill_register(ctx, g, &spill_count);
@@ -974,25 +995,6 @@
                 g = allocate_registers(ctx, &spilled);
         } while(spilled && ((iter_count--) > 0));
 
-        /* We can simplify a bit after RA */
-
-        mir_foreach_block(ctx, block) {
-                midgard_opt_post_move_eliminate(ctx, block, g);
-        }
-
-        /* After RA finishes, we schedule all at once */
-
-        mir_foreach_block(ctx, block) {
-                schedule_block(ctx, block);
-        }
-
-        /* Finally, we create pipeline registers as a peephole pass after
-         * scheduling. This isn't totally optimal, since there are cases where
-         * the usage of pipeline registers can eliminate spills, but it does
-         * save some power */
-
-        mir_create_pipeline_registers(ctx);
-
         if (iter_count <= 0) {
                 fprintf(stderr, "panfrost: Gave up allocating registers, rendering will be incomplete\n");
                 assert(0);
-- 
2.30.2
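
Note, outside the patch itself: the heart of the new spill-cost pass is "if any
instruction in a bundle is an unspill, mark every destination that bundle writes
as unspillable". Below is a minimal, stand-alone C sketch of that rule for
readers without the Mesa tree at hand. toy_instruction, toy_bundle, forbid_spill()
and the flat spill_cost array are simplified stand-ins for midgard_instruction,
midgard_bundle and ra_set_node_spill_cost(); they are not the real interfaces.

#include <stdbool.h>
#include <stdio.h>

struct toy_instruction {
        unsigned dest;     /* index of the temporary this instruction writes */
        bool no_spill;     /* set on unspill moves inserted by the spiller */
};

struct toy_bundle {
        unsigned instruction_count;
        struct toy_instruction *instructions[4];
};

/* Stand-in for ra_set_node_spill_cost(g, dest, -1.0): a negative cost
 * means "never pick this node as the spill node". */
static void forbid_spill(float *spill_cost, unsigned dest)
{
        spill_cost[dest] = -1.0f;
}

/* If any instruction in a bundle is marked no_spill (i.e. it is an
 * unspill), every destination written by that bundle is excluded from
 * spilling, since a second spill cannot be packed into the bundle. */
static void
mark_unspillable_bundles(struct toy_bundle *bundles, unsigned bundle_count,
                         float *spill_cost, unsigned temp_count)
{
        for (unsigned b = 0; b < bundle_count; ++b) {
                struct toy_bundle *bun = &bundles[b];
                bool no_spill = false;

                for (unsigned i = 0; i < bun->instruction_count; ++i)
                        no_spill |= bun->instructions[i]->no_spill;

                if (!no_spill)
                        continue;

                for (unsigned i = 0; i < bun->instruction_count; ++i) {
                        unsigned dest = bun->instructions[i]->dest;
                        if (dest < temp_count)
                                forbid_spill(spill_cost, dest);
                }
        }
}

int main(void)
{
        /* One bundle holding an unspill plus an ALU op: both destinations
         * become ineligible for spilling. */
        struct toy_instruction unspill = { .dest = 0, .no_spill = true };
        struct toy_instruction alu = { .dest = 1, .no_spill = false };
        struct toy_bundle bun = {
                .instruction_count = 2,
                .instructions = { &unspill, &alu },
        };

        float spill_cost[2] = { 1.0f, 1.0f };
        mark_unspillable_bundles(&bun, 1, spill_cost, 2);

        printf("cost[0] = %.1f, cost[1] = %.1f\n", spill_cost[0], spill_cost[1]);
        return 0;
}

With the inputs above this prints cost[0] = -1.0, cost[1] = -1.0, mirroring how
the patch removes whole bundles from the spill candidate set rather than
individual instructions.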