pan/midgard: Schedule before RA
author Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Fri, 30 Aug 2019 19:56:55 +0000 (12:56 -0700)
committer Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Fri, 30 Aug 2019 22:50:28 +0000 (15:50 -0700)
This is a tradeoff.

Scheduling before RA means we don't do RA on what-will-become pipeline
registers. Importantly, it means the scheduler is able to reorder
instructions, as registers have not been decided yet.
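
Concretely, schedule_program now schedules and creates pipeline
registers before ever entering the RA/spill loop. A condensed view of
the resulting flow (lifted from the hunks below, with unchanged lines
elided):

        mir_foreach_block(ctx, block) {
                midgard_opt_dead_move_eliminate(ctx, block);
                schedule_block(ctx, block);     /* schedule first... */
        }

        mir_create_pipeline_registers(ctx);     /* ...then pipeline registers... */

        do {
                if (spilled)
                        mir_spill_register(ctx, g, &spill_count);

                /* ... */
                g = allocate_registers(ctx, &spilled);  /* ...then RA, spilling as needed */
        } while(spilled && ((iter_count--) > 0));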

Unfortunately, it also complicates register spilling, since the spills
themselves won't get bundled optimally and we can only spill once per
ALU bundle (using r27 as well could allow a second spill per bundle,
but that optimization is not implemented here). It also prevents us
from eliminating dead moves introduced by register allocation, as they
are not dead before RA. The shader-db regressions are from poor spilling
choices introduced by the new bundling requirements. These could be
solved by the combination of a post-scheduler (to combine adjacent
spills into bundles) with a VLIW-aware spill cost calculation.
Nevertheless, the change is small enough that I feel it's worth it to
eat a tiny shader-db regression for the sake of flexibility.
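
As a rough illustration (not part of this change), a VLIW-aware spill
cost could reuse the bundle iterators below to penalize spilling nodes
that are written in many bundles, since each defining bundle would need
its own store to scratch. It would slot in where mir_spill_register
currently assigns a flat cost of 1.0; only the def_bundles counter is
new, everything else appears in the diff. Treat this as a hypothetical
sketch of the idea, not an implementation:

        /* Hypothetical sketch: make nodes written in many bundles more
         * expensive to spill, since each defining bundle would need its
         * own store to scratch. A fuller version would also count reads
         * (reloads). */

        unsigned *def_bundles = calloc(ctx->temp_count, sizeof(unsigned));

        mir_foreach_block(ctx, block) {
                mir_foreach_bundle_in_block(block, bun) {
                        for (unsigned i = 0; i < bun->instruction_count; ++i) {
                                unsigned dest = bun->instructions[i]->dest;
                                if (dest < ctx->temp_count)
                                        def_bundles[dest]++;
                        }
                }
        }

        for (unsigned i = 0; i < ctx->temp_count; ++i)
                ra_set_node_spill_cost(g, i, 1.0f + def_bundles[i]);

        free(def_bundles);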

Signed-off-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
src/panfrost/midgard/midgard_schedule.c

index 6693a1b725bff62e6d43827d5c64812f5563aa70..8f86701e33fdc633de279a91e526ef125d8e232a 100644 (file)
@@ -798,11 +798,26 @@ static void mir_spill_register(
                 ra_set_node_spill_cost(g, i, 1.0);
         }
 
-        mir_foreach_instr_global(ctx, ins) {
-                if (ins->no_spill &&
-                    ins->dest >= 0 &&
-                    ins->dest < ctx->temp_count)
-                        ra_set_node_spill_cost(g, ins->dest, -1.0);
+        /* We can't spill any bundles that contain unspills. This could be
+         * optimized to allow use of r27 to spill twice per bundle, but if
+         * you're at the point of optimizing spilling, it's too late. */
+
+        mir_foreach_block(ctx, block) {
+                mir_foreach_bundle_in_block(block, bun) {
+                        bool no_spill = false;
+
+                        for (unsigned i = 0; i < bun->instruction_count; ++i)
+                                no_spill |= bun->instructions[i]->no_spill;
+
+                        if (!no_spill)
+                                continue;
+
+                        for (unsigned i = 0; i < bun->instruction_count; ++i) {
+                                unsigned dest = bun->instructions[i]->dest;
+                                if (dest < ctx->temp_count)
+                                        ra_set_node_spill_cost(g, dest, -1.0);
+                        }
+                }
         }
 
         int spill_node = ra_get_best_spill_node(g);
@@ -831,7 +846,8 @@ static void mir_spill_register(
                 if (is_special_w)
                         spill_slot = spill_index++;
 
-                mir_foreach_instr_global_safe(ctx, ins) {
+                mir_foreach_block(ctx, block) {
+                mir_foreach_instr_in_block_safe(block, ins) {
                         if (ins->dest != spill_node) continue;
 
                         midgard_instruction st;
@@ -841,17 +857,19 @@ static void mir_spill_register(
                                 st.no_spill = true;
                         } else {
                                 ins->dest = SSA_FIXED_REGISTER(26);
+                                ins->no_spill = true;
                                 st = v_load_store_scratch(ins->dest, spill_slot, true, ins->mask);
                         }
 
                         /* Hint: don't rewrite this node */
                         st.hint = true;
 
-                        mir_insert_instruction_before(ctx, mir_next_op(ins), st);
+                        mir_insert_instruction_after_scheduled(ctx, block, ins, st);
 
                         if (!is_special)
                                 ctx->spills++;
                 }
+                }
         }
 
         /* For special reads, figure out how many components we need */
@@ -915,7 +933,7 @@ static void mir_spill_register(
 
                                 st.mask = read_mask;
 
-                                mir_insert_instruction_before(ctx, before, st);
+                                mir_insert_instruction_before_scheduled(ctx, block, before, st);
                                // consecutive_skip = true;
                         } else {
                                 /* Special writes already have their move spilled in */
@@ -962,8 +980,11 @@ schedule_program(compiler_context *ctx)
 
         mir_foreach_block(ctx, block) {
                 midgard_opt_dead_move_eliminate(ctx, block);
+                schedule_block(ctx, block);
         }
 
+        mir_create_pipeline_registers(ctx);
+
         do {
                 if (spilled) 
                         mir_spill_register(ctx, g, &spill_count);
@@ -974,25 +995,6 @@ schedule_program(compiler_context *ctx)
                 g = allocate_registers(ctx, &spilled);
         } while(spilled && ((iter_count--) > 0));
 
-        /* We can simplify a bit after RA */
-
-        mir_foreach_block(ctx, block) {
-                midgard_opt_post_move_eliminate(ctx, block, g);
-        }
-
-        /* After RA finishes, we schedule all at once */
-
-        mir_foreach_block(ctx, block) {
-                schedule_block(ctx, block);
-        }
-
-        /* Finally, we create pipeline registers as a peephole pass after
-         * scheduling. This isn't totally optimal, since there are cases where
-         * the usage of pipeline registers can eliminate spills, but it does
-         * save some power */
-
-        mir_create_pipeline_registers(ctx);
-
         if (iter_count <= 0) {
                 fprintf(stderr, "panfrost: Gave up allocating registers, rendering will be incomplete\n");
                 assert(0);