aco: fix accidential reordering of instructions when scheduling
authorDaniel Schürmann <daniel@schuermann.dev>
Thu, 31 Oct 2019 16:33:35 +0000 (17:33 +0100)
committerDaniel Schürmann <daniel@schuermann.dev>
Mon, 4 Nov 2019 19:14:14 +0000 (20:14 +0100)
Fixes: 86786999189c43b4a2c8e1c1a18b55cd2f369fff "aco: implement VGPR spilling"
Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
src/amd/compiler/aco_scheduler.cpp

index 08e627ecc2862bdf14e928408df3ffdd444b8bad..eb0bb0d93e9a92078b016dea993553529f489e74 100644 (file)
@@ -172,11 +172,11 @@ bool can_move_instr(aco_ptr<Instruction>& instr, Instruction* current, int movin
    }
 }
 
-bool can_reorder(Instruction* candidate, bool allow_smem)
+bool can_reorder(Instruction* candidate)
 {
    switch (candidate->format) {
    case Format::SMEM:
-      return allow_smem || static_cast<SMEM_instruction*>(candidate)->can_reorder;
+      return static_cast<SMEM_instruction*>(candidate)->can_reorder;
    case Format::MUBUF:
       return static_cast<MUBUF_instruction*>(candidate)->can_reorder;
    case Format::MIMG:
@@ -200,7 +200,7 @@ void schedule_SMEM(sched_ctx& ctx, Block* block,
    int window_size = SMEM_WINDOW_SIZE;
    int max_moves = SMEM_MAX_MOVES;
    int16_t k = 0;
-   bool can_reorder_cur = can_reorder(current, false);
+   bool can_reorder_cur = can_reorder(current);
 
    /* don't move s_memtime/s_memrealtime */
    if (current->opcode == aco_opcode::s_memtime || current->opcode == aco_opcode::s_memrealtime)
@@ -224,6 +224,7 @@ void schedule_SMEM(sched_ctx& ctx, Block* block,
    for (int candidate_idx = idx - 1; k < max_moves && candidate_idx > (int) idx - window_size; candidate_idx--) {
       assert(candidate_idx >= 0);
       aco_ptr<Instruction>& candidate = block->instructions[candidate_idx];
+      bool can_reorder_candidate = can_reorder(candidate.get());
 
       /* break if we'd make the previous SMEM instruction stall */
       bool can_stall_prev_smem = idx <= ctx.last_SMEM_dep_idx && candidate_idx < ctx.last_SMEM_dep_idx;
@@ -231,7 +232,7 @@ void schedule_SMEM(sched_ctx& ctx, Block* block,
          break;
 
       /* break when encountering another MEM instruction, logical_start or barriers */
-      if (!can_reorder(candidate.get(), false) && !can_reorder_cur)
+      if (!can_reorder_candidate && !can_reorder_cur)
          break;
       if (candidate->opcode == aco_opcode::p_logical_start)
          break;
@@ -239,6 +240,8 @@ void schedule_SMEM(sched_ctx& ctx, Block* block,
          break;
       if (!can_move_instr(candidate, current, moving_interaction))
          break;
+      if (candidate->isVMEM())
+         break;
       register_pressure.update(register_demand[candidate_idx]);
 
       /* if current depends on candidate, add additional dependencies and continue */
@@ -264,6 +267,7 @@ void schedule_SMEM(sched_ctx& ctx, Block* block,
             if (op.isTemp())
                ctx.depends_on[op.tempId()] = true;
          }
+         can_reorder_cur &= can_reorder_candidate;
          continue;
       }
 
@@ -280,6 +284,7 @@ void schedule_SMEM(sched_ctx& ctx, Block* block,
             if (op.isTemp())
                ctx.depends_on[op.tempId()] = true;
          }
+         can_reorder_cur &= can_reorder_candidate;
          continue;
       }
 
@@ -323,12 +328,14 @@ void schedule_SMEM(sched_ctx& ctx, Block* block,
    insert_idx = idx + 1;
    moving_interaction = barrier_none;
    moving_spill = false;
+   can_reorder_cur = true;
 
    bool found_dependency = false;
    /* second, check if we have instructions after current to move up */
    for (int candidate_idx = idx + 1; k < max_moves && candidate_idx < (int) idx + window_size; candidate_idx++) {
       assert(candidate_idx < (int) block->instructions.size());
       aco_ptr<Instruction>& candidate = block->instructions[candidate_idx];
+      bool can_reorder_candidate = can_reorder(candidate.get());
 
       if (candidate->opcode == aco_opcode::p_logical_end)
          break;
@@ -369,7 +376,7 @@ void schedule_SMEM(sched_ctx& ctx, Block* block,
          }
       }
 
-      if (!can_reorder(candidate.get(), false) && !can_reorder_cur)
+      if (!can_reorder_candidate && !can_reorder_cur)
          break;
 
       if (!found_dependency) {
@@ -380,8 +387,10 @@ void schedule_SMEM(sched_ctx& ctx, Block* block,
       /* update register pressure */
       register_pressure.update(register_demand[candidate_idx - 1]);
 
-      if (is_dependency)
+      if (is_dependency) {
+         can_reorder_cur &= can_reorder_candidate;
          continue;
+      }
       assert(insert_idx != idx);
 
       // TODO: correctly calculate register pressure for this case
@@ -392,6 +401,8 @@ void schedule_SMEM(sched_ctx& ctx, Block* block,
             register_pressure_unknown = true;
       }
       if (register_pressure_unknown) {
+         if (candidate->isVMEM())
+            break;
          for (const Definition& def : candidate->definitions) {
             if (def.isTemp())
                ctx.RAR_dependencies[def.tempId()] = true;
@@ -400,6 +411,7 @@ void schedule_SMEM(sched_ctx& ctx, Block* block,
             if (op.isTemp())
                ctx.RAR_dependencies[op.tempId()] = true;
          }
+         can_reorder_cur &= can_reorder_candidate;
          continue;
       }
 
@@ -440,7 +452,10 @@ void schedule_VMEM(sched_ctx& ctx, Block* block,
    int max_moves = VMEM_MAX_MOVES;
    int clause_max_grab_dist = VMEM_CLAUSE_MAX_GRAB_DIST;
    int16_t k = 0;
-   bool can_reorder_cur = can_reorder(current, false);
+   /* initially true as we don't pull other VMEM instructions
+    * through the current instruction */
+   bool can_reorder_vmem = true;
+   bool can_reorder_smem = true;
 
    /* create the initial set of values which current depends on */
    std::fill(ctx.depends_on.begin(), ctx.depends_on.end(), false);
@@ -467,9 +482,10 @@ void schedule_VMEM(sched_ctx& ctx, Block* block,
    for (int candidate_idx = idx - 1; k < max_moves && candidate_idx > (int) idx - window_size; candidate_idx--) {
       assert(candidate_idx >= 0);
       aco_ptr<Instruction>& candidate = block->instructions[candidate_idx];
+      bool can_reorder_candidate = can_reorder(candidate.get());
 
       /* break when encountering another VMEM instruction, logical_start or barriers */
-      if (!can_reorder(candidate.get(), true) && !can_reorder_cur)
+      if (!can_reorder_smem && candidate->format == Format::SMEM && !can_reorder_candidate)
          break;
       if (candidate->opcode == aco_opcode::p_logical_start)
          break;
@@ -487,10 +503,11 @@ void schedule_VMEM(sched_ctx& ctx, Block* block,
       bool part_of_clause = false;
       if (candidate->isVMEM()) {
          bool same_resource = candidate->operands[1].tempId() == current->operands[1].tempId();
+         bool can_reorder = can_reorder_vmem || can_reorder_candidate;
          int grab_dist = clause_insert_idx - candidate_idx;
          /* We can't easily tell how much this will decrease the def-to-use
           * distances, so just use how far it will be moved as a heuristic. */
-         part_of_clause = same_resource && grab_dist < clause_max_grab_dist;
+         part_of_clause = can_reorder && same_resource && grab_dist < clause_max_grab_dist;
       }
 
       /* if current depends on candidate, add additional dependencies and continue */
@@ -522,6 +539,8 @@ void schedule_VMEM(sched_ctx& ctx, Block* block,
             }
          }
          register_pressure_clause.update(register_demand[candidate_idx]);
+         can_reorder_smem &= candidate->format != Format::SMEM || can_reorder_candidate;
+         can_reorder_vmem &= !candidate->isVMEM() || can_reorder_candidate;
          continue;
       }
 
@@ -555,6 +574,8 @@ void schedule_VMEM(sched_ctx& ctx, Block* block,
             }
          }
          register_pressure_clause.update(register_demand[candidate_idx]);
+         can_reorder_smem &= candidate->format != Format::SMEM || can_reorder_candidate;
+         can_reorder_vmem &= !candidate->isVMEM() || can_reorder_candidate;
          continue;
       }
 
@@ -605,12 +626,16 @@ void schedule_VMEM(sched_ctx& ctx, Block* block,
    int insert_idx = idx;
    moving_interaction = barrier_none;
    moving_spill = false;
+   // TODO: differentiate between loads and stores (load-load can always reorder)
+   can_reorder_vmem = true;
+   can_reorder_smem = true;
 
    bool found_dependency = false;
    /* second, check if we have instructions after current to move up */
    for (int candidate_idx = idx + 1; k < max_moves && candidate_idx < (int) idx + window_size; candidate_idx++) {
       assert(candidate_idx < (int) block->instructions.size());
       aco_ptr<Instruction>& candidate = block->instructions[candidate_idx];
+      bool can_reorder_candidate = can_reorder(candidate.get());
 
       if (candidate->opcode == aco_opcode::p_logical_end)
          break;
@@ -623,7 +648,11 @@ void schedule_VMEM(sched_ctx& ctx, Block* block,
          break;
 
       /* check if candidate depends on current */
-      bool is_dependency = !can_reorder(candidate.get(), true) && !can_reorder_cur;
+      bool is_dependency = false;
+      if (candidate->format == Format::SMEM)
+         is_dependency = !can_reorder_smem && !can_reorder_candidate;
+      if (candidate->isVMEM())
+         is_dependency = !can_reorder_vmem && !can_reorder_candidate;
       for (const Operand& op : candidate->operands) {
          if (op.isTemp() && ctx.depends_on[op.tempId()]) {
             is_dependency = true;
@@ -645,6 +674,10 @@ void schedule_VMEM(sched_ctx& ctx, Block* block,
             if (op.isTemp())
                ctx.RAR_dependencies[op.tempId()] = true;
          }
+         /* update flag whether we can reorder other memory instructions */
+         can_reorder_smem &= candidate->format != Format::SMEM || can_reorder_candidate;
+         can_reorder_vmem &= !candidate->isVMEM() || can_reorder_candidate;
+
          if (!found_dependency) {
             insert_idx = candidate_idx;
             found_dependency = true;
@@ -652,7 +685,9 @@ void schedule_VMEM(sched_ctx& ctx, Block* block,
             register_pressure = register_demand[insert_idx - 1];
             continue;
          }
+
       } else if (candidate->isVMEM()) {
+         /* don't move up dependencies of other VMEM instructions */
          for (const Definition& def : candidate->definitions) {
             if (def.isTemp())
                ctx.depends_on[def.tempId()] = true;
@@ -681,6 +716,8 @@ void schedule_VMEM(sched_ctx& ctx, Block* block,
             if (op.isTemp())
                ctx.RAR_dependencies[op.tempId()] = true;
          }
+         can_reorder_smem &= candidate->format != Format::SMEM || can_reorder_candidate;
+         can_reorder_vmem &= !candidate->isVMEM() || can_reorder_candidate;
          continue;
       }