From efe737fc4f8f76f7d0b3bd8655eafc3196576a3d Mon Sep 17 00:00:00 2001 From: =?utf8?q?Daniel=20Sch=C3=BCrmann?= Date: Thu, 31 Oct 2019 17:33:35 +0100 Subject: [PATCH] aco: fix accidential reordering of instructions when scheduling Fixes: 86786999189c43b4a2c8e1c1a18b55cd2f369fff "aco: implement VGPR spilling" Reviewed-by: Rhys Perry --- src/amd/compiler/aco_scheduler.cpp | 57 ++++++++++++++++++++++++------ 1 file changed, 47 insertions(+), 10 deletions(-) diff --git a/src/amd/compiler/aco_scheduler.cpp b/src/amd/compiler/aco_scheduler.cpp index 08e627ecc28..eb0bb0d93e9 100644 --- a/src/amd/compiler/aco_scheduler.cpp +++ b/src/amd/compiler/aco_scheduler.cpp @@ -172,11 +172,11 @@ bool can_move_instr(aco_ptr& instr, Instruction* current, int movin } } -bool can_reorder(Instruction* candidate, bool allow_smem) +bool can_reorder(Instruction* candidate) { switch (candidate->format) { case Format::SMEM: - return allow_smem || static_cast(candidate)->can_reorder; + return static_cast(candidate)->can_reorder; case Format::MUBUF: return static_cast(candidate)->can_reorder; case Format::MIMG: @@ -200,7 +200,7 @@ void schedule_SMEM(sched_ctx& ctx, Block* block, int window_size = SMEM_WINDOW_SIZE; int max_moves = SMEM_MAX_MOVES; int16_t k = 0; - bool can_reorder_cur = can_reorder(current, false); + bool can_reorder_cur = can_reorder(current); /* don't move s_memtime/s_memrealtime */ if (current->opcode == aco_opcode::s_memtime || current->opcode == aco_opcode::s_memrealtime) @@ -224,6 +224,7 @@ void schedule_SMEM(sched_ctx& ctx, Block* block, for (int candidate_idx = idx - 1; k < max_moves && candidate_idx > (int) idx - window_size; candidate_idx--) { assert(candidate_idx >= 0); aco_ptr& candidate = block->instructions[candidate_idx]; + bool can_reorder_candidate = can_reorder(candidate.get()); /* break if we'd make the previous SMEM instruction stall */ bool can_stall_prev_smem = idx <= ctx.last_SMEM_dep_idx && candidate_idx < ctx.last_SMEM_dep_idx; @@ -231,7 +232,7 @@ void schedule_SMEM(sched_ctx& ctx, Block* block, break; /* break when encountering another MEM instruction, logical_start or barriers */ - if (!can_reorder(candidate.get(), false) && !can_reorder_cur) + if (!can_reorder_candidate && !can_reorder_cur) break; if (candidate->opcode == aco_opcode::p_logical_start) break; @@ -239,6 +240,8 @@ void schedule_SMEM(sched_ctx& ctx, Block* block, break; if (!can_move_instr(candidate, current, moving_interaction)) break; + if (candidate->isVMEM()) + break; register_pressure.update(register_demand[candidate_idx]); /* if current depends on candidate, add additional dependencies and continue */ @@ -264,6 +267,7 @@ void schedule_SMEM(sched_ctx& ctx, Block* block, if (op.isTemp()) ctx.depends_on[op.tempId()] = true; } + can_reorder_cur &= can_reorder_candidate; continue; } @@ -280,6 +284,7 @@ void schedule_SMEM(sched_ctx& ctx, Block* block, if (op.isTemp()) ctx.depends_on[op.tempId()] = true; } + can_reorder_cur &= can_reorder_candidate; continue; } @@ -323,12 +328,14 @@ void schedule_SMEM(sched_ctx& ctx, Block* block, insert_idx = idx + 1; moving_interaction = barrier_none; moving_spill = false; + can_reorder_cur = true; bool found_dependency = false; /* second, check if we have instructions after current to move up */ for (int candidate_idx = idx + 1; k < max_moves && candidate_idx < (int) idx + window_size; candidate_idx++) { assert(candidate_idx < (int) block->instructions.size()); aco_ptr& candidate = block->instructions[candidate_idx]; + bool can_reorder_candidate = can_reorder(candidate.get()); if (candidate->opcode == aco_opcode::p_logical_end) break; @@ -369,7 +376,7 @@ void schedule_SMEM(sched_ctx& ctx, Block* block, } } - if (!can_reorder(candidate.get(), false) && !can_reorder_cur) + if (!can_reorder_candidate && !can_reorder_cur) break; if (!found_dependency) { @@ -380,8 +387,10 @@ void schedule_SMEM(sched_ctx& ctx, Block* block, /* update register pressure */ register_pressure.update(register_demand[candidate_idx - 1]); - if (is_dependency) + if (is_dependency) { + can_reorder_cur &= can_reorder_candidate; continue; + } assert(insert_idx != idx); // TODO: correctly calculate register pressure for this case @@ -392,6 +401,8 @@ void schedule_SMEM(sched_ctx& ctx, Block* block, register_pressure_unknown = true; } if (register_pressure_unknown) { + if (candidate->isVMEM()) + break; for (const Definition& def : candidate->definitions) { if (def.isTemp()) ctx.RAR_dependencies[def.tempId()] = true; @@ -400,6 +411,7 @@ void schedule_SMEM(sched_ctx& ctx, Block* block, if (op.isTemp()) ctx.RAR_dependencies[op.tempId()] = true; } + can_reorder_cur &= can_reorder_candidate; continue; } @@ -440,7 +452,10 @@ void schedule_VMEM(sched_ctx& ctx, Block* block, int max_moves = VMEM_MAX_MOVES; int clause_max_grab_dist = VMEM_CLAUSE_MAX_GRAB_DIST; int16_t k = 0; - bool can_reorder_cur = can_reorder(current, false); + /* initially true as we don't pull other VMEM instructions + * through the current instruction */ + bool can_reorder_vmem = true; + bool can_reorder_smem = true; /* create the initial set of values which current depends on */ std::fill(ctx.depends_on.begin(), ctx.depends_on.end(), false); @@ -467,9 +482,10 @@ void schedule_VMEM(sched_ctx& ctx, Block* block, for (int candidate_idx = idx - 1; k < max_moves && candidate_idx > (int) idx - window_size; candidate_idx--) { assert(candidate_idx >= 0); aco_ptr& candidate = block->instructions[candidate_idx]; + bool can_reorder_candidate = can_reorder(candidate.get()); /* break when encountering another VMEM instruction, logical_start or barriers */ - if (!can_reorder(candidate.get(), true) && !can_reorder_cur) + if (!can_reorder_smem && candidate->format == Format::SMEM && !can_reorder_candidate) break; if (candidate->opcode == aco_opcode::p_logical_start) break; @@ -487,10 +503,11 @@ void schedule_VMEM(sched_ctx& ctx, Block* block, bool part_of_clause = false; if (candidate->isVMEM()) { bool same_resource = candidate->operands[1].tempId() == current->operands[1].tempId(); + bool can_reorder = can_reorder_vmem || can_reorder_candidate; int grab_dist = clause_insert_idx - candidate_idx; /* We can't easily tell how much this will decrease the def-to-use * distances, so just use how far it will be moved as a heuristic. */ - part_of_clause = same_resource && grab_dist < clause_max_grab_dist; + part_of_clause = can_reorder && same_resource && grab_dist < clause_max_grab_dist; } /* if current depends on candidate, add additional dependencies and continue */ @@ -522,6 +539,8 @@ void schedule_VMEM(sched_ctx& ctx, Block* block, } } register_pressure_clause.update(register_demand[candidate_idx]); + can_reorder_smem &= candidate->format != Format::SMEM || can_reorder_candidate; + can_reorder_vmem &= !candidate->isVMEM() || can_reorder_candidate; continue; } @@ -555,6 +574,8 @@ void schedule_VMEM(sched_ctx& ctx, Block* block, } } register_pressure_clause.update(register_demand[candidate_idx]); + can_reorder_smem &= candidate->format != Format::SMEM || can_reorder_candidate; + can_reorder_vmem &= !candidate->isVMEM() || can_reorder_candidate; continue; } @@ -605,12 +626,16 @@ void schedule_VMEM(sched_ctx& ctx, Block* block, int insert_idx = idx; moving_interaction = barrier_none; moving_spill = false; + // TODO: differentiate between loads and stores (load-load can always reorder) + can_reorder_vmem = true; + can_reorder_smem = true; bool found_dependency = false; /* second, check if we have instructions after current to move up */ for (int candidate_idx = idx + 1; k < max_moves && candidate_idx < (int) idx + window_size; candidate_idx++) { assert(candidate_idx < (int) block->instructions.size()); aco_ptr& candidate = block->instructions[candidate_idx]; + bool can_reorder_candidate = can_reorder(candidate.get()); if (candidate->opcode == aco_opcode::p_logical_end) break; @@ -623,7 +648,11 @@ void schedule_VMEM(sched_ctx& ctx, Block* block, break; /* check if candidate depends on current */ - bool is_dependency = !can_reorder(candidate.get(), true) && !can_reorder_cur; + bool is_dependency = false; + if (candidate->format == Format::SMEM) + is_dependency = !can_reorder_smem && !can_reorder_candidate; + if (candidate->isVMEM()) + is_dependency = !can_reorder_vmem && !can_reorder_candidate; for (const Operand& op : candidate->operands) { if (op.isTemp() && ctx.depends_on[op.tempId()]) { is_dependency = true; @@ -645,6 +674,10 @@ void schedule_VMEM(sched_ctx& ctx, Block* block, if (op.isTemp()) ctx.RAR_dependencies[op.tempId()] = true; } + /* update flag whether we can reorder other memory instructions */ + can_reorder_smem &= candidate->format != Format::SMEM || can_reorder_candidate; + can_reorder_vmem &= !candidate->isVMEM() || can_reorder_candidate; + if (!found_dependency) { insert_idx = candidate_idx; found_dependency = true; @@ -652,7 +685,9 @@ void schedule_VMEM(sched_ctx& ctx, Block* block, register_pressure = register_demand[insert_idx - 1]; continue; } + } else if (candidate->isVMEM()) { + /* don't move up dependencies of other VMEM instructions */ for (const Definition& def : candidate->definitions) { if (def.isTemp()) ctx.depends_on[def.tempId()] = true; @@ -681,6 +716,8 @@ void schedule_VMEM(sched_ctx& ctx, Block* block, if (op.isTemp()) ctx.RAR_dependencies[op.tempId()] = true; } + can_reorder_smem &= candidate->format != Format::SMEM || can_reorder_candidate; + can_reorder_vmem &= !candidate->isVMEM() || can_reorder_candidate; continue; } -- 2.30.2