From 2c74fc98b803dfbc4b8970e07daa944e2d591bb9 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Daniel=20Sch=C3=BCrmann?= Date: Thu, 2 Apr 2020 19:13:03 +0100 Subject: [PATCH] aco: create helper function to collect variables from register area MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Reviewed-by: Rhys Perry Reviewed-By: Timur Kristóf Part-of: --- src/amd/compiler/aco_register_allocation.cpp | 59 ++++++++++++-------- 1 file changed, 35 insertions(+), 24 deletions(-) diff --git a/src/amd/compiler/aco_register_allocation.cpp b/src/amd/compiler/aco_register_allocation.cpp index 8e6575ef58e..d12856a3fbb 100644 --- a/src/amd/compiler/aco_register_allocation.cpp +++ b/src/amd/compiler/aco_register_allocation.cpp @@ -360,6 +360,35 @@ std::pair<PhysReg, bool> get_reg_simple(ra_ctx& ctx, return {{}, false}; } +/* collect variables from a register area and clear reg_file */ +std::set<std::pair<unsigned, unsigned>> collect_vars(ra_ctx& ctx, RegisterFile& reg_file, + PhysReg reg, unsigned size) +{ + std::set<std::pair<unsigned, unsigned>> vars; + for (unsigned j = reg; j < reg + size; j++) { + if (reg_file.is_blocked(PhysReg{j})) + continue; + if (reg_file[j] == 0xF0000000) { + for (unsigned k = 0; k < 4; k++) { + unsigned id = reg_file.subdword_regs[j][k]; + if (id) { + std::pair<PhysReg, RegClass> assignment = ctx.assignments.at(id); + vars.emplace(assignment.second.bytes(), id); + reg_file.clear(assignment.first, assignment.second); + if (!reg_file[j]) + break; + } + } + } else if (reg_file[j] != 0) { + unsigned id = reg_file[j]; + std::pair<PhysReg, RegClass> assignment = ctx.assignments.at(id); + vars.emplace(assignment.second.bytes(), id); + reg_file.clear(assignment.first, assignment.second); + } + } + return vars; +} + bool get_regs_for_copies(ra_ctx& ctx, RegisterFile& reg_file, std::vector<std::pair<Operand, Definition>>& parallelcopies, @@ -375,7 +404,7 @@ bool get_regs_for_copies(ra_ctx& ctx, for (std::set<std::pair<unsigned, unsigned>>::const_reverse_iterator it = vars.rbegin(); it != vars.rend(); ++it) { unsigned id = it->second; std::pair<PhysReg, RegClass> var = ctx.assignments[id]; - uint32_t size = it->first; + 
uint32_t size = var.second.size(); uint32_t stride = 1; if (var.second.type() == RegType::sgpr) { if (size == 2) @@ -493,15 +522,7 @@ bool get_regs_for_copies(ra_ctx& ctx, reg_hi = best_pos + size - 1; /* collect variables and block reg file */ - std::set<std::pair<unsigned, unsigned>> new_vars; - for (unsigned j = reg_lo; j <= reg_hi; j++) { - if (reg_file[j] != 0) { - unsigned size = ctx.assignments[reg_file[j]].second.size(); - unsigned id = reg_file[j]; - new_vars.emplace(size, id); - reg_file.clear(ctx.assignments[id].first, ctx.assignments[id].second); - } - } + std::set<std::pair<unsigned, unsigned>> new_vars = collect_vars(ctx, reg_file, PhysReg{reg_lo}, size); /* mark the area as blocked */ reg_file.block(PhysReg{reg_lo}, size * 4); @@ -637,12 +658,7 @@ std::pair<PhysReg, bool> get_reg_impl(ra_ctx& ctx, RegisterFile register_file = reg_file; /* now, we figured the placement for our definition */ - std::set<std::pair<unsigned, unsigned>> vars; - for (unsigned j = best_pos; j < best_pos + size; j++) { - if (reg_file[j] != 0xFFFFFFFF && reg_file[j] != 0) - vars.emplace(ctx.assignments[reg_file[j]].second.size(), reg_file[j]); - reg_file.clear(ctx.assignments[reg_file[j]].first, ctx.assignments[reg_file[j]].second); - } + std::set<std::pair<unsigned, unsigned>> vars = collect_vars(ctx, reg_file, PhysReg{best_pos}, size); if (instr->opcode == aco_opcode::p_create_vector) { /* move killed operands which aren't yet at the correct position */ @@ -651,7 +667,7 @@ std::pair<PhysReg, bool> get_reg_impl(ra_ctx& ctx, instr->operands[i].getTemp().type() == rc.type()) { if (instr->operands[i].physReg() != best_pos + offset) { - vars.emplace(instr->operands[i].size(), instr->operands[i].tempId()); + vars.emplace(instr->operands[i].bytes(), instr->operands[i].tempId()); reg_file.clear(instr->operands[i]); } else { reg_file.fill(instr->operands[i]); @@ -878,12 +894,7 @@ PhysReg get_reg_create_vector(ra_ctx& ctx, return get_reg(ctx, reg_file, rc, parallelcopies, instr); /* collect variables to be moved */ - std::set<std::pair<unsigned, unsigned>> vars; - for (unsigned i = best_pos; i < best_pos + size; i++) { - if (reg_file[i] != 0) - 
vars.emplace(ctx.assignments[reg_file[i]].second.size(), reg_file[i]); - reg_file[i] = 0; - } + std::set<std::pair<unsigned, unsigned>> vars = collect_vars(ctx, reg_file, PhysReg{best_pos}, size); /* move killed operands which aren't yet at the correct position */ for (unsigned i = 0, offset = 0; i < instr->operands.size(); offset += instr->operands[i].size(), i++) { @@ -891,7 +902,7 @@ PhysReg get_reg_create_vector(ra_ctx& ctx, instr->operands[i].isFirstKillBeforeDef() && instr->operands[i].getTemp().type() == rc.type() && instr->operands[i].physReg() != best_pos + offset) - vars.emplace(instr->operands[i].size(), instr->operands[i].tempId()); + vars.emplace(instr->operands[i].bytes(), instr->operands[i].tempId()); ASSERTED bool success = false; -- 2.30.2