From: Daniel Schürmann Date: Tue, 25 Feb 2020 20:40:38 +0000 (+0100) Subject: aco: adapt register allocation for subdword registers X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=031edbc4a54d5685b05e244f8aa1e094ec246eb5;p=mesa.git aco: adapt register allocation for subdword registers Reviewed-by: Rhys Perry Reviewed-By: Timur Kristóf Part-of: --- diff --git a/src/amd/compiler/aco_register_allocation.cpp b/src/amd/compiler/aco_register_allocation.cpp index d12856a3fbb..43273531d3c 100644 --- a/src/amd/compiler/aco_register_allocation.cpp +++ b/src/amd/compiler/aco_register_allocation.cpp @@ -291,6 +291,33 @@ std::pair get_reg_simple(ra_ctx& ctx, uint32_t size, uint32_t stride, RegClass rc) { + if (rc.is_subdword()) { + for (std::pair> entry : reg_file.subdword_regs) { + assert(reg_file[entry.first] == 0xF0000000); + if (lb > entry.first || entry.first >= ub) + continue; + + for (unsigned i = 0; i < 4; i+= stride) { + if (entry.second[i] != 0) + continue; + + bool reg_found = true; + for (unsigned j = 1; reg_found && i + j < 4 && j < rc.bytes(); j++) + reg_found &= entry.second[i + j] == 0; + + /* check neighboring reg if needed */ + reg_found &= (i <= 4 - rc.bytes() || reg_file[entry.first + 1] == 0); + if (reg_found) { + PhysReg res{entry.first}; + res.reg_b += i; + return {res, true}; + } + } + } + + stride = 1; /* stride in full registers */ + } + /* best fit algorithm: find the smallest gap to fit in the variable */ if (stride == 1) { unsigned best_pos = 0xFFFF; @@ -481,6 +508,11 @@ bool get_regs_for_copies(ra_ctx& ctx, found = false; break; } + if (reg_file[j] == 0xF0000000) { + k += 1; + n++; + continue; + } /* we cannot split live ranges of linear vgprs */ if (ctx.assignments[reg_file[j]].second & (1 << 6)) { found = false; @@ -610,6 +642,12 @@ std::pair get_reg_impl(ra_ctx& ctx, continue; } + if (reg_file[j] == 0xF0000000) { + k += 1; + n++; + continue; + } + if (ctx.assignments[reg_file[j]].second.size() >= size) { found = false; break; @@ -1686,7 +1724,8 @@ void register_allocation(Program *program, std::vector> live_out_ if (definition.hasHint() && register_file[definition.physReg().reg()] == 0) definition.setFixed(definition.physReg()); else if (instr->opcode == aco_opcode::p_split_vector) { - PhysReg reg = PhysReg{instr->operands[0].physReg() + i * definition.size()}; + PhysReg reg = instr->operands[0].physReg(); + reg.reg_b += i * definition.bytes(); if (!get_reg_specified(ctx, register_file, definition.regClass(), parallelcopy, instr, reg)) reg = get_reg(ctx, register_file, definition.regClass(), parallelcopy, instr); definition.setFixed(reg); @@ -1704,8 +1743,8 @@ void register_allocation(Program *program, std::vector> live_out_ if (instr->operands[0].isKillBeforeDef() && instr->operands[0].getTemp().type() == definition.getTemp().type()) { reg = instr->operands[0].physReg(); - reg = PhysReg(reg.reg() + definition.size() * instr->operands[1].constantValue()); - assert(register_file[reg.reg()] == 0); + reg.reg_b += definition.bytes() * instr->operands[1].constantValue(); + assert(!register_file.test(reg, definition.bytes())); } else { reg = get_reg(ctx, register_file, definition.regClass(), parallelcopy, instr); } @@ -1724,12 +1763,12 @@ void register_allocation(Program *program, std::vector> live_out_ } else if (vectors.find(definition.tempId()) != vectors.end()) { Instruction* vec = vectors[definition.tempId()]; - unsigned offset = 0; + unsigned byte_offset = 0; for (const Operand& op : vec->operands) { if (op.isTemp() && op.tempId() == definition.tempId()) break; else - offset += op.size(); + byte_offset += op.bytes(); } unsigned k = 0; for (const Operand& op : vec->operands) { @@ -1738,19 +1777,19 @@ void register_allocation(Program *program, std::vector> live_out_ op.getTemp().type() == definition.getTemp().type() && ctx.assignments.find(op.tempId()) != ctx.assignments.end()) { PhysReg reg = ctx.assignments[op.tempId()].first; - reg = PhysReg(reg.reg() - k + offset); + reg.reg_b += (byte_offset - k); if (get_reg_specified(ctx, register_file, definition.regClass(), parallelcopy, instr, reg)) { definition.setFixed(reg); break; } } - k += op.size(); + k += op.bytes(); } if (!definition.isFixed()) { std::pair res = get_reg_vec(ctx, register_file, vec->definitions[0].regClass()); PhysReg reg = res.first; if (res.second) { - reg = PhysReg(reg.reg() + offset); + reg.reg_b += byte_offset; } else { reg = get_reg(ctx, register_file, definition.regClass(), parallelcopy, instr); }