From 34424b81df6e5ffb2d22c572864ab6f6b4ac1abb Mon Sep 17 00:00:00 2001 From: Rhys Perry Date: Fri, 7 Feb 2020 11:55:43 +0000 Subject: [PATCH] aco: make PhysReg in units of bytes MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Reviewed-by: Daniel Schürmann Reviewed-By: Timur Kristóf Part-of: --- src/amd/compiler/aco_assembler.cpp | 2 +- src/amd/compiler/aco_insert_waitcnt.cpp | 4 +- src/amd/compiler/aco_ir.h | 18 +++++---- src/amd/compiler/aco_lower_to_hw_instr.cpp | 10 ++--- src/amd/compiler/aco_print_ir.cpp | 2 +- src/amd/compiler/aco_register_allocation.cpp | 42 ++++++++++---------- 6 files changed, 40 insertions(+), 38 deletions(-) diff --git a/src/amd/compiler/aco_assembler.cpp b/src/amd/compiler/aco_assembler.cpp index 9544fc35bd4..c46208b13b4 100644 --- a/src/amd/compiler/aco_assembler.cpp +++ b/src/amd/compiler/aco_assembler.cpp @@ -156,7 +156,7 @@ void emit_instruction(asm_context& ctx, std::vector& out, Instruction* encoding |= instr->operands.size() ? (instr->operands[0].physReg() >> 1) << 9 : 0; if (instr->operands.size() >= 2) { if (!instr->operands[1].isConstant() || instr->operands[1].constantValue() >= 1024) { - encoding |= instr->operands[1].physReg().reg; + encoding |= instr->operands[1].physReg().reg(); } else { encoding |= instr->operands[1].constantValue() >> 2; encoding |= 1 << 8; diff --git a/src/amd/compiler/aco_insert_waitcnt.cpp b/src/amd/compiler/aco_insert_waitcnt.cpp index c0a93e3a929..fc874ae793c 100644 --- a/src/amd/compiler/aco_insert_waitcnt.cpp +++ b/src/amd/compiler/aco_insert_waitcnt.cpp @@ -685,7 +685,7 @@ void insert_wait_entry(wait_ctx& ctx, PhysReg reg, RegClass rc, wait_event event wait_entry new_entry(event, imm, !rc.is_linear(), wait_on_read); for (unsigned i = 0; i < rc.size(); i++) { - auto it = ctx.gpr_map.emplace(PhysReg{reg.reg+i}, new_entry); + auto it = ctx.gpr_map.emplace(PhysReg{reg.reg()+i}, new_entry); if (!it.second) it.first->second.join(new_entry); } @@ -696,7 +696,7 @@ void insert_wait_entry(wait_ctx& ctx, PhysReg reg, RegClass rc, wait_event event unsigned i = u_bit_scan(&counters_todo); ctx.unwaited_instrs[i].insert(std::make_pair(ctx.gen_instr, 0u)); for (unsigned j = 0; j < rc.size(); j++) - ctx.reg_instrs[i][PhysReg{reg.reg+j}].insert(ctx.gen_instr); + ctx.reg_instrs[i][PhysReg{reg.reg()+j}].insert(ctx.gen_instr); } } } diff --git a/src/amd/compiler/aco_ir.h b/src/amd/compiler/aco_ir.h index a62525016fb..986b2d666be 100644 --- a/src/amd/compiler/aco_ir.h +++ b/src/amd/compiler/aco_ir.h @@ -267,10 +267,12 @@ private: */ struct PhysReg { constexpr PhysReg() = default; - explicit constexpr PhysReg(unsigned r) : reg(r) {} - constexpr operator unsigned() const { return reg; } + explicit constexpr PhysReg(unsigned r) : reg_b(r << 2) {} + constexpr unsigned reg() const { return reg_b >> 2; } + constexpr unsigned byte() const { return reg_b & 0x3; } + constexpr operator unsigned() const { return reg(); } - uint16_t reg = 0; + uint16_t reg_b = 0; }; /* helper expressions for special registers */ @@ -475,12 +477,12 @@ public: constexpr uint64_t constantValue64(bool signext=false) const noexcept { if (is64BitConst_) { - if (reg_.reg <= 192) - return reg_.reg - 128; - else if (reg_.reg <= 208) - return 0xFFFFFFFFFFFFFFFF - (reg_.reg - 193); + if (reg_ <= 192) + return reg_ - 128; + else if (reg_ <= 208) + return 0xFFFFFFFFFFFFFFFF - (reg_ - 193); - switch (reg_.reg) { + switch (reg_) { case 240: return 0x3FE0000000000000; case 241: diff --git a/src/amd/compiler/aco_lower_to_hw_instr.cpp b/src/amd/compiler/aco_lower_to_hw_instr.cpp index 606f2fde65c..0dcdf7084f2 100644 --- a/src/amd/compiler/aco_lower_to_hw_instr.cpp +++ b/src/amd/compiler/aco_lower_to_hw_instr.cpp @@ -195,15 +195,15 @@ void emit_int64_op(lower_context *ctx, PhysReg dst_reg, PhysReg src0_reg, PhysRe { Builder bld(ctx->program, &ctx->instructions); Definition dst[] = {Definition(dst_reg, v1), Definition(PhysReg{dst_reg+1}, v1)}; - RegClass src0_rc = src0_reg.reg >= 256 ? v1 : s1; + RegClass src0_rc = src0_reg.reg() >= 256 ? v1 : s1; Operand src0[] = {Operand(src0_reg, src0_rc), Operand(PhysReg{src0_reg+1}, src0_rc)}; Operand src1[] = {Operand(src1_reg, v1), Operand(PhysReg{src1_reg+1}, v1)}; - Operand src0_64 = Operand(src0_reg, src0_reg.reg >= 256 ? v2 : s2); + Operand src0_64 = Operand(src0_reg, src0_reg.reg() >= 256 ? v2 : s2); Operand src1_64 = Operand(src1_reg, v2); if (src0_rc == s1 && (op == imul64 || op == umin64 || op == umax64 || op == imin64 || op == imax64)) { - assert(vtmp.reg != 0); + assert(vtmp.reg() != 0); bld.vop1(aco_opcode::v_mov_b32, Definition(vtmp, v1), src0[0]); bld.vop1(aco_opcode::v_mov_b32, Definition(PhysReg{vtmp+1}, v1), src0[1]); src0_reg = vtmp; @@ -211,7 +211,7 @@ void emit_int64_op(lower_context *ctx, PhysReg dst_reg, PhysReg src0_reg, PhysRe src0[1] = Operand(PhysReg{vtmp+1}, v1); src0_64 = Operand(vtmp, v2); } else if (src0_rc == s1 && op == iadd64) { - assert(vtmp.reg != 0); + assert(vtmp.reg() != 0); bld.vop1(aco_opcode::v_mov_b32, Definition(PhysReg{vtmp+1}, v1), src0[1]); src0[1] = Operand(PhysReg{vtmp+1}, v1); } @@ -330,7 +330,7 @@ void emit_op(lower_context *ctx, PhysReg dst_reg, PhysReg src0_reg, PhysReg src1 Builder bld(ctx->program, &ctx->instructions); RegClass rc = RegClass(RegType::vgpr, size); Definition dst(dst_reg, rc); - Operand src0(src0_reg, RegClass(src0_reg.reg >= 256 ? RegType::vgpr : RegType::sgpr, size)); + Operand src0(src0_reg, RegClass(src0_reg.reg() >= 256 ? RegType::vgpr : RegType::sgpr, size)); Operand src1(src1_reg, rc); aco_opcode opcode = get_reduce_opcode(ctx->program->chip_class, op); diff --git a/src/amd/compiler/aco_print_ir.cpp b/src/amd/compiler/aco_print_ir.cpp index 8f89236ff90..7564b52c17c 100644 --- a/src/amd/compiler/aco_print_ir.cpp +++ b/src/amd/compiler/aco_print_ir.cpp @@ -126,7 +126,7 @@ static void print_operand(const Operand *operand, FILE *output) if (operand->isLiteral()) { fprintf(output, "0x%x", operand->constantValue()); } else if (operand->isConstant()) { - print_constant(operand->physReg().reg, output); + print_constant(operand->physReg().reg(), output); } else if (operand->isUndefined()) { print_reg_class(operand->regClass(), output); fprintf(output, "undef"); diff --git a/src/amd/compiler/aco_register_allocation.cpp b/src/amd/compiler/aco_register_allocation.cpp index c3726acf1b2..1a6775121fe 100644 --- a/src/amd/compiler/aco_register_allocation.cpp +++ b/src/amd/compiler/aco_register_allocation.cpp @@ -212,7 +212,7 @@ void update_renames(ra_ctx& ctx, RegisterFile& reg_file, // FIXME: if a definition got moved, change the target location and remove the parallelcopy copy.second.setTemp(Temp(ctx.program->allocateId(), copy.second.regClass())); ctx.assignments[copy.second.tempId()] = {copy.second.physReg(), copy.second.regClass()}; - for (unsigned i = copy.second.physReg().reg; i < copy.second.physReg() + copy.second.size(); i++) + for (unsigned i = copy.second.physReg().reg(); i < copy.second.physReg() + copy.second.size(); i++) reg_file[i] = copy.second.tempId(); /* check if we moved an operand */ for (Operand& op : instr->operands) { @@ -223,8 +223,8 @@ void update_renames(ra_ctx& ctx, RegisterFile& reg_file, for (std::pair& pc : parallelcopies) { PhysReg def_reg = pc.second.physReg(); omit_renaming &= def_reg > copy.first.physReg() ? - (copy.first.physReg() + copy.first.size() <= def_reg.reg) : - (def_reg + pc.second.size() <= copy.first.physReg().reg); + (copy.first.physReg() + copy.first.size() <= def_reg.reg()) : + (def_reg + pc.second.size() <= copy.first.physReg().reg()); } if (omit_renaming) continue; @@ -492,7 +492,7 @@ std::pair get_reg_impl(ra_ctx& ctx, instr->operands[j].physReg() >= lb && instr->operands[j].physReg() < ub) { assert(instr->operands[j].isFixed()); - assert(reg_file[instr->operands[j].physReg().reg] == 0); + assert(reg_file[instr->operands[j].physReg()] == 0); reg_file.fill(instr->operands[j].physReg(), instr->operands[j].size(), 0xFFFFFFFF); killed_ops += instr->operands[j].getTemp().size(); } @@ -879,7 +879,7 @@ bool get_reg_specified(ra_ctx& ctx, ub = ctx.program->max_reg_demand.sgpr; } - uint32_t reg_lo = reg.reg; + uint32_t reg_lo = reg.reg(); uint32_t reg_hi = reg + (size - 1); if (reg_lo < lb || reg_hi >= ub || reg_lo > reg_hi) @@ -930,7 +930,7 @@ void handle_pseudo(ra_ctx& ctx, return; Pseudo_instruction *pi = (Pseudo_instruction *)instr; - if (reg_file[scc.reg]) { + if (reg_file[scc.reg()]) { pi->tmp_in_scc = true; int reg = ctx.max_used_sgpr; @@ -1385,7 +1385,7 @@ void register_allocation(Program *program, std::vector> live_out_ /* fill in sgpr_live_in */ for (unsigned i = 0; i <= ctx.max_used_sgpr; i++) sgpr_live_in[block.index][i] = register_file[i]; - sgpr_live_in[block.index][127] = register_file[scc.reg]; + sgpr_live_in[block.index][127] = register_file[scc.reg()]; /* Handle all other instructions of the block */ for (; it != block.instructions.end(); ++it) { @@ -1445,8 +1445,8 @@ void register_allocation(Program *program, std::vector> live_out_ } else { /* check if target reg is blocked, and move away the blocking var */ - if (register_file[operand.physReg().reg]) { - uint32_t blocking_id = register_file[operand.physReg().reg]; + if (register_file[operand.physReg().reg()]) { + uint32_t blocking_id = register_file[operand.physReg().reg()]; RegClass rc = ctx.assignments[blocking_id].second; Operand pc_op = Operand(Temp{blocking_id, rc}); pc_op.setFixed(operand.physReg()); @@ -1503,7 +1503,7 @@ void register_allocation(Program *program, std::vector> live_out_ (instr->isVMEM() && i == 3 && program->chip_class == GFX6) || (instr->format == Format::DS && static_cast(instr.get())->gds)) { for (unsigned j = 0; j < operand.size(); j++) - ctx.war_hint.set(operand.physReg().reg + j); + ctx.war_hint.set(operand.physReg().reg() + j); } } std::map::iterator phi = phi_map.find(operand.getTemp().id()); @@ -1563,11 +1563,11 @@ void register_allocation(Program *program, std::vector> live_out_ adjust_max_used_regs(ctx, definition.regClass(), definition.physReg()); /* check if the target register is blocked */ - if (register_file[definition.physReg().reg] != 0) { + if (register_file[definition.physReg().reg()] != 0) { /* create parallelcopy pair to move blocking var */ Temp tmp = {register_file[definition.physReg()], ctx.assignments[register_file[definition.physReg()]].second}; Operand pc_op = Operand(tmp); - pc_op.setFixed(ctx.assignments[register_file[definition.physReg().reg]].first); + pc_op.setFixed(ctx.assignments[register_file[definition.physReg().reg()]].first); RegClass rc = pc_op.regClass(); tmp = Temp{program->allocateId(), rc}; Definition pc_def = Definition(tmp); @@ -1621,7 +1621,7 @@ void register_allocation(Program *program, std::vector> live_out_ continue; /* find free reg */ - if (definition.hasHint() && register_file[definition.physReg().reg] == 0) + if (definition.hasHint() && register_file[definition.physReg().reg()] == 0) definition.setFixed(definition.physReg()); else if (instr->opcode == aco_opcode::p_split_vector) { PhysReg reg = PhysReg{instr->operands[0].physReg() + i * definition.size()}; @@ -1632,7 +1632,7 @@ void register_allocation(Program *program, std::vector> live_out_ PhysReg reg; if (instr->operands[0].isKillBeforeDef() && instr->operands[0].getTemp().type() == definition.getTemp().type()) { reg = instr->operands[0].physReg(); - assert(register_file[reg.reg] == 0); + assert(register_file[reg.reg()] == 0); } else { reg = get_reg(ctx, register_file, definition.regClass(), parallelcopy, instr); } @@ -1642,8 +1642,8 @@ void register_allocation(Program *program, std::vector> live_out_ if (instr->operands[0].isKillBeforeDef() && instr->operands[0].getTemp().type() == definition.getTemp().type()) { reg = instr->operands[0].physReg(); - reg.reg += definition.size() * instr->operands[1].constantValue(); - assert(register_file[reg.reg] == 0); + reg = PhysReg(reg.reg() + definition.size() * instr->operands[1].constantValue()); + assert(register_file[reg.reg()] == 0); } else { reg = get_reg(ctx, register_file, definition.regClass(), parallelcopy, instr); } @@ -1676,7 +1676,7 @@ void register_allocation(Program *program, std::vector> live_out_ op.getTemp().type() == definition.getTemp().type() && ctx.assignments.find(op.tempId()) != ctx.assignments.end()) { PhysReg reg = ctx.assignments[op.tempId()].first; - reg.reg = reg - k + offset; + reg = PhysReg(reg.reg() - k + offset); if (get_reg_specified(ctx, register_file, definition.regClass(), parallelcopy, instr, reg)) { definition.setFixed(reg); break; @@ -1688,7 +1688,7 @@ void register_allocation(Program *program, std::vector> live_out_ std::pair res = get_reg_vec(ctx, register_file, vec->definitions[0].regClass()); PhysReg reg = res.first; if (res.second) { - reg.reg += offset; + reg = PhysReg(reg.reg() + offset); } else { reg = get_reg(ctx, register_file, definition.regClass(), parallelcopy, instr); } @@ -1726,17 +1726,17 @@ void register_allocation(Program *program, std::vector> live_out_ if (!parallelcopy.empty()) { aco_ptr pc; pc.reset(create_instruction(aco_opcode::p_parallelcopy, Format::PSEUDO, parallelcopy.size(), parallelcopy.size())); - bool temp_in_scc = register_file[scc.reg]; + bool temp_in_scc = register_file[scc.reg()]; bool sgpr_operands_alias_defs = false; uint64_t sgpr_operands[4] = {0, 0, 0, 0}; for (unsigned i = 0; i < parallelcopy.size(); i++) { if (temp_in_scc && parallelcopy[i].first.isTemp() && parallelcopy[i].first.getTemp().type() == RegType::sgpr) { if (!sgpr_operands_alias_defs) { - unsigned reg = parallelcopy[i].first.physReg().reg; + unsigned reg = parallelcopy[i].first.physReg().reg(); unsigned size = parallelcopy[i].first.getTemp().size(); sgpr_operands[reg / 64u] |= ((1u << size) - 1) << (reg % 64u); - reg = parallelcopy[i].second.physReg().reg; + reg = parallelcopy[i].second.physReg().reg(); size = parallelcopy[i].second.getTemp().size(); if (sgpr_operands[reg / 64u] & ((1u << size) - 1) << (reg % 64u)) sgpr_operands_alias_defs = true; -- 2.30.2