From: Giacomo Gabrielli Date: Mon, 26 Feb 2018 13:41:08 +0000 (+0000) Subject: cpu-o3: Add support for pinned writes X-Git-Tag: v19.0.0.0~794 X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=fc61172dbe4e3a93f941227a1f36b7f07e97ab68;p=gem5.git cpu-o3: Add support for pinned writes This patch adds support for pinning registers for a certain number of consecutive writes. This is only relevant for timing CPU models (functional-only models are unaffected), and it is primarily needed to provide a realistic execution model for micro-coded operations whose microops can write to non-overlapping portions of a destination register, e.g. vector gather loads. In those cases, this mechanism can disable renaming for a sequence of consecutive writes, thus making the resulting execution more efficient: allocating a new physical register for each microop would introduce a read-modify-write chain of dependencies, while with these modifications the microops can write back in parallel. Please note that this new feature is only leveraged by O3CPU for the time being. Additional authors: - Gabor Dozsa Change-Id: I07eb5fdbd1fa0b748c9bdc1174d9f330fda34f81 Signed-off-by: Giacomo Gabrielli Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/13520 Reviewed-by: Andreas Sandberg Maintainer: Andreas Sandberg Tested-by: kokoro --- diff --git a/src/cpu/base_dyn_inst.hh b/src/cpu/base_dyn_inst.hh index 22a32ec10..6f9555a38 100644 --- a/src/cpu/base_dyn_inst.hh +++ b/src/cpu/base_dyn_inst.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011, 2013, 2016-2018 ARM Limited + * Copyright (c) 2011, 2013, 2016-2019 ARM Limited * Copyright (c) 2013 Advanced Micro Devices, Inc. * All rights reserved. 
* @@ -116,6 +116,9 @@ class BaseDynInst : public ExecContext, public RefCounted SquashedInIQ, /// Instruction is squashed in the IQ SquashedInLSQ, /// Instruction is squashed in the LSQ SquashedInROB, /// Instruction is squashed in the ROB + PinnedRegsRenamed, /// Pinned registers are renamed + PinnedRegsWritten, /// Pinned registers are written back + PinnedRegsSquashDone, /// Regs pinning status updated after squash RecoverInst, /// Is a recover instruction BlockingInst, /// Is a blocking instruction ThreadsyncWait, /// Is a thread synchronization instruction @@ -173,12 +176,14 @@ class BaseDynInst : public ExecContext, public RefCounted /** PC state for this instruction. */ TheISA::PCState pc; + private: /* An amalgamation of a lot of boolean values into one */ std::bitset instFlags; /** The status of this BaseDynInst. Several bits can be set. */ std::bitset status; + protected: /** Whether or not the source register is ready. * @todo: Not sure this should be here vs the derived class. */ @@ -385,6 +390,8 @@ class BaseDynInst : public ExecContext, public RefCounted { _destRegIdx[idx] = renamed_dest; _prevDestRegIdx[idx] = previous_rename; + if (renamed_dest->isPinned()) + setPinnedRegsRenamed(); } /** Renames a source logical register to the physical register which @@ -767,7 +774,7 @@ class BaseDynInst : public ExecContext, public RefCounted bool isCommitted() const { return status[Committed]; } /** Sets this instruction as squashed. */ - void setSquashed() { status.set(Squashed); } + void setSquashed(); /** Returns whether or not this instruction is squashed. */ bool isSquashed() const { return status[Squashed]; } @@ -802,7 +809,7 @@ class BaseDynInst : public ExecContext, public RefCounted bool isInLSQ() const { return status[LsqEntry]; } /** Sets this instruction as squashed in the LSQ. 
*/ - void setSquashedInLSQ() { status.set(SquashedInLSQ);} + void setSquashedInLSQ() { status.set(SquashedInLSQ); status.set(Squashed);} /** Returns whether or not this instruction is squashed in the LSQ. */ bool isSquashedInLSQ() const { return status[SquashedInLSQ]; } @@ -825,6 +832,41 @@ class BaseDynInst : public ExecContext, public RefCounted /** Returns whether or not this instruction is squashed in the ROB. */ bool isSquashedInROB() const { return status[SquashedInROB]; } + /** Returns whether pinned registers are renamed */ + bool isPinnedRegsRenamed() const { return status[PinnedRegsRenamed]; } + + /** Sets the destination registers as renamed */ + void + setPinnedRegsRenamed() + { + assert(!status[PinnedRegsSquashDone]); + assert(!status[PinnedRegsWritten]); + status.set(PinnedRegsRenamed); + } + + /** Returns whether destination registers are written */ + bool isPinnedRegsWritten() const { return status[PinnedRegsWritten]; } + + /** Sets destination registers as written */ + void + setPinnedRegsWritten() + { + assert(!status[PinnedRegsSquashDone]); + assert(status[PinnedRegsRenamed]); + status.set(PinnedRegsWritten); + } + + /** Return whether dest registers' pinning status updated after squash */ + bool + isPinnedRegsSquashDone() const { return status[PinnedRegsSquashDone]; } + + /** Sets dest registers' status updated after squash */ + void + setPinnedRegsSquashDone() { + assert(!status[PinnedRegsSquashDone]); + status.set(PinnedRegsSquashDone); + } + /** Read the PC state of this instruction. */ TheISA::PCState pcState() const { return pc; } diff --git a/src/cpu/base_dyn_inst_impl.hh b/src/cpu/base_dyn_inst_impl.hh index 6d3a3ac4e..41eb64c17 100644 --- a/src/cpu/base_dyn_inst_impl.hh +++ b/src/cpu/base_dyn_inst_impl.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011, 2018 ARM Limited + * Copyright (c) 2011, 2019 ARM Limited * All rights reserved. 
* * The license below extends only to copyright in the software and shall @@ -237,4 +237,34 @@ BaseDynInst::eaSrcsReady() const return true; } + + +template +void +BaseDynInst::setSquashed() +{ + status.set(Squashed); + + if (!isPinnedRegsRenamed() || isPinnedRegsSquashDone()) + return; + + // This inst has been renamed already so it may go through rename + // again (e.g. if the squash is due to memory access order violation). + // Reset the write counters for all pinned destination register to ensure + // that they are in a consistent state for a possible re-rename. This also + // ensures that dest regs will be pinned to the same phys register if + // re-rename happens. + for (int idx = 0; idx < numDestRegs(); idx++) { + PhysRegIdPtr phys_dest_reg = renamedDestRegIdx(idx); + if (phys_dest_reg->isPinned()) { + phys_dest_reg->incrNumPinnedWrites(); + if (isPinnedRegsWritten()) + phys_dest_reg->incrNumPinnedWritesToComplete(); + } + } + setPinnedRegsSquashDone(); +} + + + #endif//__CPU_BASE_DYN_INST_IMPL_HH__ diff --git a/src/cpu/o3/free_list.hh b/src/cpu/o3/free_list.hh index 46bebf30d..82ff25d3b 100644 --- a/src/cpu/o3/free_list.hh +++ b/src/cpu/o3/free_list.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2017 ARM Limited + * Copyright (c) 2016-2018 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -79,10 +79,9 @@ class SimpleFreeList template void addRegs(InputIt first, InputIt last) { - std::for_each(first, last, - [this](const typename InputIt::value_type& reg) { - this->freeRegs.push(&reg); - }); + std::for_each(first, last, [this](typename InputIt::value_type& reg) { + this->freeRegs.push(&reg); + }); } /** Get the next available register from the free list */ diff --git a/src/cpu/o3/iew_impl.hh b/src/cpu/o3/iew_impl.hh index 56c182564..5872f90d2 100644 --- a/src/cpu/o3/iew_impl.hh +++ b/src/cpu/o3/iew_impl.hh @@ -1480,11 +1480,14 @@ DefaultIEW::writebackInsts() int dependents = 
instQueue.wakeDependents(inst); for (int i = 0; i < inst->numDestRegs(); i++) { - //mark as Ready - DPRINTF(IEW,"Setting Destination Register %i (%s)\n", - inst->renamedDestRegIdx(i)->index(), - inst->renamedDestRegIdx(i)->className()); - scoreboard->setReg(inst->renamedDestRegIdx(i)); + // Mark register as ready if not pinned + if (inst->renamedDestRegIdx(i)-> + getNumPinnedWritesToComplete() == 0) { + DPRINTF(IEW,"Setting Destination Register %i (%s)\n", + inst->renamedDestRegIdx(i)->index(), + inst->renamedDestRegIdx(i)->className()); + scoreboard->setReg(inst->renamedDestRegIdx(i)); + } } if (dependents) { diff --git a/src/cpu/o3/inst_queue_impl.hh b/src/cpu/o3/inst_queue_impl.hh index 20b41e51d..c3e3fdf32 100644 --- a/src/cpu/o3/inst_queue_impl.hh +++ b/src/cpu/o3/inst_queue_impl.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011-2014, 2017-2018 ARM Limited + * Copyright (c) 2011-2014, 2017-2019 ARM Limited * Copyright (c) 2013 Advanced Micro Devices, Inc. * All rights reserved. * @@ -1033,6 +1033,17 @@ InstructionQueue::wakeDependents(const DynInstPtr &completed_inst) continue; } + // Avoid waking up dependents if the register is pinned + dest_reg->decrNumPinnedWritesToComplete(); + if (dest_reg->isPinned()) + completed_inst->setPinnedRegsWritten(); + + if (dest_reg->getNumPinnedWritesToComplete() != 0) { + DPRINTF(IQ, "Reg %d [%s] is pinned, skipping\n", + dest_reg->index(), dest_reg->className()); + continue; + } + DPRINTF(IQ, "Waking any dependents on register %i (%s).\n", dest_reg->index(), dest_reg->className()); diff --git a/src/cpu/o3/regfile.cc b/src/cpu/o3/regfile.cc index cc4bba6b0..afed8f9fa 100644 --- a/src/cpu/o3/regfile.cc +++ b/src/cpu/o3/regfile.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2017 ARM Limited + * Copyright (c) 2016-2018 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -216,8 +216,8 @@ PhysRegFile::getRegIds(RegClass cls) -> IdRange return std::make_pair(miscRegIds.begin(), 
miscRegIds.end()); } /* There is no way to make an empty iterator */ - return std::make_pair(PhysIds::const_iterator(), - PhysIds::const_iterator()); + return std::make_pair(PhysIds::iterator(), + PhysIds::iterator()); } PhysRegIdPtr diff --git a/src/cpu/o3/regfile.hh b/src/cpu/o3/regfile.hh index d4b6602ad..352e6ac93 100644 --- a/src/cpu/o3/regfile.hh +++ b/src/cpu/o3/regfile.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2017 ARM Limited + * Copyright (c) 2016-2018 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -70,8 +70,8 @@ class PhysRegFile using VecMode = Enums::VecRegRenameMode; using VecPredRegContainer = TheISA::VecPredRegContainer; public: - using IdRange = std::pair; + using IdRange = std::pair; private: static constexpr auto NumVecElemPerVecReg = TheISA::NumVecElemPerVecReg; diff --git a/src/cpu/o3/rename_impl.hh b/src/cpu/o3/rename_impl.hh index 43f0b2746..d55bed655 100644 --- a/src/cpu/o3/rename_impl.hh +++ b/src/cpu/o3/rename_impl.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010-2012, 2014-2016 ARM Limited + * Copyright (c) 2010-2012, 2014-2019 ARM Limited * Copyright (c) 2013 Advanced Micro Devices, Inc. * All rights reserved. * @@ -976,7 +976,9 @@ DefaultRename::doSquash(const InstSeqNum &squashed_seq_num, ThreadID tid) assert(hb_it != historyBuffer[tid].end()); DPRINTF(Rename, "[tid:%i] Removing history entry with sequence " - "number %i.\n", tid, hb_it->instSeqNum); + "number %i (archReg: %d, newPhysReg: %d, prevPhysReg: %d).\n", + tid, hb_it->instSeqNum, hb_it->archReg.index(), + hb_it->newPhysReg->index(), hb_it->prevPhysReg->index()); // Undo the rename mapping only if it was really a change. 
// Special regs that are not really renamed (like misc regs @@ -1140,12 +1142,12 @@ DefaultRename::renameDestRegs(const DynInstPtr &inst, ThreadID tid) typename RenameMap::RenameInfo rename_result; RegId flat_dest_regid = tc->flattenRegId(dest_reg); + flat_dest_regid.setNumPinnedWrites(dest_reg.getNumPinnedWrites()); rename_result = map->rename(flat_dest_regid); inst->flattenDestReg(dest_idx, flat_dest_regid); - // Mark Scoreboard entry as not ready scoreboard->unsetReg(rename_result.first); DPRINTF(Rename, diff --git a/src/cpu/o3/rename_map.cc b/src/cpu/o3/rename_map.cc index 9d912e582..64f3dbf8c 100644 --- a/src/cpu/o3/rename_map.cc +++ b/src/cpu/o3/rename_map.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2017,2019 ARM Limited + * Copyright (c) 2016-2018,2019 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -78,16 +78,23 @@ SimpleRenameMap::rename(const RegId& arch_reg) // requested architected register. PhysRegIdPtr prev_reg = map[arch_reg.flatIndex()]; - // If it's not referencing the zero register, then rename the - // register. - if (arch_reg != zeroReg) { - renamed_reg = freeList->getReg(); - - map[arch_reg.flatIndex()] = renamed_reg; - } else { - // Otherwise return the zero register so nothing bad happens. 
+ if (arch_reg == zeroReg) { assert(prev_reg->isZeroReg()); renamed_reg = prev_reg; + } else if (prev_reg->getNumPinnedWrites() > 0) { + // Do not rename if the register is pinned + assert(arch_reg.getNumPinnedWrites() == 0); // Prevent pinning the + // same register twice + DPRINTF(Rename, "Renaming pinned reg, numPinnedWrites %d\n", + prev_reg->getNumPinnedWrites()); + renamed_reg = prev_reg; + renamed_reg->decrNumPinnedWrites(); + } else { + renamed_reg = freeList->getReg(); + map[arch_reg.flatIndex()] = renamed_reg; + renamed_reg->setNumPinnedWrites(arch_reg.getNumPinnedWrites()); + renamed_reg->setNumPinnedWritesToComplete( + arch_reg.getNumPinnedWrites() + 1); } DPRINTF(Rename, "Renamed reg %d to physical reg %d (%d) old mapping was" diff --git a/src/cpu/reg_class.hh b/src/cpu/reg_class.hh index 63a6c86a3..bd49d15b0 100644 --- a/src/cpu/reg_class.hh +++ b/src/cpu/reg_class.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2017 ARM Limited + * Copyright (c) 2016-2018 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -77,24 +77,29 @@ const int NumRegClasses = MiscRegClass + 1; * index 3 is represented by Regid(IntRegClass, 3). 
*/ class RegId { - private: + protected: static const char* regClassStrings[]; RegClass regClass; RegIndex regIdx; ElemIndex elemIdx; static constexpr size_t Scale = TheISA::NumVecElemPerVecReg; + int numPinnedWrites; + friend struct std::hash; + public: RegId() : regClass(IntRegClass), regIdx(0), elemIdx(-1) {} RegId(RegClass reg_class, RegIndex reg_idx) - : regClass(reg_class), regIdx(reg_idx), elemIdx(-1) + : regClass(reg_class), regIdx(reg_idx), elemIdx(-1), + numPinnedWrites(0) { panic_if(regClass == VecElemClass, "Creating vector physical index w/o element index"); } explicit RegId(RegClass reg_class, RegIndex reg_idx, ElemIndex elem_idx) - : regClass(reg_class), regIdx(reg_idx), elemIdx(elem_idx) + : regClass(reg_class), regIdx(reg_idx), elemIdx(elem_idx), + numPinnedWrites(0) { panic_if(regClass != VecElemClass, "Creating non-vector physical index w/ element index"); @@ -202,6 +207,9 @@ class RegId { /** Return a const char* with the register class name. */ const char* className() const { return regClassStrings[regClass]; } + int getNumPinnedWrites() const { return numPinnedWrites; } + void setNumPinnedWrites(int num_writes) { numPinnedWrites = num_writes; } + friend std::ostream& operator<<(std::ostream& os, const RegId& rid) { return os << rid.className() << "{" << rid.index() << "}"; @@ -221,20 +229,27 @@ using PhysRegIndex = short int; class PhysRegId : private RegId { private: PhysRegIndex flatIdx; + int numPinnedWritesToComplete; + bool pinned; public: - explicit PhysRegId() : RegId(IntRegClass, -1), flatIdx(-1) {} + explicit PhysRegId() : RegId(IntRegClass, -1), flatIdx(-1), + numPinnedWritesToComplete(0) + {} /** Scalar PhysRegId constructor. */ explicit PhysRegId(RegClass _regClass, PhysRegIndex _regIdx, PhysRegIndex _flatIdx) - : RegId(_regClass, _regIdx), flatIdx(_flatIdx) + : RegId(_regClass, _regIdx), flatIdx(_flatIdx), + numPinnedWritesToComplete(0), pinned(false) {} /** Vector PhysRegId constructor (w/ elemIndex). 
*/ explicit PhysRegId(RegClass _regClass, PhysRegIndex _regIdx, ElemIndex elem_idx, PhysRegIndex flat_idx) - : RegId(_regClass, _regIdx, elem_idx), flatIdx(flat_idx) { } + : RegId(_regClass, _regIdx, elem_idx), flatIdx(flat_idx), + numPinnedWritesToComplete(0), pinned(false) + {} /** Visible RegId methods */ /** @{ */ @@ -295,17 +310,46 @@ class PhysRegId : private RegId { /** Flat index accessor */ const PhysRegIndex& flatIndex() const { return flatIdx; } - static PhysRegId elemId(const PhysRegId* vid, ElemIndex elem) + static PhysRegId elemId(PhysRegId* vid, ElemIndex elem) { assert(vid->isVectorPhysReg()); return PhysRegId(VecElemClass, vid->index(), elem); } + + int getNumPinnedWrites() const { return numPinnedWrites; } + + void setNumPinnedWrites(int numWrites) + { + // An instruction with a pinned destination reg can get + // squashed. The numPinnedWrites counter may be zero when + // the squash happens but we need to know if the dest reg + // was pinned originally in order to reset counters properly + // for a possible re-rename using the same physical reg (which + // may be required in case of a mem access order violation). + pinned = (numWrites != 0); + numPinnedWrites = numWrites; + } + + void decrNumPinnedWrites() { --numPinnedWrites; } + void incrNumPinnedWrites() { ++numPinnedWrites; } + + bool isPinned() const { return pinned; } + + int getNumPinnedWritesToComplete() const + { + return numPinnedWritesToComplete; + } + + void setNumPinnedWritesToComplete(int numWrites) + { + numPinnedWritesToComplete = numWrites; + } + + void decrNumPinnedWritesToComplete() { --numPinnedWritesToComplete; } + void incrNumPinnedWritesToComplete() { ++numPinnedWritesToComplete; } }; -/** Constant pointer definition. - * PhysRegIds only need to be created once and then we can just share - * pointers */ -using PhysRegIdPtr = const PhysRegId*; +using PhysRegIdPtr = PhysRegId*; namespace std {