#include <deque>
+#include "base/timebuf.hh"
#include "cpu/inst_seq.hh"
#include "cpu/o3/bpred_unit.hh"
#include "cpu/ozone/rename_table.hh"
void dumpInsts();
private:
+ TimeBuffer<int> numInstsReady;
+
typedef typename std::deque<DynInstPtr> InstBuff;
typedef typename InstBuff::iterator InstBuffIt;
+ InstBuff feBuffer;
+
InstBuff instBuffer;
int instBufferSize;
int maxInstBufferSize;
+ int latency;
+
int width;
int freeRegs;
FrontEnd<Impl>::FrontEnd(Params *params)
: branchPred(params),
icacheInterface(params->icacheInterface),
+ numInstsReady(params->frontEndLatency, 0),
instBufferSize(0),
maxInstBufferSize(params->maxInstBufferSize),
+ latency(params->frontEndLatency),
width(params->frontEndWidth),
freeRegs(params->numPhysicalRegs),
numPhysRegs(params->numPhysicalRegs),
if (switchedOut)
return;
+ for (int insts_to_queue = numInstsReady[-latency];
+ !instBuffer.empty() && insts_to_queue;
+ --insts_to_queue)
+ {
+ DPRINTF(FE, "Transferring instruction [sn:%lli] to the feBuffer\n",
+ instBuffer.front()->seqNum);
+ feBuffer.push_back(instBuffer.front());
+ instBuffer.pop_front();
+ }
+
+ numInstsReady.advance();
+
// @todo: Maybe I want to just have direct communication...
if (fromCommit->doneSeqNum) {
branchPred.update(fromCommit->doneSeqNum, 0);
// latency
instBuffer.push_back(inst);
++instBufferSize;
+ numInstsReady[0]++;
++num_inst;
#if FULL_SYSTEM
instruction->fault = fault;
instruction->setCanIssue();
instBuffer.push_back(instruction);
+ numInstsReady[0]++;
++instBufferSize;
}
freeRegs+= inst->numDestRegs();
}
+ while (!feBuffer.empty() &&
+ feBuffer.back()->seqNum > squash_num) {
+ DynInstPtr inst = feBuffer.back();
+
+ DPRINTF(FE, "Squashing instruction [sn:%lli] PC %#x\n",
+ inst->seqNum, inst->readPC());
+
+ inst->clearDependents();
+
+ feBuffer.pop_back();
+ --instBufferSize;
+
+ freeRegs+= inst->numDestRegs();
+ }
+
// Copy over rename table from the back end.
renameTable.copyFrom(backEnd->renameTable);
typename Impl::DynInstPtr
FrontEnd<Impl>::getInst()
{
- if (instBufferSize == 0) {
+ if (feBuffer.empty()) {
return NULL;
}
- DynInstPtr inst = instBuffer.front();
+ DynInstPtr inst = feBuffer.front();
- instBuffer.pop_front();
+ feBuffer.pop_front();
--instBufferSize;
squash(0, 0);
instBuffer.clear();
instBufferSize = 0;
+ feBuffer.clear();
status = Idle;
}
TimeBuffer<IssueToExec> i2e;
typename TimeBuffer<IssueToExec>::wire instsToExecute;
TimeBuffer<ExecToCommit> e2c;
- TimeBuffer<Writeback> numInstsToWB;
+ TimeBuffer<int> numInstsToWB;
TimeBuffer<CommStruct> *comm;
typename TimeBuffer<CommStruct>::wire toIEW;
Tick lastCommitCycle;
- bool robEmpty() { return instList.empty(); }
+ bool robEmpty() { return numInsts == 0; }
bool isFull() { return numInsts >= numROBEntries; }
bool isBlocked() { return status == Blocked || dispatchStatus == Blocked; }
}
void instToCommit(DynInstPtr &inst);
+ void readyInstsForCommit();
void switchOut();
void doSwitchOut();
MemReqPtr memReq;
+ int latency;
+
// General back end width. Used if the more specific isn't given.
int width;
// Dispatch width.
int dispatchWidth;
- int numDispatchEntries;
int dispatchSize;
int waitingInsts;
int numROBEntries;
int numInsts;
+ bool lsqLimits;
std::set<InstSeqNum> waitingMemOps;
typedef std::set<InstSeqNum>::iterator MemIt;
InstSeqNum squashSeqNum;
Addr squashNextPC;
- Fault faultFromFetch;
- bool fetchHasFault;
-
bool switchedOut;
bool switchPending;
std::list<DynInstPtr> replayList;
std::list<DynInstPtr> writeback;
- int latency;
-
int squashLatency;
bool exactFullStall;
Stats::Scalar<> lsqInversion;
Stats::Vector<> nIssuedDist;
+/*
Stats::VectorDistribution<> issueDelayDist;
Stats::VectorDistribution<> queueResDist;
+*/
/*
Stats::Vector<> stat_fu_busy;
Stats::Vector2d<> stat_fuBusy;
Stats::Vector<> ROBCount; // cumulative ROB occupancy
Stats::Formula ROBOccRate;
- Stats::VectorDistribution<> ROBOccDist;
+// Stats::VectorDistribution<> ROBOccDist;
public:
void dumpInsts();
// iewStage->wakeCPU();
- if (be->isSwitchedOut())
- return;
+ assert(inst->isSquashed() || !be->isSwitchedOut());
+
+// if (be->isSwitchedOut() && inst->isLoad())
+// return;
if (dcacheMiss) {
be->removeDcacheMiss(inst);
template <class Impl>
LWBackEnd<Impl>::LWBackEnd(Params *params)
- : d2i(5, 5), i2e(5, 5), e2c(5, 5), numInstsToWB(5, 5),
+ : d2i(5, 5), i2e(5, 5), e2c(5, 5), numInstsToWB(params->backEndLatency, 0),
trapSquash(false), xcSquash(false), cacheCompletionEvent(this),
- dcacheInterface(params->dcacheInterface), width(params->backEndWidth),
+ dcacheInterface(params->dcacheInterface), latency(params->backEndLatency),
+ width(params->backEndWidth), lsqLimits(params->lsqLimits),
exactFullStall(true)
{
numROBEntries = params->numROBEntries;
numInsts = 0;
- numDispatchEntries = 32;
maxOutstandingMemOps = params->maxOutstandingMemOps;
numWaitingMemOps = 0;
waitingInsts = 0;
LWBackEnd<Impl>::regStats()
{
using namespace Stats;
+ LSQ.regStats();
+
robCapEvents
.init(cpu->number_of_threads)
.name(name() + ".ROB:cap_events")
.desc("Number of insts issued each cycle")
.flags(total | pdf | dist)
;
+/*
issueDelayDist
.init(Num_OpClasses,0,99,2)
.name(name() + ".ISSUE:")
for (int i = 0; i < Num_OpClasses; ++i) {
queueResDist.subname(i, opClassStrings[i]);
}
-
+*/
writebackCount
.init(cpu->number_of_threads)
.name(name() + ".WB:count")
.flags(total)
;
ROBOccRate = ROBCount / cpu->numCycles;
-
+/*
ROBOccDist
.init(cpu->number_of_threads,0,numROBEntries,2)
.name(name() + ".ROB:occ_dist")
.desc("ROB Occupancy per cycle")
.flags(total | cdf)
;
+*/
}
template <class Impl>
{
DPRINTF(BE, "Ticking back end\n");
+ // Read in any done instruction information and update the IQ or LSQ.
+ updateStructures();
+
if (switchPending && robEmpty() && !LSQ.hasStoresToWB()) {
cpu->signalSwitched();
return;
}
+ readyInstsForCommit();
+
+ numInstsToWB.advance();
+
ROBCount[0]+= numInsts;
wbCycle = 0;
- // Read in any done instruction information and update the IQ or LSQ.
- updateStructures();
-
#if FULL_SYSTEM
checkInterrupts();
while (numInsts < numROBEntries &&
numWaitingMemOps < maxOutstandingMemOps) {
// Get instruction from front of time buffer
+ if (lsqLimits && LSQ.isFull()) {
+ break;
+ }
+
DynInstPtr inst = frontEnd->getInst();
if (!inst) {
break;
inst->setIssued();
inst->setExecuted();
inst->setCanCommit();
+ numInstsToWB[0]++;
} else {
DPRINTF(BE, "Instruction [sn:%lli] ready, addding to "
"exeList.\n",
void
LWBackEnd<Impl>::instToCommit(DynInstPtr &inst)
{
-
DPRINTF(BE, "Sending instructions to commit [sn:%lli] PC %#x.\n",
inst->seqNum, inst->readPC());
if (!inst->isSquashed()) {
- DPRINTF(BE, "Writing back instruction [sn:%lli] PC %#x.\n",
- inst->seqNum, inst->readPC());
-
- inst->setCanCommit();
-
if (inst->isExecuted()) {
inst->setResultReady();
int dependents = wakeDependents(inst);
}
}
+ writeback.push_back(inst);
+
+ numInstsToWB[0]++;
+
writebackCount[0]++;
}
+
+template <class Impl>
+void
+LWBackEnd<Impl>::readyInstsForCommit()
+{
+ for (int i = numInstsToWB[-latency];
+ !writeback.empty() && i;
+ --i)
+ {
+ DynInstPtr inst = writeback.front();
+ writeback.pop_front();
+ if (!inst->isSquashed()) {
+ DPRINTF(BE, "Writing back instruction [sn:%lli] PC %#x.\n",
+ inst->seqNum, inst->readPC());
+
+ inst->setCanCommit();
+ }
+ }
+}
+
#if 0
template <class Impl>
void
++freed_regs;
}
+#if FULL_SYSTEM
+ if (thread->profile) {
+// bool usermode =
+// (xc->readMiscReg(AlphaISA::IPR_DTB_CM) & 0x18) != 0;
+// thread->profilePC = usermode ? 1 : inst->readPC();
+ thread->profilePC = inst->readPC();
+ ProfileNode *node = thread->profile->consume(thread->getXCProxy(),
+ inst->staticInst);
+
+ if (node)
+ thread->profileNode = node;
+ }
+#endif
+
if (inst->traceData) {
inst->traceData->setFetchSeq(inst->seqNum);
inst->traceData->setCPSeq(thread->numInst);
while (!instList.empty() && inst_num < commitWidth) {
if (instList.back()->isSquashed()) {
instList.back()->clearDependents();
+ ROBSquashedInsts[instList.back()->threadNumber]++;
instList.pop_back();
--numInsts;
- ROBSquashedInsts[instList.back()->threadNumber]++;
continue;
}
LSQ.squash(sn);
int freed_regs = 0;
- InstListIt waiting_list_end = waitingList.end();
+ InstListIt insts_end_it = waitingList.end();
InstListIt insts_it = waitingList.begin();
- while (insts_it != waiting_list_end && (*insts_it)->seqNum > sn)
+ while (insts_it != insts_end_it && (*insts_it)->seqNum > sn)
{
if ((*insts_it)->isSquashed()) {
++insts_it;
while (!instList.empty() && (*insts_it)->seqNum > sn)
{
if ((*insts_it)->isSquashed()) {
+ panic("Instruction should not be already squashed and on list!");
++insts_it;
continue;
}
--numInsts;
}
- insts_it = waitingList.begin();
- while (!waitingList.empty() && insts_it != waitingList.end()) {
- if ((*insts_it)->seqNum < sn) {
- ++insts_it;
- continue;
- }
- assert((*insts_it)->isSquashed());
-
- waitingList.erase(insts_it++);
- waitingInsts--;
- }
-
while (memBarrier && memBarrier->seqNum > sn) {
DPRINTF(BE, "[sn:%lli] Memory barrier squashed (or previously "
"squashed)\n", memBarrier->seqNum);
}
}
+ insts_it = replayList.begin();
+ insts_end_it = replayList.end();
+ while (!replayList.empty() && insts_it != insts_end_it) {
+ if ((*insts_it)->seqNum < sn) {
+ ++insts_it;
+ continue;
+ }
+ assert((*insts_it)->isSquashed());
+
+ replayList.erase(insts_it++);
+ }
+
frontEnd->addFreeRegs(freed_regs);
}
frontEnd->squash(inst->seqNum - 1, inst->readPC());
}
-template <class Impl>
-void
-LWBackEnd<Impl>::fetchFault(Fault &fault)
-{
- faultFromFetch = fault;
- fetchHasFault = true;
-}
-
template <class Impl>
void
LWBackEnd<Impl>::switchOut()
// yet written back.
assert(robEmpty());
assert(!LSQ.hasStoresToWB());
-
+ writeback.clear();
+ for (int i = 0; i < numInstsToWB.getSize() + 1; ++i)
+ numInstsToWB.advance();
+
+// squash(0);
+ assert(waitingList.empty());
+ assert(instList.empty());
+ assert(replayList.empty());
+ assert(writeback.empty());
LSQ.switchOut();
-
- squash(0);
}
template <class Impl>
void
LWBackEnd<Impl>::takeOverFrom(ExecContext *old_xc)
{
+ assert(!squashPending);
+ squashSeqNum = 0;
+ squashNextPC = 0;
xcSquash = false;
trapSquash = false;
++num;
}
+ inst_list_it = --(writeback.end());
+
+ cprintf("Writeback list size: %i\n", writeback.size());
+
+ while (inst_list_it != writeback.end())
+ {
+ cprintf("Instruction:%i\n",
+ num);
+ if (!(*inst_list_it)->isSquashed()) {
+ if (!(*inst_list_it)->isIssued()) {
+ ++valid_num;
+ cprintf("Count:%i\n", valid_num);
+ } else if ((*inst_list_it)->isMemRef() &&
+ !(*inst_list_it)->memOpDone) {
+ // Loads that have not been marked as executed still count
+ // towards the total instructions.
+ ++valid_num;
+ cprintf("Count:%i\n", valid_num);
+ }
+ }
+
+ cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n"
+ "Issued:%i\nSquashed:%i\n",
+ (*inst_list_it)->readPC(),
+ (*inst_list_it)->seqNum,
+ (*inst_list_it)->threadNumber,
+ (*inst_list_it)->isIssued(),
+ (*inst_list_it)->isSquashed());
+
+ if ((*inst_list_it)->isMemRef()) {
+ cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone);
+ }
+
+ cprintf("\n");
+
+ inst_list_it--;
+ ++num;
+ }
+
cprintf("Waiting list size: %i\n", waitingList.size());
inst_list_it = --(waitingList.end());
/** Returns the name of the LSQ unit. */
std::string name() const;
+ void regStats();
+
/** Sets the CPU pointer. */
void setCPU(FullCPU *cpu_ptr)
{ cpu = cpu_ptr; }
int numLoads() { return loads; }
/** Returns the number of stores in the SQ. */
- int numStores() { return stores; }
+ int numStores() { return stores + storesInFlight; }
/** Returns if either the LQ or SQ is full. */
bool isFull() { return lqFull() || sqFull(); }
bool lqFull() { return loads >= (LQEntries - 1); }
/** Returns if the SQ is full. */
- bool sqFull() { return stores >= (SQEntries - 1); }
+ bool sqFull() { return (stores + storesInFlight) >= (SQEntries - 1); }
/** Debugging function to dump instructions in the LSQ. */
void dumpInsts();
private:
/** Completes the store at the specified index. */
- void completeStore(int store_idx);
+ void completeStore(DynInstPtr &inst);
+
+ void removeStore(int store_idx);
private:
/** Pointer to the CPU. */
int storesToWB;
+ public:
+ int storesInFlight;
+
+ private:
/// @todo Consider moving to a more advanced model with write vs read ports
/** The number of cache ports available each cycle. */
int cachePorts;
//list<InstSeqNum> mshrSeqNums;
+ /** Tota number of memory ordering violations. */
+ Stats::Scalar<> lsqMemOrderViolation;
+
//Stats::Scalar<> dcacheStallCycles;
Counter lastDcacheStall;
// lsqPtr->cpu->wakeCPU();
if (lsqPtr->isSwitchedOut()) {
+ panic("Should not be switched out!");
if (wbEvent)
delete wbEvent;
delete wbEvent;
}
- lsqPtr->completeStore(inst->sqIdx);
+ lsqPtr->completeStore(inst);
+ lsqPtr->removeStore(inst->sqIdx);
+ --(lsqPtr->storesInFlight);
+
+ DPRINTF(OzoneLSQ, "StoresInFlight: %i\n", lsqPtr->storesInFlight);
if (miss)
be->removeDcacheMiss(inst);
}
template <class Impl>
OzoneLWLSQ<Impl>::OzoneLWLSQ()
- : loads(0), stores(0), storesToWB(0), stalled(false), isLoadBlocked(false),
+ : loads(0), stores(0), storesToWB(0), storesInFlight(0), stalled(false), isLoadBlocked(false),
loadBlockedHandled(false)
{
}
return "lsqunit";
}
+template<class Impl>
+void
+OzoneLWLSQ<Impl>::regStats()
+{
+ lsqMemOrderViolation
+ .name(name() + ".memOrderViolation")
+ .desc("Number of memory ordering violations");
+}
+
template<class Impl>
void
OzoneLWLSQ<Impl>::clearLQ()
OzoneLWLSQ<Impl>::numFreeEntries()
{
unsigned free_lq_entries = LQEntries - loads;
- unsigned free_sq_entries = SQEntries - stores;
+ unsigned free_sq_entries = SQEntries - (stores + storesInFlight);
// Both the LQ and SQ entries have an extra dummy entry to differentiate
// empty/full conditions. Subtract 1 from the free entries.
// A load incorrectly passed this store. Squash and refetch.
// For now return a fault to show that it was unsuccessful.
memDepViolator = (*lq_it);
+ ++lsqMemOrderViolation;
return TheISA::genMachineCheckFault();
}
if ((*sq_it).size == 0 && !(*sq_it).completed) {
sq_it--;
- completeStore(inst->sqIdx);
-
+ removeStore(inst->sqIdx);
+ completeStore(inst);
continue;
}
inst->sqIdx,inst->readPC(),
req->paddr, *(req->data),
inst->seqNum);
+ DPRINTF(OzoneLSQ, "StoresInFlight: %i\n",
+ storesInFlight + 1);
if (dcacheInterface) {
assert(!req->completionEvent);
}
sq_it--;
}
+ ++storesInFlight;
+// removeStore(inst->sqIdx);
} else {
panic("Must HAVE DCACHE!!!!!\n");
}
OzoneLWLSQ<Impl>::squash(const InstSeqNum &squashed_num)
{
DPRINTF(OzoneLSQ, "Squashing until [sn:%lli]!"
- "(Loads:%i Stores:%i)\n",squashed_num,loads,stores);
+ "(Loads:%i Stores:%i)\n",squashed_num,loads,stores+storesInFlight);
LQIt lq_it = loadQueue.begin();
template <class Impl>
void
-OzoneLWLSQ<Impl>::completeStore(int store_idx)
+OzoneLWLSQ<Impl>::removeStore(int store_idx)
{
SQHashIt sq_hash_it = SQItHash.find(store_idx);
assert(sq_hash_it != SQItHash.end());
(*sq_it).completed = true;
DynInstPtr inst = (*sq_it).inst;
- --storesToWB;
-
if (isStalled() &&
inst->seqNum == stallingStoreIsn) {
DPRINTF(OzoneLSQ, "Unstalling, stalling store [sn:%lli] "
SQItHash.erase(sq_hash_it);
SQIndices.push(inst->sqIdx);
storeQueue.erase(sq_it);
+}
+
+template <class Impl>
+void
+OzoneLWLSQ<Impl>::completeStore(DynInstPtr &inst)
+{
+ --storesToWB;
--stores;
inst->setCompleted();
}
// Clear the queue to free up resources
+ assert(stores == 0);
+ assert(storeQueue.empty());
+ assert(loads == 0);
+ assert(loadQueue.empty());
+ assert(storesInFlight == 0);
storeQueue.clear();
loadQueue.clear();
- loads = stores = storesToWB = 0;
+ loads = stores = storesToWB = storesInFlight = 0;
}
template <class Impl>
unsigned cachePorts;
unsigned width;
+ unsigned frontEndLatency;
unsigned frontEndWidth;
+ unsigned backEndLatency;
unsigned backEndWidth;
unsigned backEndSquashLatency;
- unsigned backEndLatency;
unsigned maxInstBufferSize;
unsigned numPhysicalRegs;
unsigned maxOutstandingMemOps;
//
unsigned LQEntries;
unsigned SQEntries;
+ bool lsqLimits;
//
// Memory dependence