--- /dev/null
+
+#ifndef __CPU_O3_DEP_GRAPH_HH__
+#define __CPU_O3_DEP_GRAPH_HH__
+
+#include "cpu/o3/comm.hh"
+
+template <class DynInstPtr>
+class DependencyEntry
+{
+ public:
+ DependencyEntry()
+ : inst(NULL), next(NULL)
+ { }
+
+ DynInstPtr inst;
+ //Might want to include data about what arch. register the
+ //dependence is waiting on.
+ DependencyEntry<DynInstPtr> *next;
+};
+
+template <class DynInstPtr>
+class DependencyGraph
+{
+ public:
+ typedef DependencyEntry<DynInstPtr> DepEntry;
+
+ DependencyGraph()
+ : numEntries(0), memAllocCounter(0), nodesTraversed(0), nodesRemoved(0)
+ { }
+
+ void resize(int num_entries);
+
+ void reset();
+
+ void insert(PhysRegIndex idx, DynInstPtr &new_inst);
+
+ void setInst(PhysRegIndex idx, DynInstPtr &new_inst)
+ { dependGraph[idx].inst = new_inst; }
+
+ void clearInst(PhysRegIndex idx)
+ { dependGraph[idx].inst = NULL; }
+
+ void remove(PhysRegIndex idx, DynInstPtr &inst_to_remove);
+
+ DynInstPtr pop(PhysRegIndex idx);
+
+ bool empty(PhysRegIndex idx) { return !dependGraph[idx].next; }
+
+ /** Debugging function to dump out the dependency graph.
+ */
+ void dump();
+
+ private:
+ /** Array of linked lists. Each linked list is a list of all the
+ * instructions that depend upon a given register. The actual
+ * register's index is used to index into the graph; ie all
+ * instructions in flight that are dependent upon r34 will be
+ * in the linked list of dependGraph[34].
+ */
+ DepEntry *dependGraph;
+
+ int numEntries;
+
+ // Debug variable, remove when done testing.
+ unsigned memAllocCounter;
+
+ public:
+ uint64_t nodesTraversed;
+ uint64_t nodesRemoved;
+};
+
+template <class DynInstPtr>
+void
+DependencyGraph<DynInstPtr>::resize(int num_entries)
+{
+ numEntries = num_entries;
+ dependGraph = new DepEntry[numEntries];
+}
+
+template <class DynInstPtr>
+void
+DependencyGraph<DynInstPtr>::reset()
+{
+ // Clear the dependency graph
+ DepEntry *curr;
+ DepEntry *prev;
+
+ for (int i = 0; i < numEntries; ++i) {
+ curr = dependGraph[i].next;
+
+ while (curr) {
+ memAllocCounter--;
+
+ prev = curr;
+ curr = prev->next;
+ prev->inst = NULL;
+
+ delete prev;
+ }
+
+ if (dependGraph[i].inst) {
+ dependGraph[i].inst = NULL;
+ }
+
+ dependGraph[i].next = NULL;
+ }
+}
+
+template <class DynInstPtr>
+void
+DependencyGraph<DynInstPtr>::insert(PhysRegIndex idx, DynInstPtr &new_inst)
+{
+ //Add this new, dependent instruction at the head of the dependency
+ //chain.
+
+ // First create the entry that will be added to the head of the
+ // dependency chain.
+ DepEntry *new_entry = new DepEntry;
+ new_entry->next = dependGraph[idx].next;
+ new_entry->inst = new_inst;
+
+ // Then actually add it to the chain.
+ dependGraph[idx].next = new_entry;
+
+ ++memAllocCounter;
+}
+
+
+template <class DynInstPtr>
+void
+DependencyGraph<DynInstPtr>::remove(PhysRegIndex idx,
+ DynInstPtr &inst_to_remove)
+{
+ DepEntry *prev = &dependGraph[idx];
+ DepEntry *curr = dependGraph[idx].next;
+
+ // Make sure curr isn't NULL. Because this instruction is being
+ // removed from a dependency list, it must have been placed there at
+ // an earlier time. The dependency chain should not be empty,
+ // unless the instruction dependent upon it is already ready.
+ if (curr == NULL) {
+ return;
+ }
+
+ nodesRemoved++;
+
+ // Find the instruction to remove within the dependency linked list.
+ while (curr->inst != inst_to_remove) {
+ prev = curr;
+ curr = curr->next;
+ nodesTraversed++;
+
+ assert(curr != NULL);
+ }
+
+ // Now remove this instruction from the list.
+ prev->next = curr->next;
+
+ --memAllocCounter;
+
+ // Could push this off to the destructor of DependencyEntry
+ curr->inst = NULL;
+
+ delete curr;
+}
+
+template <class DynInstPtr>
+DynInstPtr
+DependencyGraph<DynInstPtr>::pop(PhysRegIndex idx)
+{
+ DepEntry *node;
+ node = dependGraph[idx].next;
+ DynInstPtr inst = NULL;
+ if (node) {
+ inst = node->inst;
+ dependGraph[idx].next = node->next;
+ node->inst = NULL;
+ memAllocCounter--;
+ delete node;
+ }
+ return inst;
+}
+
+template <class DynInstPtr>
+void
+DependencyGraph<DynInstPtr>::dump()
+{
+ DepEntry *curr;
+
+ for (int i = 0; i < numEntries; ++i)
+ {
+ curr = &dependGraph[i];
+
+ if (curr->inst) {
+ cprintf("dependGraph[%i]: producer: %#x [sn:%lli] consumer: ",
+ i, curr->inst->readPC(), curr->inst->seqNum);
+ } else {
+ cprintf("dependGraph[%i]: No producer. consumer: ", i);
+ }
+
+ while (curr->next != NULL) {
+ curr = curr->next;
+
+ cprintf("%#x [sn:%lli] ",
+ curr->inst->readPC(), curr->inst->seqNum);
+ }
+
+ cprintf("\n");
+ }
+ cprintf("memAllocCounter: %i\n", memAllocCounter);
+}
+
+#endif // __CPU_O3_DEP_GRAPH_HH__
/*
- * Copyright (c) 2004-2005 The Regents of The University of Michigan
+ * Copyright (c) 2004-2006 The Regents of The University of Michigan
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
class FUPool;
/**
- * DefaultIEW handles both single threaded and SMT IEW(issue/execute/writeback).
- * It handles the dispatching of instructions to the LSQ/IQ as part of the issue
- * stage, and has the IQ try to issue instructions each cycle. The execute
- * latency is actually tied into the issue latency to allow the IQ to be able to
+ * DefaultIEW handles both single threaded and SMT IEW
+ * (issue/execute/writeback). It handles the dispatching of
+ * instructions to the LSQ/IQ as part of the issue stage, and has the
+ * IQ try to issue instructions each cycle. The execute latency is
+ * actually tied into the issue latency to allow the IQ to be able to
* do back-to-back scheduling without having to speculatively schedule
- * instructions. This happens by having the IQ have access to the functional
- * units, and the IQ gets the execution latencies from the FUs when it issues
- * instructions. Instructions reach the execute stage on the last cycle of
- * their execution, which is when the IQ knows to wake up any dependent
- * instructions, allowing back to back scheduling. The execute portion of IEW
- * separates memory instructions from non-memory instructions, either telling
- * the LSQ to execute the instruction, or executing the instruction directly.
- * The writeback portion of IEW completes the instructions by waking up any
- * dependents, and marking the register ready on the scoreboard.
+ * instructions. This happens by having the IQ have access to the
+ * functional units, and the IQ gets the execution latencies from the
+ * FUs when it issues instructions. Instructions reach the execute
+ * stage on the last cycle of their execution, which is when the IQ
+ * knows to wake up any dependent instructions, allowing back to back
+ * scheduling. The execute portion of IEW separates memory
+ * instructions from non-memory instructions, either telling the LSQ
+ * to execute the instruction, or executing the instruction directly.
+ * The writeback portion of IEW completes the instructions by waking
+ * up any dependents, and marking the register ready on the
+ * scoreboard.
*/
template<class Impl>
class DefaultIEW
/** Tells CPU that the IEW stage is inactive and idle. */
inline void deactivateStage();
-//#if !FULL_SYSTEM
/** Returns if the LSQ has any stores to writeback. */
bool hasStoresToWB() { return ldstQueue.hasStoresToWB(); }
-//#endif
private:
/** Sends commit proper information for a squash due to a branch
/** Stat for total number of mispredicted branches detected at execute. */
Stats::Formula branchMispredicts;
- Stats::Vector<> exe_swp;
- Stats::Vector<> exe_nop;
- Stats::Vector<> exe_refs;
- Stats::Vector<> exe_branches;
+ Stats::Vector<> exeSwp;
+ Stats::Vector<> exeNop;
+ Stats::Vector<> exeRefs;
+ Stats::Vector<> exeBranches;
// Stats::Vector<> issued_ops;
/*
Stats::Vector<> dist_unissued;
Stats::Vector2d<> stat_issued_inst_type;
*/
- Stats::Formula issue_rate;
+ Stats::Formula issueRate;
Stats::Formula iewExecStoreInsts;
// Stats::Formula issue_op_rate;
// Stats::Formula fu_busy_rate;
Stats::Vector<> iewInstsToCommit;
- Stats::Vector<> writeback_count;
- Stats::Vector<> producer_inst;
- Stats::Vector<> consumer_inst;
- Stats::Vector<> wb_penalized;
-
- Stats::Formula wb_rate;
- Stats::Formula wb_fanout;
- Stats::Formula wb_penalized_rate;
+ Stats::Vector<> writebackCount;
+ Stats::Vector<> producerInst;
+ Stats::Vector<> consumerInst;
+ Stats::Vector<> wbPenalized;
+
+ Stats::Formula wbRate;
+ Stats::Formula wbFanout;
+ Stats::Formula wbPenalizedRate;
};
#endif // __CPU_O3_IEW_HH__
/*
- * Copyright (c) 2004-2005 The Regents of The University of Michigan
+ * Copyright (c) 2004-2006 The Regents of The University of Michigan
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
if (!inst->isExecuted()) {
inst->setExecuted();
- // Execute again to copy data to proper place.
+ // Complete access to copy data to proper place.
if (inst->isStore()) {
inst->completeAcc();
}
// Need to insert instruction into queue to commit
iewStage->instToCommit(inst);
- //wroteToTimeBuffer = true;
iewStage->activityThisCycle();
inst = NULL;
template<class Impl>
DefaultIEW<Impl>::DefaultIEW(Params *params)
- : // Just make this time buffer really big for now
- // @todo: Make this into a parameter.
+ : // @todo: Make this into a parameter.
issueToExecQueue(5, 5),
instQueue(params),
ldstQueue(params),
numThreads(params->numberOfThreads),
switchedOut(false)
{
- DPRINTF(IEW, "executeIntWidth: %i.\n", params->executeIntWidth);
_status = Active;
exeStatus = Running;
wbStatus = Idle;
updateLSQNextCycle = false;
- // @todo: Make into a parameter
skidBufferMax = (3 * (renameToIEWDelay * params->renameWidth)) + issueWidth;
}
instQueue.regStats();
- //ldstQueue.regStats();
-
iewIdleCycles
.name(name() + ".iewIdleCycles")
.desc("Number of cycles IEW is idle");
.name(name() + ".iewUnblockCycles")
.desc("Number of cycles IEW is unblocking");
-// iewWBInsts;
-
iewDispatchedInsts
.name(name() + ".iewDispatchedInsts")
.desc("Number of instructions dispatched to IQ");
.name(name() + ".iewExecLoadInsts")
.desc("Number of load instructions executed")
.flags(total);
-/*
- iewExecStoreInsts
- .name(name() + ".iewExecStoreInsts")
- .desc("Number of store instructions executed");
-*/
+
iewExecSquashedInsts
.name(name() + ".iewExecSquashedInsts")
.desc("Number of squashed instructions skipped in execute");
branchMispredicts = predictedTakenIncorrect + predictedNotTakenIncorrect;
- exe_swp
+ exeSwp
.init(cpu->number_of_threads)
.name(name() + ".EXEC:swp")
.desc("number of swp insts executed")
.flags(total)
;
- exe_nop
+ exeNop
.init(cpu->number_of_threads)
.name(name() + ".EXEC:nop")
.desc("number of nop insts executed")
.flags(total)
;
- exe_refs
+ exeRefs
.init(cpu->number_of_threads)
.name(name() + ".EXEC:refs")
.desc("number of memory reference insts executed")
.flags(total)
;
- exe_branches
+ exeBranches
.init(cpu->number_of_threads)
.name(name() + ".EXEC:branches")
.desc("Number of branches executed")
.flags(total)
;
- issue_rate
+ issueRate
.name(name() + ".EXEC:rate")
.desc("Inst execution rate")
.flags(total)
;
- issue_rate = iewExecutedInsts / cpu->numCycles;
+ issueRate = iewExecutedInsts / cpu->numCycles;
iewExecStoreInsts
.name(name() + ".EXEC:stores")
.desc("Number of stores executed")
.flags(total)
;
- iewExecStoreInsts = exe_refs - iewExecLoadInsts;
+ iewExecStoreInsts = exeRefs - iewExecLoadInsts;
/*
for (int i=0; i<Num_OpClasses; ++i) {
stringstream subname;
.flags(total)
;
- writeback_count
+ writebackCount
.init(cpu->number_of_threads)
.name(name() + ".WB:count")
.desc("cumulative count of insts written-back")
.flags(total)
;
- producer_inst
+ producerInst
.init(cpu->number_of_threads)
.name(name() + ".WB:producers")
.desc("num instructions producing a value")
.flags(total)
;
- consumer_inst
+ consumerInst
.init(cpu->number_of_threads)
.name(name() + ".WB:consumers")
.desc("num instructions consuming a value")
.flags(total)
;
- wb_penalized
+ wbPenalized
.init(cpu->number_of_threads)
.name(name() + ".WB:penalized")
.desc("number of instrctions required to write to 'other' IQ")
.flags(total)
;
- wb_penalized_rate
+ wbPenalizedRate
.name(name() + ".WB:penalized_rate")
.desc ("fraction of instructions written-back that wrote to 'other' IQ")
.flags(total)
;
- wb_penalized_rate = wb_penalized / writeback_count;
+ wbPenalizedRate = wbPenalized / writebackCount;
- wb_fanout
+ wbFanout
.name(name() + ".WB:fanout")
.desc("average fanout of values written-back")
.flags(total)
;
- wb_fanout = producer_inst / consumer_inst;
+ wbFanout = producerInst / consumerInst;
- wb_rate
+ wbRate
.name(name() + ".WB:rate")
.desc("insts written-back per cycle")
.flags(total)
;
- wb_rate = writeback_count / cpu->numCycles;
+ wbRate = writebackCount / cpu->numCycles;
}
template<class Impl>
instQueue.squash(tid);
// Tell the LDSTQ to start squashing.
- ldstQueue.squash(fromCommit->commitInfo[tid].doneSeqNum,tid);
+ ldstQueue.squash(fromCommit->commitInfo[tid].doneSeqNum, tid);
updatedQueues = true;
DPRINTF(IEW, "[tid:%i]: Squashing from a specific instruction, PC: %#x "
"[sn:%i].\n", tid, inst->readPC(), inst->seqNum);
- // Tell rename to squash through the time buffer.
toCommit->squash[tid] = true;
toCommit->squashedSeqNum[tid] = inst->seqNum;
toCommit->mispredPC[tid] = inst->readPC();
toCommit->nextPC[tid] = inst->readNextPC();
toCommit->branchMispredict[tid] = true;
- // Prediction was incorrect, so send back inverse.
toCommit->branchTaken[tid] = inst->readNextPC() !=
(inst->readPC() + sizeof(TheISA::MachInst));
toCommit->includeSquashInst[tid] = false;
- //toCommit->iewSquashNum[tid] = inst->seqNum;
wroteToTimeBuffer = true;
}
DPRINTF(IEW, "[tid:%i]: Squashing from a specific instruction, "
"PC: %#x [sn:%i].\n", tid, inst->readPC(), inst->seqNum);
- // Tell rename to squash through the time buffer.
toCommit->squash[tid] = true;
toCommit->squashedSeqNum[tid] = inst->seqNum;
toCommit->nextPC[tid] = inst->readNextPC();
toCommit->includeSquashInst[tid] = false;
- //toCommit->iewSquashNum[tid] = inst->seqNum;
wroteToTimeBuffer = true;
}
// reprocessed when this stage unblocks.
skidInsert(tid);
- // Set the status to Blocked.
dispatchStatus[tid] = Blocked;
}
// to. If there are free write ports at the time, then go ahead
// and write the instruction to that time. If there are not,
// keep looking back to see where's the first time there's a
- // free slot. What happens if you run out of free spaces?
- // For now naively assume that all instructions take one cycle.
- // Otherwise would have to look into the time buffer based on the
- // latency of the instruction.
+ // free slot.
while ((*iewQueue)[wbCycle].insts[wbNumInst]) {
++wbNumInst;
if (wbNumInst == issueWidth) {
DefaultIEW<Impl>::sortInsts()
{
int insts_from_rename = fromRename->size;
-
+#ifdef DEBUG
for (int i = 0; i < numThreads; i++)
assert(insts[i].empty());
-
+#endif
for (int i = 0; i < insts_from_rename; ++i) {
insts[fromRename->insts[i]->threadNumber].push(fromRename->insts[i]);
}
// Be sure to mark these instructions as ready so that the
// commit stage can go ahead and execute them, and mark
// them as issued so the IQ doesn't reprocess them.
- // -------------
- // @TODO: What happens if the ldstqueue is full?
- // Do we process the other instructions?
// Check for squashed instructions.
if (inst->isSquashed()) {
++iewDispStoreInsts;
if (inst->isNonSpeculative()) {
+ // Non-speculative stores (namely store conditionals)
+ // need to be set as "canCommit()" so that commit can
+ // process them when they reach the head of commit.
inst->setCanCommit();
instQueue.insertNonSpec(inst);
add_to_iq = false;
toRename->iewInfo[tid].dispatchedToLSQ++;
#if FULL_SYSTEM
} else if (inst->isMemBarrier() || inst->isWriteBarrier()) {
+ // Same as non-speculative stores.
inst->setCanCommit();
instQueue.insertBarrier(inst);
add_to_iq = false;
DPRINTF(IEW, "[tid:%i]: Issue: Nonspeculative instruction "
"encountered, skipping.\n", tid);
- // Same hack as with stores.
+ // Same as non-speculative stores.
inst->setCanCommit();
// Specifically insert it as nonspeculative.
inst->setExecuted();
inst->setCanCommit();
- instQueue.advanceTail(inst);
+ instQueue.recordProducer(inst);
- exe_nop[tid]++;
+ exeNop[tid]++;
add_to_iq = false;
} else if (inst->isExecuted()) {
inst->setIssued();
inst->setCanCommit();
- instQueue.advanceTail(inst);
+ instQueue.recordProducer(inst);
add_to_iq = false;
} else {
void
DefaultIEW<Impl>::executeInsts()
{
- //bool fetch_redirect[(*activeThreads).size()];
wbNumInst = 0;
wbCycle = 0;
// Execute/writeback any instructions that are available.
int inst_num = 0;
- for ( ; inst_num < issueWidth && /* Haven't exceeded issue bandwidth */
- fromIssue->insts[inst_num];
- ++inst_num) {
+ for ( ; inst_num < issueWidth && fromIssue->insts[inst_num];
+ ++inst_num) {
DPRINTF(IEW, "Execute: Executing instructions from IQ.\n");
- // Get instruction from issue's queue.
DynInstPtr inst = fromIssue->insts[inst_num];
DPRINTF(IEW, "Execute: Processing PC %#x, [tid:%i] [sn:%i].\n",
inst->readPC(), inst->threadNumber,inst->seqNum);
// Check if the instruction is squashed; if so then skip it
- // and don't count it towards the FU usage.
if (inst->isSquashed()) {
DPRINTF(IEW, "Execute: Instruction was squashed.\n");
// Loads will mark themselves as executed, and their writeback
// event adds the instruction to the queue to commit
fault = ldstQueue.executeLoad(inst);
-
-// ++iewExecLoadInsts;
} else if (inst->isStore()) {
ldstQueue.executeStore(inst);
-// ++iewExecStoreInsts;
-
// If the store had a fault then it may not have a mem req
if (inst->req && !(inst->req->flags & LOCKED)) {
inst->setExecuted();
instToCommit(inst);
}
- // Store conditionals will mark themselves as executed, and
- // their writeback event will add the instruction to the queue
- // to commit.
+
+ // Store conditionals will mark themselves as
+ // executed, and their writeback event will add the
+ // instruction to the queue to commit.
} else {
panic("Unexpected memory type!\n");
}
updateExeInstStats(inst);
- // Check if branch was correct. This check happens after the
- // instruction is added to the queue because even if the branch
- // is mispredicted, the branch instruction itself is still valid.
- // Only handle this if there hasn't already been something that
+ // Check if branch prediction was correct, if not then we need
+ // to tell commit to squash in flight instructions. Only
+ // handle this if there hasn't already been something that
// redirects fetch in this group of instructions.
// This probably needs to prioritize the redirects if a different
} else if (ldstQueue.violation(tid)) {
fetchRedirect[tid] = true;
- // Get the DynInst that caused the violation. Note that this
+ // If there was an ordering violation, then get the
+ // DynInst that caused the violation. Note that this
// clears the violation signal.
DynInstPtr violator;
violator = ldstQueue.getMemDepViolator(tid);
void
DefaultIEW<Impl>::writebackInsts()
{
- // Loop through the head of the time buffer and wake any dependents.
- // These instructions are about to write back. In the simple model
- // this loop can really happen within the previous loop, but when
- // instructions have actual latencies, this loop must be separate.
- // Also mark scoreboard that this instruction is finally complete.
- // Either have IEW have direct access to rename map, or have this as
- // part of backwards communication.
+ // Loop through the head of the time buffer and wake any
+ // dependents. These instructions are about to write back. Also
+ // mark scoreboard that this instruction is finally complete.
+ // Either have IEW have direct access to scoreboard, or have this
+ // as part of backwards communication.
for (int inst_num = 0; inst_num < issueWidth &&
toCommit->insts[inst_num]; inst_num++) {
DynInstPtr inst = toCommit->insts[inst_num];
scoreboard->setReg(inst->renamedDestRegIdx(i));
}
- producer_inst[tid]++;
- consumer_inst[tid]+= dependents;
- writeback_count[tid]++;
+ producerInst[tid]++;
+ consumerInst[tid]+= dependents;
+ writebackCount[tid]++;
}
}
}
void
DefaultIEW<Impl>::tick()
{
- // Try to fill up issue queue with as many instructions as bandwidth
- // allows.
wbNumInst = 0;
wbCycle = 0;
sortInsts();
+ // Free function units marked as being freed this cycle.
+ fuPool->processFreeUnits();
+
list<unsigned>::iterator threads = (*activeThreads).begin();
- // Check stall and squash signals.
+ // Check stall and squash signals, dispatch any instructions.
while (threads != (*activeThreads).end()) {
unsigned tid = *threads++;
checkSignalsAndUpdate(tid);
dispatch(tid);
-
}
if (exeStatus != Squashing) {
// Writeback any stores using any leftover bandwidth.
ldstQueue.writebackStores();
- // Free function units marked as being freed this cycle.
- fuPool->processFreeUnits();
-
// Check the committed load/store signals to see if there's a load
// or store to commit. Also check if it's being told to execute a
// nonspeculative instruction.
DPRINTF(IEW, "[tid:%i], Dispatch dispatched %i instructions.\n",
tid, toRename->iewInfo[tid].dispatched);
-
- //thread_queue.pop();
}
DPRINTF(IEW, "IQ has %i free entries (Can schedule: %i). "
//
#ifdef TARGET_ALPHA
if (inst->isDataPrefetch())
- exe_swp[thread_number]++;
+ exeSwp[thread_number]++;
else
iewExecutedInsts++;
#else
// Control operations
//
if (inst->isControl())
- exe_branches[thread_number]++;
+ exeBranches[thread_number]++;
//
// Memory operations
//
if (inst->isMemRef()) {
- exe_refs[thread_number]++;
+ exeRefs[thread_number]++;
if (inst->isLoad()) {
iewExecLoadInsts[thread_number]++;
// Force instantiation of InstructionQueue.
template class InstructionQueue<AlphaSimpleImpl>;
-
-template<>
-unsigned
-InstructionQueue<AlphaSimpleImpl>::DependencyEntry::mem_alloc_counter = 0;
/*
- * Copyright (c) 2004-2005 The Regents of The University of Michigan
+ * Copyright (c) 2004-2006 The Regents of The University of Michigan
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
#include "base/statistics.hh"
#include "base/timebuf.hh"
#include "cpu/inst_seq.hh"
+#include "cpu/o3/dep_graph.hh"
#include "encumbered/cpu/full/op_class.hh"
#include "sim/host.hh"
/** Pointer back to the instruction queue. */
InstructionQueue<Impl> *iqPtr;
+ bool freeFU;
+
public:
/** Construct a FU completion event. */
FUCompletion(DynInstPtr &_inst, int fu_idx,
virtual void process();
virtual const char *description();
+ void setFreeFU() { freeFU = true; }
};
/** Constructs an IQ. */
void resetState();
- void resetDependencyGraph();
-
/** Sets CPU pointer. */
void setCPU(FullCPU *_cpu) { cpu = _cpu; }
void insertBarrier(DynInstPtr &barr_inst);
/**
- * Advances the tail of the IQ, used if an instruction is not added to the
- * IQ for scheduling.
- * @todo: Rename this function.
+ * Records the instruction as the producer of a register without
+ * adding it to the rest of the IQ.
*/
- void advanceTail(DynInstPtr &inst);
+ void recordProducer(DynInstPtr &inst)
+ { addToProducers(inst); }
/** Process FU completion event. */
void processFUCompletion(DynInstPtr &inst, int fu_idx);
/** Returns the number of used entries for a thread. */
unsigned getCount(unsigned tid) { return count[tid]; };
- /** Updates the number of free entries. */
- void updateFreeEntries(int num) { freeEntries += num; }
-
/** Debug function to print all instructions. */
void printInsts();
}
};
- /**
- * Struct for an IQ entry. It includes the instruction and an iterator
- * to the instruction's spot in the IQ.
- */
- struct IQEntry {
- DynInstPtr inst;
- ListIt iqIt;
- };
-
typedef std::priority_queue<DynInstPtr, std::vector<DynInstPtr>, pqCompare>
ReadyInstQueue;
* inside of DynInst), when these instructions are woken up only
* the sequence number will be available. Thus it is most efficient to be
* able to search by the sequence number alone.
- * @todo: Maybe change this to a priority queue per thread.
*/
std::map<InstSeqNum, DynInstPtr> nonSpecInsts;
/** List that contains the age order of the oldest instruction of each
* ready queue. Used to select the oldest instruction available
* among op classes.
+ * @todo: Might be better to just move these entries around instead
+ * of creating new ones every time the position changes due to an
+ * instruction issuing. Not sure std::list supports this.
*/
std::list<ListOrderEntry> listOrder;
*/
void moveToYoungerInst(ListOrderIt age_order_it);
+ DependencyGraph<DynInstPtr> dependGraph;
+
//////////////////////////////////////
// Various parameters
//////////////////////////////////////
bool switchedOut;
- //////////////////////////////////
- // Variables needed for squashing
- //////////////////////////////////
-
/** The sequence number of the squashed instruction. */
InstSeqNum squashedSeqNum[Impl::MaxThreads];
- /** Iterator that points to the last instruction that has been squashed.
- * This will not be valid unless the IQ is in the process of squashing.
- */
- ListIt squashIt[Impl::MaxThreads];
-
- ///////////////////////////////////
- // Dependency graph stuff
- ///////////////////////////////////
-
- class DependencyEntry
- {
- public:
- DependencyEntry()
- : inst(NULL), next(NULL)
- { }
-
- DynInstPtr inst;
- //Might want to include data about what arch. register the
- //dependence is waiting on.
- DependencyEntry *next;
-
- //This function, and perhaps this whole class, stand out a little
- //bit as they don't fit a classification well. I want access
- //to the underlying structure of the linked list, yet at
- //the same time it feels like this should be something abstracted
- //away. So for now it will sit here, within the IQ, until
- //a better implementation is decided upon.
- // This function probably shouldn't be within the entry...
- void insert(DynInstPtr &new_inst);
-
- void remove(DynInstPtr &inst_to_remove);
-
- // Debug variable, remove when done testing.
- static unsigned mem_alloc_counter;
- };
-
- /** Array of linked lists. Each linked list is a list of all the
- * instructions that depend upon a given register. The actual
- * register's index is used to index into the graph; ie all
- * instructions in flight that are dependent upon r34 will be
- * in the linked list of dependGraph[34].
- */
- DependencyEntry *dependGraph;
-
/** A cache of the recently woken registers. It is 1 if the register
* has been woken up recently, and 0 if the register has been added
* to the dependency graph and has not yet received its value. It
*/
std::vector<bool> regScoreboard;
- /** Adds an instruction to the dependency graph, as a producer. */
+ /** Adds an instruction to the dependency graph, as a consumer. */
bool addToDependents(DynInstPtr &new_inst);
- /** Adds an instruction to the dependency graph, as a consumer. */
- void createDependency(DynInstPtr &new_inst);
+ /** Adds an instruction to the dependency graph, as a producer. */
+ void addToProducers(DynInstPtr &new_inst);
/** Moves an instruction to the ready queue if it is ready. */
void addIfReady(DynInstPtr &inst);
*/
int countInsts();
- /** Debugging function to dump out the dependency graph.
- */
- void dumpDependGraph();
-
/** Debugging function to dump all the list sizes, as well as print
* out the list of nonspeculative instructions. Should not be used
* in any other capacity, but it has no harmful sideaffects.
Stats::Scalar<> iqInstsAdded;
/** Stat for number of non-speculative instructions added. */
Stats::Scalar<> iqNonSpecInstsAdded;
-// Stats::Scalar<> iqIntInstsAdded;
+
Stats::Scalar<> iqInstsIssued;
/** Stat for number of integer instructions issued. */
Stats::Scalar<> iqIntInstsIssued;
-// Stats::Scalar<> iqFloatInstsAdded;
/** Stat for number of floating point instructions issued. */
Stats::Scalar<> iqFloatInstsIssued;
-// Stats::Scalar<> iqBranchInstsAdded;
/** Stat for number of branch instructions issued. */
Stats::Scalar<> iqBranchInstsIssued;
-// Stats::Scalar<> iqMemInstsAdded;
/** Stat for number of memory instructions issued. */
Stats::Scalar<> iqMemInstsIssued;
-// Stats::Scalar<> iqMiscInstsAdded;
/** Stat for number of miscellaneous instructions issued. */
Stats::Scalar<> iqMiscInstsIssued;
/** Stat for number of squashed instructions that were ready to issue. */
*/
Stats::Scalar<> iqSquashedNonSpecRemoved;
- Stats::VectorDistribution<> queue_res_dist;
- Stats::Distribution<> n_issued_dist;
- Stats::VectorDistribution<> issue_delay_dist;
+ Stats::VectorDistribution<> queueResDist;
+ Stats::Distribution<> numIssuedDist;
+ Stats::VectorDistribution<> issueDelayDist;
- Stats::Vector<> stat_fu_busy;
+ Stats::Vector<> statFuBusy;
// Stats::Vector<> dist_unissued;
- Stats::Vector2d<> stat_issued_inst_type;
+ Stats::Vector2d<> statIssuedInstType;
- Stats::Formula issue_rate;
+ Stats::Formula issueRate;
// Stats::Formula issue_stores;
// Stats::Formula issue_op_rate;
- Stats::Vector<> fu_busy; //cumulative fu busy
+ Stats::Vector<> fuBusy; //cumulative fu busy
- Stats::Formula fu_busy_rate;
+ Stats::Formula fuBusyRate;
};
#endif //__CPU_O3_INST_QUEUE_HH__
/*
- * Copyright (c) 2004-2005 The Regents of The University of Michigan
+ * Copyright (c) 2004-2006 The Regents of The University of Michigan
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
-// Todo:
-// Current ordering allows for 0 cycle added-to-scheduled. Could maybe fake
-// it; either do in reverse order, or have added instructions put into a
-// different ready queue that, in scheduleRreadyInsts(), gets put onto the
-// normal ready queue. This would however give only a one cycle delay,
-// but probably is more flexible to actually add in a delay parameter than
-// just running it backwards.
-
#include <limits>
#include <vector>
int fu_idx,
InstructionQueue<Impl> *iq_ptr)
: Event(&mainEventQueue, Stat_Event_Pri),
- inst(_inst), fuIdx(fu_idx), iqPtr(iq_ptr)
+ inst(_inst), fuIdx(fu_idx), iqPtr(iq_ptr), freeFU(false)
{
this->setFlags(Event::AutoDelete);
}
void
InstructionQueue<Impl>::FUCompletion::process()
{
- iqPtr->processFUCompletion(inst, fuIdx);
+ iqPtr->processFUCompletion(inst, freeFU ? fuIdx : -1);
inst = NULL;
}
//Create an entry for each physical register within the
//dependency graph.
- dependGraph = new DependencyEntry[numPhysRegs];
-
- // Initialize all the head pointers to point to NULL, and all the
- // entries as unready.
- for (int i = 0; i < numPhysRegs; ++i) {
- dependGraph[i].next = NULL;
- dependGraph[i].inst = NULL;
- }
+ dependGraph.resize(numPhysRegs);
// Resize the register scoreboard.
regScoreboard.resize(numPhysRegs);
template <class Impl>
InstructionQueue<Impl>::~InstructionQueue()
{
- resetDependencyGraph();
- assert(DependencyEntry::mem_alloc_counter == 0);
-
- delete [] dependGraph;
+ dependGraph.reset();
+ cprintf("Nodes traversed: %i, removed: %i\n",
+ dependGraph.nodesTraversed, dependGraph.nodesRemoved);
}
template <class Impl>
.desc("Number of non-speculative instructions added to the IQ")
.prereq(iqNonSpecInstsAdded);
-// iqIntInstsAdded;
-
iqInstsIssued
.name(name() + ".iqInstsIssued")
.desc("Number of instructions issued")
.desc("Number of integer instructions issued")
.prereq(iqIntInstsIssued);
-// iqFloatInstsAdded;
-
iqFloatInstsIssued
.name(name() + ".iqFloatInstsIssued")
.desc("Number of float instructions issued")
.prereq(iqFloatInstsIssued);
-// iqBranchInstsAdded;
-
iqBranchInstsIssued
.name(name() + ".iqBranchInstsIssued")
.desc("Number of branch instructions issued")
.prereq(iqBranchInstsIssued);
-// iqMemInstsAdded;
-
iqMemInstsIssued
.name(name() + ".iqMemInstsIssued")
.desc("Number of memory instructions issued")
.prereq(iqMemInstsIssued);
-// iqMiscInstsAdded;
-
iqMiscInstsIssued
.name(name() + ".iqMiscInstsIssued")
.desc("Number of miscellaneous instructions issued")
.desc("Number of squashed non-spec instructions that were removed")
.prereq(iqSquashedNonSpecRemoved);
- queue_res_dist
+ queueResDist
.init(Num_OpClasses, 0, 99, 2)
.name(name() + ".IQ:residence:")
.desc("cycles from dispatch to issue")
.flags(total | pdf | cdf )
;
for (int i = 0; i < Num_OpClasses; ++i) {
- queue_res_dist.subname(i, opClassStrings[i]);
+ queueResDist.subname(i, opClassStrings[i]);
}
- n_issued_dist
+ numIssuedDist
.init(0,totalWidth,1)
.name(name() + ".ISSUE:issued_per_cycle")
.desc("Number of insts issued each cycle")
dist_unissued.subname(i, unissued_names[i]);
}
*/
- stat_issued_inst_type
+ statIssuedInstType
.init(numThreads,Num_OpClasses)
.name(name() + ".ISSUE:FU_type")
.desc("Type of FU issued")
.flags(total | pdf | dist)
;
- stat_issued_inst_type.ysubnames(opClassStrings);
+ statIssuedInstType.ysubnames(opClassStrings);
//
// How long did instructions for a particular FU type wait prior to issue
//
- issue_delay_dist
+ issueDelayDist
.init(Num_OpClasses,0,99,2)
.name(name() + ".ISSUE:")
.desc("cycles from operands ready to issue")
for (int i=0; i<Num_OpClasses; ++i) {
stringstream subname;
subname << opClassStrings[i] << "_delay";
- issue_delay_dist.subname(i, subname.str());
+ issueDelayDist.subname(i, subname.str());
}
- issue_rate
+ issueRate
.name(name() + ".ISSUE:rate")
.desc("Inst issue rate")
.flags(total)
;
- issue_rate = iqInstsIssued / cpu->numCycles;
+ issueRate = iqInstsIssued / cpu->numCycles;
/*
issue_stores
.name(name() + ".ISSUE:stores")
;
issue_op_rate = issued_ops / numCycles;
*/
- stat_fu_busy
+ statFuBusy
.init(Num_OpClasses)
.name(name() + ".ISSUE:fu_full")
.desc("attempts to use FU when none available")
.flags(pdf | dist)
;
for (int i=0; i < Num_OpClasses; ++i) {
- stat_fu_busy.subname(i, opClassStrings[i]);
+ statFuBusy.subname(i, opClassStrings[i]);
}
- fu_busy
+ fuBusy
.init(numThreads)
.name(name() + ".ISSUE:fu_busy_cnt")
.desc("FU busy when requested")
.flags(total)
;
- fu_busy_rate
+ fuBusyRate
.name(name() + ".ISSUE:fu_busy_rate")
.desc("FU busy rate (busy events/executed inst)")
.flags(total)
;
- fu_busy_rate = fu_busy / iqInstsIssued;
+ fuBusyRate = fuBusy / iqInstsIssued;
for ( int i=0; i < numThreads; i++) {
// Tell mem dependence unit to reg stats as well.
listOrder.clear();
}
-template <class Impl>
-void
-InstructionQueue<Impl>::resetDependencyGraph()
-{
- // Clear the dependency graph
- DependencyEntry *curr;
- DependencyEntry *prev;
-
- for (int i = 0; i < numPhysRegs; ++i) {
- curr = dependGraph[i].next;
-
- while (curr) {
- DependencyEntry::mem_alloc_counter--;
-
- prev = curr;
- curr = prev->next;
- prev->inst = NULL;
-
- delete prev;
- }
-
- if (dependGraph[i].inst) {
- dependGraph[i].inst = NULL;
- }
-
- dependGraph[i].next = NULL;
- }
-}
-
template <class Impl>
void
InstructionQueue<Impl>::setActiveThreads(list<unsigned> *at_ptr)
InstructionQueue<Impl>::switchOut()
{
resetState();
- resetDependencyGraph();
+ dependGraph.reset();
switchedOut = true;
for (int i = 0; i < numThreads; ++i) {
memDepUnit[i].switchOut();
// Make sure the instruction is valid
assert(new_inst);
- DPRINTF(IQ, "Adding instruction PC %#x to the IQ.\n",
- new_inst->readPC());
+ DPRINTF(IQ, "Adding instruction [sn:%lli] PC %#x to the IQ.\n",
+ new_inst->seqNum, new_inst->readPC());
- // Check if there are any free entries. Panic if there are none.
- // Might want to have this return a fault in the future instead of
- // panicing.
assert(freeEntries != 0);
instList[new_inst->threadNumber].push_back(new_inst);
- // Decrease the number of free entries.
--freeEntries;
- //Mark Instruction as in IQ
new_inst->setInIQ();
// Look through its source registers (physical regs), and mark any
// Have this instruction set itself as the producer of its destination
// register(s).
- createDependency(new_inst);
+ addToProducers(new_inst);
- // If it's a memory instruction, add it to the memory dependency
- // unit.
if (new_inst->isMemRef()) {
memDepUnit[new_inst->threadNumber].insert(new_inst);
} else {
- // If the instruction is ready then add it to the ready list.
addIfReady(new_inst);
}
++iqInstsAdded;
-
- //Update Thread IQ Count
count[new_inst->threadNumber]++;
assert(freeEntries == (numEntries - countInsts()));
// @todo: Clean up this code; can do it by setting inst as unable
// to issue, then calling normal insert on the inst.
- // Make sure the instruction is valid
assert(new_inst);
nonSpecInsts[new_inst->seqNum] = new_inst;
- DPRINTF(IQ, "Adding instruction PC %#x to the IQ.\n",
- new_inst->readPC());
+ DPRINTF(IQ, "Adding non-speculative instruction [sn:%lli] PC %#x "
+ "to the IQ.\n",
+ new_inst->seqNum, new_inst->readPC());
- // Check if there are any free entries. Panic if there are none.
- // Might want to have this return a fault in the future instead of
- // panicing.
assert(freeEntries != 0);
instList[new_inst->threadNumber].push_back(new_inst);
- // Decrease the number of free entries.
--freeEntries;
- //Mark Instruction as in IQ
new_inst->setInIQ();
// Have this instruction set itself as the producer of its destination
// register(s).
- createDependency(new_inst);
+ addToProducers(new_inst);
// If it's a memory instruction, add it to the memory dependency
// unit.
++iqNonSpecInstsAdded;
- //Update Thread IQ Count
count[new_inst->threadNumber]++;
assert(freeEntries == (numEntries - countInsts()));
insertNonSpec(barr_inst);
}
-template <class Impl>
-void
-InstructionQueue<Impl>::advanceTail(DynInstPtr &inst)
-{
- // Have this instruction set itself as the producer of its destination
- // register(s).
- createDependency(inst);
-}
-
template <class Impl>
void
InstructionQueue<Impl>::addToOrderList(OpClass op_class)
iewStage->wakeCPU();
- fuPool->freeUnit(fu_idx);
+ if (fu_idx > -1)
+ fuPool->freeUnitNextCycle(fu_idx);
+ // @todo: Ensure that these FU Completions happen at the beginning
+ // of a cycle, otherwise they could add too many instructions to
+ // the queue.
+ // @todo: This could break if there's multiple multi-cycle ops
+ // finishing on this cycle. Maybe implement something like
+ // instToCommit in iew_impl.hh.
int &size = issueToExecuteQueue->access(0)->size;
issueToExecuteQueue->access(0)->insts[size++] = inst;
IssueStruct *i2e_info = issueToExecuteQueue->access(0);
- // Will need to reorder the list if either a queue is not on the list,
- // or it has an older instruction than last time.
- for (int i = 0; i < Num_OpClasses; ++i) {
- if (!readyInsts[i].empty()) {
- if (!queueOnList[i]) {
- addToOrderList(OpClass(i));
- } else if (readyInsts[i].top()->seqNum <
- (*readyIt[i]).oldestInst) {
- listOrder.erase(readyIt[i]);
- addToOrderList(OpClass(i));
- }
- }
- }
-
// Have iterator to head of the list
// While I haven't exceeded bandwidth or reached the end of the list,
// Try to get a FU that can do what this op needs.
int total_issued = 0;
int exec_queue_slot = i2e_info->size;
- while (exec_queue_slot < totalWidth && order_it != order_end_it) {
+ while (exec_queue_slot < totalWidth && total_issued < totalWidth &&
+ order_it != order_end_it) {
OpClass op_class = (*order_it).queueType;
assert(!readyInsts[op_class].empty());
continue;
}
- int idx = fuPool->getUnit(op_class);
-
+ int idx = -2;
+ int op_latency = 1;
int tid = issuing_inst->threadNumber;
- if (idx == -2) {
- assert(op_class == No_OpClass);
-
- i2e_info->insts[exec_queue_slot++] = issuing_inst;
- i2e_info->size++;
-
- DPRINTF(IQ, "Thread %i: Issuing instruction PC that needs no FU"
- " %#x [sn:%lli]\n",
- tid, issuing_inst->readPC(),
- issuing_inst->seqNum);
-
- readyInsts[op_class].pop();
-
- if (!readyInsts[op_class].empty()) {
- moveToYoungerInst(order_it);
- } else {
- readyIt[op_class] = listOrder.end();
- queueOnList[op_class] = false;
- }
-
- issuing_inst->setIssued();
- ++total_issued;
+ if (op_class != No_OpClass) {
+ idx = fuPool->getUnit(op_class);
- if (!issuing_inst->isMemRef()) {
- // Memory instructions can not be freed from the IQ until they
- // complete.
- ++freeEntries;
- count[tid]--;
- issuing_inst->removeInIQ();
- } else {
- memDepUnit[tid].issue(issuing_inst);
+ if (idx > -1) {
+ op_latency = fuPool->getOpLatency(op_class);
}
+ }
- listOrder.erase(order_it++);
-
- stat_issued_inst_type[tid][op_class]++;
- } else if (idx != -1) {
- int op_latency = fuPool->getOpLatency(op_class);
-
+ if (idx == -2 || idx != -1) {
if (op_latency == 1) {
i2e_info->insts[exec_queue_slot++] = issuing_inst;
i2e_info->size++;
- // Add the FU onto the list of FU's to be freed next cycle.
- fuPool->freeUnit(idx);
+ // Add the FU onto the list of FU's to be freed next
+ // cycle if we used one.
+ if (idx >= 0)
+ fuPool->freeUnitNextCycle(idx);
} else {
int issue_latency = fuPool->getIssueLatency(op_class);
+ // Generate completion event for the FU
+ FUCompletion *execution = new FUCompletion(issuing_inst,
+ idx, this);
- if (issue_latency > 1) {
- // Generate completion event for the FU
- FUCompletion *execution = new FUCompletion(issuing_inst,
- idx, this);
+ execution->schedule(curTick + cpu->cycles(issue_latency - 1));
- execution->schedule(curTick + cpu->cycles(issue_latency - 1));
+ // @todo: Enforce that issue_latency == 1 or op_latency
+ if (issue_latency > 1) {
+ execution->setFreeFU();
} else {
- i2e_info->insts[exec_queue_slot++] = issuing_inst;
- i2e_info->size++;
+ // @todo: Not sure I'm accounting for the
+ // multi-cycle op in a pipelined FU properly, or
+ // the number of instructions issued in one cycle.
+// i2e_info->insts[exec_queue_slot++] = issuing_inst;
+// i2e_info->size++;
// Add the FU onto the list of FU's to be freed next cycle.
- fuPool->freeUnit(idx);
+ fuPool->freeUnitNextCycle(idx);
}
}
}
listOrder.erase(order_it++);
- stat_issued_inst_type[tid][op_class]++;
+ statIssuedInstType[tid][op_class]++;
} else {
- stat_fu_busy[op_class]++;
- fu_busy[tid]++;
+ statFuBusy[op_class]++;
+ fuBusy[tid]++;
++order_it;
}
}
- n_issued_dist.sample(total_issued);
+ numIssuedDist.sample(total_issued);
+ iqInstsIssued+= total_issued;
if (total_issued) {
cpu->activityThisCycle();
unsigned tid = (*inst_it).second->threadNumber;
- // Mark this instruction as ready to issue.
(*inst_it).second->setCanIssue();
- // Now schedule the instruction.
if (!(*inst_it).second->isMemRef()) {
addIfReady((*inst_it).second);
} else {
void
InstructionQueue<Impl>::commit(const InstSeqNum &inst, unsigned tid)
{
- /*Need to go through each thread??*/
DPRINTF(IQ, "[tid:%i]: Committing instructions older than [sn:%i]\n",
tid,inst);
DPRINTF(IQ, "Waking dependents of completed instruction.\n");
assert(!completed_inst->isSquashed());
- // Look at the physical destination register of the DynInst
- // and look it up on the dependency graph. Then mark as ready
- // any instructions within the instruction queue.
- DependencyEntry *curr;
- DependencyEntry *prev;
// Tell the memory dependence unit to wake any dependents on this
// instruction if it is a memory instruction. Also complete the memory
- // instruction at this point since we know it executed fine.
- // @todo: Might want to rename "completeMemInst" to
- // something that indicates that it won't need to be replayed, and call
- // this earlier. Might not be a big deal.
+ // instruction at this point since we know it executed without issues.
+ // @todo: Might want to rename "completeMemInst" to something that
+ // indicates that it won't need to be replayed, and call this
+ // earlier. Might not be a big deal.
if (completed_inst->isMemRef()) {
memDepUnit[completed_inst->threadNumber].wakeDependents(completed_inst);
completeMemInst(completed_inst);
DPRINTF(IQ, "Waking any dependents on register %i.\n",
(int) dest_reg);
- //Maybe abstract this part into a function.
- //Go through the dependency chain, marking the registers as ready
- //within the waiting instructions.
-
- curr = dependGraph[dest_reg].next;
+ //Go through the dependency chain, marking the registers as
+ //ready within the waiting instructions.
+ DynInstPtr dep_inst = dependGraph.pop(dest_reg);
- while (curr) {
+ while (dep_inst) {
DPRINTF(IQ, "Waking up a dependent instruction, PC%#x.\n",
- curr->inst->readPC());
+ dep_inst->readPC());
// Might want to give more information to the instruction
- // so that it knows which of its source registers is ready.
- // However that would mean that the dependency graph entries
- // would need to hold the src_reg_idx.
- curr->inst->markSrcRegReady();
+ // so that it knows which of its source registers is
+ // ready. However that would mean that the dependency
+ // graph entries would need to hold the src_reg_idx.
+ dep_inst->markSrcRegReady();
- addIfReady(curr->inst);
+ addIfReady(dep_inst);
- DependencyEntry::mem_alloc_counter--;
-
- prev = curr;
- curr = prev->next;
- prev->inst = NULL;
+ dep_inst = dependGraph.pop(dest_reg);
++dependents;
-
- delete prev;
}
- // Reset the head node now that all of its dependents have been woken
- // up.
- dependGraph[dest_reg].next = NULL;
- dependGraph[dest_reg].inst = NULL;
+ // Reset the head node now that all of its dependents have
+ // been woken up.
+ assert(dependGraph.empty(dest_reg));
+ dependGraph.clearInst(dest_reg);
// Mark the scoreboard as having that register ready.
regScoreboard[dest_reg] = true;
readyInsts[op_class].push(ready_inst);
+ // Will need to reorder the list if either a queue is not on the list,
+ // or it has an older instruction than last time.
+ if (!queueOnList[op_class]) {
+ addToOrderList(op_class);
+ } else if (readyInsts[op_class].top()->seqNum <
+ (*readyIt[op_class]).oldestInst) {
+ listOrder.erase(readyIt[op_class]);
+ addToOrderList(op_class);
+ }
+
DPRINTF(IQ, "Instruction is ready to issue, putting it onto "
"the ready list, PC %#x opclass:%i [sn:%lli].\n",
ready_inst->readPC(), op_class, ready_inst->seqNum);
// time buffer.
squashedSeqNum[tid] = fromCommit->commitInfo[tid].doneSeqNum;
- // Setup the squash iterator to point to the tail.
- squashIt[tid] = instList[tid].end();
- --squashIt[tid];
-
// Call doSquash if there are insts in the IQ
if (count[tid] > 0) {
doSquash(tid);
void
InstructionQueue<Impl>::doSquash(unsigned tid)
{
- // Make sure the squashed sequence number is valid.
-// assert(squashedSeqNum[tid] != 0);
+ // Start at the tail.
+ ListIt squash_it = instList[tid].end();
+ --squash_it;
DPRINTF(IQ, "[tid:%i]: Squashing until sequence number %i!\n",
tid, squashedSeqNum[tid]);
// Squash any instructions younger than the squashed sequence number
// given.
- while (squashIt[tid] != instList[tid].end() &&
- (*squashIt[tid])->seqNum > squashedSeqNum[tid]) {
+ while (squash_it != instList[tid].end() &&
+ (*squash_it)->seqNum > squashedSeqNum[tid]) {
- DynInstPtr squashed_inst = (*squashIt[tid]);
+ DynInstPtr squashed_inst = (*squash_it);
// Only handle the instruction if it actually is in the IQ and
// hasn't already been squashed in the IQ.
if (squashed_inst->threadNumber != tid ||
squashed_inst->isSquashedInIQ()) {
- --squashIt[tid];
+ --squash_it;
continue;
}
PhysRegIndex src_reg =
squashed_inst->renamedSrcRegIdx(src_reg_idx);
- // Only remove it from the dependency graph if it was
- // placed there in the first place.
- // HACK: This assumes that instructions woken up from the
- // dependency chain aren't informed that a specific src
- // register has become ready. This may not always be true
- // in the future.
- // Instead of doing a linked list traversal, we can just
- // remove these squashed instructions either at issue time,
- // or when the register is overwritten. The only downside
- // to this is it leaves more room for error.
+ // Only remove it from the dependency graph if it
+ // was placed there in the first place.
+
+ // Instead of doing a linked list traversal, we
+ // can just remove these squashed instructions
+ // either at issue time, or when the register is
+ // overwritten. The only downside to this is it
+ // leaves more room for error.
if (!squashed_inst->isReadySrcRegIdx(src_reg_idx) &&
src_reg < numPhysRegs) {
- dependGraph[src_reg].remove(squashed_inst);
+ dependGraph.remove(src_reg, squashed_inst);
}
++iqSquashedOperandsExamined;
}
-
- // Might want to remove producers as well.
} else {
NonSpecMapIt ns_inst_it =
nonSpecInsts.find(squashed_inst->seqNum);
++freeEntries;
- if (numThreads > 1) {
- DPRINTF(IQ, "[tid:%i]: Instruction [sn:%lli] PC %#x "
- "squashed.\n",
- tid, squashed_inst->seqNum, squashed_inst->readPC());
- } else {
- DPRINTF(IQ, "Instruction [sn:%lli] PC %#x squashed.\n",
- squashed_inst->seqNum, squashed_inst->readPC());
- }
+ DPRINTF(IQ, "[tid:%i]: Instruction [sn:%lli] PC %#x "
+ "squashed.\n",
+ tid, squashed_inst->seqNum, squashed_inst->readPC());
}
- instList[tid].erase(squashIt[tid]--);
+ instList[tid].erase(squash_it--);
++iqSquashedInstsExamined;
}
}
-template <class Impl>
-void
-InstructionQueue<Impl>::DependencyEntry::insert(DynInstPtr &new_inst)
-{
- //Add this new, dependent instruction at the head of the dependency
- //chain.
-
- // First create the entry that will be added to the head of the
- // dependency chain.
- DependencyEntry *new_entry = new DependencyEntry;
- new_entry->next = this->next;
- new_entry->inst = new_inst;
-
- // Then actually add it to the chain.
- this->next = new_entry;
-
- ++mem_alloc_counter;
-}
-
-template <class Impl>
-void
-InstructionQueue<Impl>::DependencyEntry::remove(DynInstPtr &inst_to_remove)
-{
- DependencyEntry *prev = this;
- DependencyEntry *curr = this->next;
-
- // Make sure curr isn't NULL. Because this instruction is being
- // removed from a dependency list, it must have been placed there at
- // an earlier time. The dependency chain should not be empty,
- // unless the instruction dependent upon it is already ready.
- if (curr == NULL) {
- return;
- }
-
- // Find the instruction to remove within the dependency linked list.
- while (curr->inst != inst_to_remove) {
- prev = curr;
- curr = curr->next;
-
- assert(curr != NULL);
- }
-
- // Now remove this instruction from the list.
- prev->next = curr->next;
-
- --mem_alloc_counter;
-
- // Could push this off to the destructor of DependencyEntry
- curr->inst = NULL;
-
- delete curr;
-}
-
template <class Impl>
bool
InstructionQueue<Impl>::addToDependents(DynInstPtr &new_inst)
"is being added to the dependency chain.\n",
new_inst->readPC(), src_reg);
- dependGraph[src_reg].insert(new_inst);
+ dependGraph.insert(src_reg, new_inst);
// Change the return value to indicate that something
// was added to the dependency graph.
"became ready before it reached the IQ.\n",
new_inst->readPC(), src_reg);
// Mark a register ready within the instruction.
- new_inst->markSrcRegReady();
+ new_inst->markSrcRegReady(src_reg_idx);
}
}
}
template <class Impl>
void
-InstructionQueue<Impl>::createDependency(DynInstPtr &new_inst)
+InstructionQueue<Impl>::addToProducers(DynInstPtr &new_inst)
{
- //Actually nothing really needs to be marked when an
- //instruction becomes the producer of a register's value,
- //but for convenience a ptr to the producing instruction will
- //be placed in the head node of the dependency links.
+ // Nothing really needs to be marked when an instruction becomes
+ // the producer of a register's value, but for convenience a ptr
+ // to the producing instruction will be placed in the head node of
+ // the dependency links.
int8_t total_dest_regs = new_inst->numDestRegs();
for (int dest_reg_idx = 0;
continue;
}
- if (dependGraph[dest_reg].next) {
- dumpDependGraph();
+ if (!dependGraph.empty(dest_reg)) {
+ dependGraph.dump();
panic("Dependency graph %i not empty!", dest_reg);
}
- dependGraph[dest_reg].inst = new_inst;
+ dependGraph.setInst(dest_reg, new_inst);
// Mark the scoreboard to say it's not yet ready.
regScoreboard[dest_reg] = false;
void
InstructionQueue<Impl>::addIfReady(DynInstPtr &inst)
{
- //If the instruction now has all of its source registers
+ // If the instruction now has all of its source registers
// available, then add it to the list of ready instructions.
if (inst->readyToIssue()) {
// Message to the mem dependence unit that this instruction has
// its registers ready.
-
memDepUnit[inst->threadNumber].regsReady(inst);
return;
inst->readPC(), op_class, inst->seqNum);
readyInsts[op_class].push(inst);
+
+ // Will need to reorder the list if either a queue is not on the list,
+ // or it has an older instruction than last time.
+ if (!queueOnList[op_class]) {
+ addToOrderList(op_class);
+ } else if (readyInsts[op_class].top()->seqNum <
+ (*readyIt[op_class]).oldestInst) {
+ listOrder.erase(readyIt[op_class]);
+ addToOrderList(op_class);
+ }
}
}
#endif
}
-template <class Impl>
-void
-InstructionQueue<Impl>::dumpDependGraph()
-{
- DependencyEntry *curr;
-
- for (int i = 0; i < numPhysRegs; ++i)
- {
- curr = &dependGraph[i];
-
- if (curr->inst) {
- cprintf("dependGraph[%i]: producer: %#x [sn:%lli] consumer: ",
- i, curr->inst->readPC(), curr->inst->seqNum);
- } else {
- cprintf("dependGraph[%i]: No producer. consumer: ", i);
- }
-
- while (curr->next != NULL) {
- curr = curr->next;
-
- cprintf("%#x [sn:%lli] ",
- curr->inst->readPC(), curr->inst->seqNum);
- }
-
- cprintf("\n");
- }
-}
-
template <class Impl>
void
InstructionQueue<Impl>::dumpLists()
cprintf("Count:%i\n", valid_num);
} else if ((*inst_list_it)->isMemRef() &&
!(*inst_list_it)->memOpDone) {
- // Loads that have not been marked as executed still count
- // towards the total instructions.
+ // Loads that have not been marked as executed
+ // still count towards the total instructions.
++valid_num;
cprintf("Count:%i\n", valid_num);
}