my_hash_t thishash;
#endif
-/** This may need to be specific to an implementation. */
-//int BaseDynInst<Impl>::instcount = 0;
-
-//int break_inst = -1;
-
template <class Impl>
BaseDynInst<Impl>::BaseDynInst(MachInst machInst, Addr inst_PC,
Addr pred_PC, InstSeqNum seq_num,
template <class Impl>
BaseDynInst<Impl>::~BaseDynInst()
{
-/*
- if (specMemWrite) {
- // Remove effects of this instruction from speculative memory
- xc->spec_mem->erase(effAddr);
- }
-*/
--instcount;
DPRINTF(FullCPU, "DynInst: Instruction destroyed. Instcount=%i\n",
instcount);
}
-/*
-template <class Impl>
-FunctionalMemory *
-BaseDynInst<Impl>::getMemory(void)
-{
- return xc->mem;
-}
-template <class Impl>
-IntReg *
-BaseDynInst<Impl>::getIntegerRegs(void)
-{
- return (spec_mode ? xc->specIntRegFile : xc->regs.intRegFile);
-}
-*/
template <class Impl>
void
BaseDynInst<Impl>::prefetch(Addr addr, unsigned flags)
// EA calc depends on. (i.e. src reg 0 is the source of the data to be
// stored)
-// StaticInstPtr<ISA> eaInst = staticInst->eaCompInst();
-
for (int i = 1; i < numSrcRegs(); ++i)
{
if (!_readySrcRegIdx[i])
return true;
}
-// Forward declaration...
+// Explicit template instantiation
template class BaseDynInst<AlphaSimpleImpl>;
template <>
MaxInstDestRegs = ISA::MaxInstDestRegs, //< Max dest regs
};
+ /** The static inst used by this dyn inst. */
StaticInstPtr<ISA> staticInst;
////////////////////////////////////////////
Fault copySrcTranslate(Addr src);
Fault copy(Addr dest);
- // Probably should be private...
+ /** @todo: Consider making this private. */
public:
/** Is this instruction valid. */
bool valid;
~BaseDynInst();
private:
+ /** Function to initialize variables in the constructors. */
void initVars();
public:
*/
bool doneTargCalc() { return false; }
- /** Returns the calculated target of the branch. */
-// Addr readCalcTarg() { return nextPC; }
-
+ /** Returns the next PC. This could be the speculative next PC if it is
+ * called prior to the actual branch target being calculated.
+ */
Addr readNextPC() { return nextPC; }
/** Set the predicted target of this current instruction. */
/** Returns the branch target address. */
Addr branchTarget() const { return staticInst->branchTarget(PC); }
+ /** Number of source registers. */
int8_t numSrcRegs() const { return staticInst->numSrcRegs(); }
+
+ /** Number of destination registers. */
int8_t numDestRegs() const { return staticInst->numDestRegs(); }
// the following are used to track physical register usage
return staticInst->srcRegIdx(i);
}
+ /** Returns the result of an integer instruction. */
uint64_t readIntResult() { return instResult.integer; }
+
+ /** Returns the result of a floating point instruction. */
float readFloatResult() { return instResult.fp; }
+
+ /** Returns the result of a floating point (double) instruction. */
double readDoubleResult() { return instResult.dbl; }
//Push to .cc file.
}
}
+ /** Marks a specific register as ready.
+ * @todo: Move this to .cc file.
+ */
void markSrcRegReady(RegIndex src_idx)
{
++readyRegs;
}
}
+ /** Returns if a source register is ready. */
bool isReadySrcRegIdx(int idx) const
{
return this->_readySrcRegIdx[idx];
}
+ /** Sets this instruction as completed. */
void setCompleted() { completed = true; }
+ /** Returns whether or not this instruction is completed. */
bool isCompleted() const { return completed; }
/** Sets this instruction as ready to issue. */
/** Set the next PC of this instruction (its actual target). */
void setNextPC(uint64_t val) { nextPC = val; }
+ /** Returns the exec context.
+ * @todo: Remove this once the ExecContext is no longer used.
+ */
ExecContext *xcBase() { return xc; }
private:
+ /** Instruction effective address.
+ * @todo: Consider if this is necessary or not.
+ */
Addr instEffAddr;
+ /** Whether or not the effective address calculation is completed.
+ * @todo: Consider if this is necessary or not.
+ */
bool eaCalcDone;
public:
+ /** Sets the effective address. */
void setEA(Addr &ea) { instEffAddr = ea; eaCalcDone = true; }
+
+ /** Returns the effective address. */
const Addr &getEA() const { return instEffAddr; }
+
+ /** Returns whether or not the eff. addr. calculation has been completed. */
bool doneEACalc() { return eaCalcDone; }
+
+ /** Returns whether or not the eff. addr. source registers are ready. */
bool eaSrcsReady();
public:
+ /** Load queue index. */
int16_t lqIdx;
+
+ /** Store queue index. */
int16_t sqIdx;
};
if (fault == No_Fault) {
fault = cpu->read(req, data, lqIdx);
- }
- else {
+ } else {
// Return a fixed value to keep simulation deterministic even
// along misspeculated paths.
data = (T)-1;
traceData->setData(data);
}
-// storeSize = sizeof(T);
-// storeData = data;
-
MemReqPtr req = new MemReq(addr, xc, sizeof(T), flags);
req->asid = asid;
instShiftAmt);
}
-inline
-bool
-DefaultBP::getPrediction(uint8_t &count)
-{
- // Get the MSB of the count
- return (count >> (localCtrBits - 1));
-}
-
-inline
-unsigned
-DefaultBP::getLocalIndex(Addr &branch_addr)
-{
- return (branch_addr >> instShiftAmt) & indexMask;
-}
-
bool
DefaultBP::lookup(Addr &branch_addr)
{
assert(local_predictor_idx < localPredictorSize);
- // Increment or decrement twice to undo speculative update, then
- // properly update
if (taken) {
DPRINTF(Fetch, "Branch predictor: Branch updated as taken.\n");
localCtrs[local_predictor_idx].increment();
-// localCtrs[local_predictor_idx].increment();
} else {
DPRINTF(Fetch, "Branch predictor: Branch updated as not taken.\n");
localCtrs[local_predictor_idx].decrement();
-// localCtrs[local_predictor_idx].decrement();
}
}
+
+inline
+bool
+DefaultBP::getPrediction(uint8_t &count)
+{
+ // Keep only the counter's MSB (bit localCtrBits - 1), returned as a bool.
+ return (count >> (localCtrBits - 1));
+}
+
+inline
+unsigned
+DefaultBP::getLocalIndex(Addr &branch_addr)
+{
+ return (branch_addr >> instShiftAmt) & indexMask; // shift first, then mask
+}
private:
+ /** Returns the taken/not taken prediction given the value of the
+ * counter.
+ */
inline bool getPrediction(uint8_t &count);
+ /** Calculates the local index based on the PC. */
inline unsigned getLocalIndex(Addr &PC);
/** Array of counters that make up the local predictor. */
-//Todo:
-
#ifndef __CPU_BETA_CPU_ALPHA_DYN_INST_HH__
#define __CPU_BETA_CPU_ALPHA_DYN_INST_HH__
{
return this->cpu->readFloatRegInt(_srcRegIdx[idx]);
}
+
/** @todo: Make results into arrays so they can handle multiple dest
* registers.
*/
AlphaDynInst<Impl>::syscall()
{
this->cpu->syscall(this->threadNumber);
-// this->cpu->syscall();
}
#endif
this->regFile.setFpcr(val);
}
+ // Most of the full system code and syscall emulation is not yet
+ // implemented. These functions do show what the final interface will
+ // look like.
#ifdef FULL_SYSTEM
uint64_t *getIpr();
uint64_t readIpr(int idx, Fault &fault);
// Copy over all important state to xc once all the unrolling is done.
copyToXC();
+ // This is hardcoded to thread 0 while the CPU is only single threaded.
this->thread[0]->syscall();
-// this->thread[thread_num]->syscall();
// Copy over all important state back to CPU.
copyFromXC();
// Will have to lookup in rename map to get physical registers, then
// swap.
-/*
- for (int i = 0; i < AlphaISA::NumIntRegs; i++) {
- if (reg_redir[i]) {
- AlphaISA::IntReg temp = regs->intRegFile[i];
- regs->intRegFile[i] = regs->palregs[i];
- regs->palregs[i] = temp;
- }
- }
-*/
}
#endif // FULL_SYSTEM
#include "arch/alpha/isa_traits.hh"
-#include "cpu/beta_cpu/cpu_policy.hh"
#include "cpu/beta_cpu/alpha_params.hh"
+#include "cpu/beta_cpu/cpu_policy.hh"
// Forward declarations.
template <class Impl>
bool predict(DynInstPtr &inst, Addr &PC);
- void squash(const InstSeqNum &squashed_sn, const Addr &corr_target,
- bool actually_taken);
+ void update(const InstSeqNum &done_sn);
void squash(const InstSeqNum &squashed_sn);
- void update(const InstSeqNum &done_sn);
+ void squash(const InstSeqNum &squashed_sn, const Addr &corr_target,
+ bool actually_taken);
bool BPLookup(Addr &inst_PC)
{ return BP.lookup(inst_PC); }
- unsigned BPReadGlobalHist()
- { return 0; }
-
bool BTBValid(Addr &inst_PC)
{ return BTB.valid(inst_PC); }
{ return BTB.lookup(inst_PC); }
// Will want to include global history.
- void BPUpdate(Addr &inst_PC, unsigned global_history, bool taken)
+ void BPUpdate(Addr &inst_PC, bool taken)
{ BP.update(inst_PC, taken); }
void BTBUpdate(Addr &inst_PC, Addr &target_PC)
-#ifndef __BTB_HH__
-#define __BTB_HH__
+#ifndef __CPU_BETA_CPU_BTB_HH__
+#define __CPU_BETA_CPU_BTB_HH__
// For Addr type.
#include "arch/alpha/isa_traits.hh"
unsigned tagShiftAmt;
};
-#endif // __BTB_HH__
+#endif // __CPU_BETA_CPU_BTB_HH__
#include <stdint.h>
#include <vector>
+
#include "arch/alpha/isa_traits.hh"
#include "cpu/inst_seq.hh"
uint64_t mispredPC;
uint64_t nextPC;
- // Think of better names here.
- // Will need to be a variety of sizes...
- // Maybe make it a vector, that way only need one object.
-// std::vector<PhysRegIndex> freeRegs;
-
bool robSquashing;
// Represents the instruction that has either been retired or
// retired or squashed sequence number.
InstSeqNum doneSeqNum;
- // Extra bits of information so that the LDSTQ only updates when it
+ // Extra bit of information so that the LDSTQ only updates when it
// needs to.
-// bool commitIsStore;
bool commitIsLoad;
// Communication specifically to the IQ to tell the IQ that it can
void commit();
- uint64_t readCommitPC();
-
- void setSquashing() { _status = ROBSquashing; }
-
private:
void commitInsts();
void markCompletedInsts();
+ public:
+ uint64_t readCommitPC();
+
+ void setSquashing() { _status = ROBSquashing; }
+
+ private:
/** Time buffer interface. */
TimeBuffer<TimeStruct> *timeBuffer;
/** Pointer to FullCPU. */
FullCPU *cpu;
- //Store buffer interface? Will need to move committed stores to the
- //store buffer
-
/** Memory interface. Used for d-cache accesses. */
MemInterface *dcacheInterface;
-// @todo: Bug when something reaches execute, and mispredicts, but is never
-// put into the ROB because the ROB is full. Need rename stage to predict
-// the free ROB entries better.
-
-#ifndef __COMMIT_IMPL_HH__
-#define __COMMIT_IMPL_HH__
-
#include "base/timebuf.hh"
#include "cpu/beta_cpu/commit.hh"
#include "cpu/exetrace.hh"
// time. However, we need to avoid updating any other state
// incorrectly if it's already been squashed.
if (head_inst->isSquashed()) {
- // Hack to avoid the instruction being retired (and deleted) if
- // it hasn't been through the IEW stage yet.
-/*
- if (!head_inst->isExecuted()) {
- break;
- }
-*/
DPRINTF(Commit, "Commit: Retiring squashed instruction from "
"ROB.\n");
++commitCommittedBranches;
}
-#if 0
- // Explicit communication back to the LDSTQ that a load has been committed
- // and can be removed from the LDSTQ. Stores don't need this because
- // the LDSTQ will already have been told that a store has reached the head
- // of the ROB. Consider including communication if it's a store as well
- // to keep things orthagonal.
- if (head_inst->isMemRef()) {
- ++commitCommittedMemRefs;
- if (head_inst->isLoad()) {
- toIEW->commitInfo.commitIsLoad = true;
- ++commitCommittedLoads;
- }
- }
-#endif
-
// Now that the instruction is going to be committed, finalize its
// trace data.
if (head_inst->traceData) {
{
return rob->readHeadPC();
}
-
-#endif // __COMMIT_IMPL_HH__
void decode();
- // Might want to make squash a friend function.
- void squash();
-
private:
inline bool fetchInstsValid();
void squash(DynInstPtr &inst);
- void dumpFetchQueue();
+ public:
+ // Might want to make squash a friend function.
+ void squash();
+ private:
// Interfaces to objects outside of decode.
/** CPU interface. */
FullCPU *cpu;
/** Skid buffer between fetch and decode. */
std::queue<FetchStruct> skidBuffer;
- private:
//Consider making these unsigned to avoid any confusion.
/** Rename to decode delay, in ticks. */
unsigned renameToDecodeDelay;
-// Todo: add in statistics, only get the MachInst and let decode actually
-// decode, think about SMT fetch,
-// fix up branch prediction stuff into one thing,
-// Figure out where to advance time buffer. Add a way to get a
-// stage's current status.
+// Todo: SMT fetch,
+// Add a way to get a stage's current status.
#ifndef __CPU_BETA_CPU_SIMPLE_FETCH_HH__
#define __CPU_BETA_CPU_SIMPLE_FETCH_HH__
-//Will want to include: time buffer, structs, MemInterface, Event,
-//whatever class bzero uses, MemReqPtr
-
#include "base/statistics.hh"
#include "base/timebuf.hh"
#include "cpu/pc_event.hh"
bool stalled;
+ public:
+ class CacheCompletionEvent : public Event
+ {
+ private:
+ SimpleFetch *fetch;
+
+ public:
+ CacheCompletionEvent(SimpleFetch *_fetch);
+
+ virtual void process();
+ virtual const char *description();
+ };
+
public:
/** SimpleFetch constructor. */
SimpleFetch(Params ¶ms);
void setFetchQueue(TimeBuffer<FetchStruct> *fq_ptr);
- void tick();
-
- void fetch();
-
void processCacheCompletion();
- // Figure out PC vs next PC and how it should be updated
- void squash(const Addr &new_PC);
-
private:
- inline void doSquash(const Addr &new_PC);
-
- void squashFromDecode(const Addr &new_PC, const InstSeqNum &seq_num);
-
/**
* Looks up in the branch predictor to see if the next PC should be
* either next PC+=MachInst or a branch target.
*/
Fault fetchCacheLine(Addr fetch_PC);
+ inline void doSquash(const Addr &new_PC);
+
+ void squashFromDecode(const Addr &new_PC, const InstSeqNum &seq_num);
+
+ public:
+ // Figure out PC vs next PC and how it should be updated
+ void squash(const Addr &new_PC);
+
+ void tick();
+
+ void fetch();
+
// Align an address (typically a PC) to the start of an I-cache block.
// We fold in the PISA 64- to 32-bit conversion here as well.
Addr icacheBlockAlignPC(Addr addr)
return (addr & ~(cacheBlkMask));
}
- public:
- class CacheCompletionEvent : public Event
- {
- private:
- SimpleFetch *fetch;
-
- public:
- CacheCompletionEvent(SimpleFetch *_fetch);
-
- virtual void process();
- virtual const char *description();
- };
-
-// CacheCompletionEvent cacheCompletionEvent;
-
private:
/** Pointer to the FullCPU. */
FullCPU *cpu;
template<class Impl>
SimpleFetch<Impl>::SimpleFetch(Params ¶ms)
- : //cacheCompletionEvent(this),
- icacheInterface(params.icacheInterface),
+ : icacheInterface(params.icacheInterface),
branchPred(params),
decodeToFetchDelay(params.decodeToFetchDelay),
renameToFetchDelay(params.renameToFetchDelay),
// up this stage once the cache miss completes.
if (result != MA_HIT && icacheInterface->doEvents()) {
memReq->completionEvent = new CacheCompletionEvent(this);
-// lastIcacheStall = curTick;
// How does current model work as far as individual
// stages scheduling/unscheduling?
-#ifndef __FREE_LIST_HH__
-#define __FREE_LIST_HH__
+#ifndef __CPU_BETA_CPU_FREE_LIST_HH__
+#define __CPU_BETA_CPU_FREE_LIST_HH__
#include <iostream>
#include <queue>
#include "arch/alpha/isa_traits.hh"
-#include "cpu/beta_cpu/comm.hh"
-#include "base/traceflags.hh"
#include "base/trace.hh"
+#include "base/traceflags.hh"
+#include "cpu/beta_cpu/comm.hh"
/**
* FreeList class that simply holds the list of free integer and floating
*/
class SimpleFreeList
{
- public:
-
private:
/** The list of free integer registers. */
std::queue<PhysRegIndex> freeIntRegs;
unsigned _numLogicalFloatRegs,
unsigned _numPhysicalFloatRegs);
- PhysRegIndex getIntReg();
+ inline PhysRegIndex getIntReg();
- PhysRegIndex getFloatReg();
+ inline PhysRegIndex getFloatReg();
- void addReg(PhysRegIndex freed_reg);
+ inline void addReg(PhysRegIndex freed_reg);
- void addIntReg(PhysRegIndex freed_reg);
+ inline void addIntReg(PhysRegIndex freed_reg);
- void addFloatReg(PhysRegIndex freed_reg);
+ inline void addFloatReg(PhysRegIndex freed_reg);
bool hasFreeIntRegs()
{ return !freeIntRegs.empty(); }
freeFloatRegs.push(freed_reg);
}
-#endif // __FREE_LIST_HH__
+#endif // __CPU_BETA_CPU_FREE_LIST_HH__
-#ifndef __SIMPLE_FULL_CPU_CC__
-#define __SIMPLE_FULL_CPU_CC__
-
#ifdef FULL_SYSTEM
#include "sim/system.hh"
#else
// Forward declaration of FullBetaCPU.
template class FullBetaCPU<AlphaSimpleImpl>;
-
-#endif // __SIMPLE_FULL_CPU_HH__
#include <list>
#include <vector>
-#include "cpu/beta_cpu/comm.hh"
-
#include "base/statistics.hh"
#include "base/timebuf.hh"
#include "cpu/base_cpu.hh"
-#include "cpu/exec_context.hh"
+#include "cpu/beta_cpu/comm.hh"
#include "cpu/beta_cpu/cpu_policy.hh"
+#include "cpu/exec_context.hh"
#include "sim/process.hh"
#ifdef FULL_SYSTEM
}
public:
- void tick();
-
FullBetaCPU(Params ¶ms);
~FullBetaCPU();
- void init();
-
void fullCPURegStats();
+ void tick();
+
+ void init();
+
void activateContext(int thread_num, int delay);
void suspendContext(int thread_num);
void deallocateContext(int thread_num);
} else if (inst->isStore()) {
ldstQueue.insertStore(inst);
- // A bit of a hack. Set that it can commit so that
- // the commit stage will try committing it, and then
- // once commit realizes it's a store it will send back
- // a signal to this stage to issue and execute that
- // store. Change to be a bit that says the instruction
- // has extra work to do at commit.
-// inst->setCanCommit();
-
-// instQueue.insertNonSpec(inst);
-
++iewDispStoreInsts;
-// ++iewDispNonSpecInsts;
-
-// continue;
} else if (inst->isNonSpeculative()) {
DPRINTF(IEW, "IEW: Issue: Nonspeculative instruction "
"encountered, skipping.\n");
DPRINTF(IEW, "IEW: Issue: Executed branch encountered, "
"skipping.\n");
-// assert(inst->isDirectCtrl());
-
inst->setIssued();
inst->setCanCommit();
}
++iewSquashCycles;
-
- // Also should advance its own time buffers if the stage ran.
- // Not sure about this...
-// issueToExecQueue.advance();
} else if (_status == Blocked) {
// Continue to tell previous stage to stall.
toRename->iewInfo.stall = true;
// or store to commit. Also check if it's being told to execute a
// nonspeculative instruction.
// This is pretty inefficient...
-// if (0/*fromCommit->commitInfo.commitIsStore*/) {
if (!fromCommit->commitInfo.squash &&
!fromCommit->commitInfo.robSquashing) {
ldstQueue.commitStores(fromCommit->commitInfo.doneSeqNum);
-// } else if (fromCommit->commitInfo.commitIsLoad) {
ldstQueue.commitLoads(fromCommit->commitInfo.doneSeqNum);
}
-// }
if (fromCommit->commitInfo.nonSpecSeqNum != 0) {
instQueue.scheduleNonSpec(fromCommit->commitInfo.nonSpecSeqNum);
-#ifndef __INST_QUEUE_HH__
-#define __INST_QUEUE_HH__
+#ifndef __CPU_BETA_CPU_INST_QUEUE_HH__
+#define __CPU_BETA_CPU_INST_QUEUE_HH__
#include <list>
#include <map>
void stopSquash();
- /** Debugging function to dump all the list sizes, as well as print
- * out the list of nonspeculative instructions. Should not be used
- * in any other capacity, but it has no harmful sideaffects.
- */
- void dumpLists();
-
- private:
- /** Debugging function to count how many entries are in the IQ. It does
- * a linear walk through the instructions, so do not call this function
- * during normal execution.
- */
- int countInsts();
-
private:
/** Pointer to the CPU. */
FullCPU *cpu;
/** List of ready branch instructions. */
ReadyInstQueue readyBranchInsts;
- /** List of ready memory instructions. */
-// ReadyInstQueue readyMemInsts;
-
/** List of ready miscellaneous instructions. */
ReadyInstQueue readyMiscInsts;
bool addToDependents(DynInstPtr &new_inst);
void insertDependency(DynInstPtr &new_inst);
void createDependency(DynInstPtr &new_inst);
- void dumpDependGraph();
void addIfReady(DynInstPtr &inst);
+ private:
+ /** Debugging function to count how many entries are in the IQ. It does
+ * a linear walk through the instructions, so do not call this function
+ * during normal execution.
+ */
+ int countInsts();
+
+ /** Debugging function to dump out the dependency graph.
+ */
+ void dumpDependGraph();
+
+ /** Debugging function to dump all the list sizes, as well as print
+ * out the list of nonspeculative instructions. Should not be used
+ * in any other capacity, but it has no harmful side effects.
+ */
+ void dumpLists();
+
Stats::Scalar<> iqInstsAdded;
Stats::Scalar<> iqNonSpecInstsAdded;
// Stats::Scalar<> iqIntInstsAdded;
};
-#endif //__INST_QUEUE_HH__
+#endif //__CPU_BETA_CPU_INST_QUEUE_HH__
-#ifndef __INST_QUEUE_IMPL_HH__
-#define __INST_QUEUE_IMPL_HH__
-
// Todo:
// Current ordering allows for 0 cycle added-to-scheduled. Could maybe fake
// it; either do in reverse order, or have added instructions put into a
fromCommit = timeBuffer->getWire(-commitToIEWDelay);
}
+template <class Impl>
+unsigned
+InstructionQueue<Impl>::numFreeEntries()
+{
+ return freeEntries; // plain accessor: the member is returned unmodified
+}
+
// Might want to do something more complex if it knows how many instructions
// will be issued this cycle.
template <class Impl>
}
}
-template <class Impl>
-unsigned
-InstructionQueue<Impl>::numFreeEntries()
-{
- return freeEntries;
-}
-
template <class Impl>
void
InstructionQueue<Impl>::insert(DynInstPtr &new_inst)
break;
case Squashed:
-// issuing_inst = squashed_head_inst;
assert(0 && "Squashed insts should not issue any more!");
squashedInsts.pop();
// Set the squashed instruction as able to commit so that commit
nonSpecInsts.erase(inst_it);
}
+template <class Impl>
+void
+InstructionQueue<Impl>::wakeDependents(DynInstPtr &completed_inst)
+{
+ DPRINTF(IQ, "IQ: Waking dependents of completed instruction.\n");
+ // For each destination register of the completed instruction, walk that
+ // register's chain in the dependency graph, call markSrcRegReady() on
+ // every waiting instruction, and free the chain entry that held it.
+ DependencyEntry *curr;
+
+ // Memory references are additionally reported to the memory dependence
+ // unit so that it can wake the instructions it is tracking.
+
+ if (completed_inst->isMemRef()) {
+ memDepUnit.wakeDependents(completed_inst);
+ }
+
+ for (int dest_reg_idx = 0;
+ dest_reg_idx < completed_inst->numDestRegs();
+ dest_reg_idx++)
+ {
+ PhysRegIndex dest_reg =
+ completed_inst->renamedDestRegIdx(dest_reg_idx);
+
+ // Special case of uniq or control registers (index >= numPhysRegs).
+ // They are not handled by the IQ and have no dependency graph entry.
+ // @todo Figure out a cleaner way to handle this.
+ if (dest_reg >= numPhysRegs) {
+ continue;
+ }
+
+ DPRINTF(IQ, "IQ: Waking any dependents on register %i.\n",
+ (int) dest_reg);
+
+ // Maybe abstract this part into a function.
+ // Pop entries off the head of this register's dependency chain until
+ // it is empty, waking the instruction held by each entry.
+ while (dependGraph[dest_reg].next) {
+
+ curr = dependGraph[dest_reg].next;
+
+ DPRINTF(IQ, "IQ: Waking up a dependent instruction, PC%#x.\n",
+ curr->inst->readPC());
+
+ // Might want to give more information to the instruction
+ // so that it knows which of its source registers is ready.
+ // However, that would mean that the dependency graph entries
+ // would need to hold the src_reg_idx.
+ curr->inst->markSrcRegReady();
+
+ addIfReady(curr->inst);
+
+ dependGraph[dest_reg].next = curr->next;
+
+ DependencyEntry::mem_alloc_counter--;
+
+ curr->inst = NULL;
+
+ delete curr;
+ }
+
+ // Reset the head node now that all of its dependents have been woken
+ // up and their chain entries deleted.
+ dependGraph[dest_reg].next = NULL;
+ dependGraph[dest_reg].inst = NULL;
+
+ // Mark the scoreboard as having that register ready.
+ regScoreboard[dest_reg] = true;
+ }
+}
+
template <class Impl>
void
InstructionQueue<Impl>::violation(DynInstPtr &store,
template <class Impl>
void
-InstructionQueue<Impl>::wakeDependents(DynInstPtr &completed_inst)
+InstructionQueue<Impl>::DependencyEntry::insert(DynInstPtr &new_inst)
{
- DPRINTF(IQ, "IQ: Waking dependents of completed instruction.\n");
- //Look at the physical destination register of the DynInst
- //and look it up on the dependency graph. Then mark as ready
- //any instructions within the instruction queue.
- DependencyEntry *curr;
-
- // Tell the memory dependence unit to wake any dependents on this
- // instruction if it is a memory instruction.
-
- if (completed_inst->isMemRef()) {
- memDepUnit.wakeDependents(completed_inst);
- }
-
- for (int dest_reg_idx = 0;
- dest_reg_idx < completed_inst->numDestRegs();
- dest_reg_idx++)
- {
- PhysRegIndex dest_reg =
- completed_inst->renamedDestRegIdx(dest_reg_idx);
-
- // Special case of uniq or control registers. They are not
- // handled by the IQ and thus have no dependency graph entry.
- // @todo Figure out a cleaner way to handle this.
- if (dest_reg >= numPhysRegs) {
- continue;
- }
-
- DPRINTF(IQ, "IQ: Waking any dependents on register %i.\n",
- (int) dest_reg);
+ //Add this new, dependent instruction at the head of the dependency
+ //chain.
- //Maybe abstract this part into a function.
- //Go through the dependency chain, marking the registers as ready
- //within the waiting instructions.
- while (dependGraph[dest_reg].next) {
+ // First create the entry that will be added to the head of the
+ // dependency chain.
+ DependencyEntry *new_entry = new DependencyEntry;
+ new_entry->next = this->next;
+ new_entry->inst = new_inst;
- curr = dependGraph[dest_reg].next;
+ // Then actually add it to the chain.
+ this->next = new_entry;
- DPRINTF(IQ, "IQ: Waking up a dependent instruction, PC%#x.\n",
- curr->inst->readPC());
+ ++mem_alloc_counter;
+}
- // Might want to give more information to the instruction
- // so that it knows which of its source registers is ready.
- // However that would mean that the dependency graph entries
- // would need to hold the src_reg_idx.
- curr->inst->markSrcRegReady();
+template <class Impl>
+void
+InstructionQueue<Impl>::DependencyEntry::remove(DynInstPtr &inst_to_remove)
+{
+ DependencyEntry *prev = this;
+ DependencyEntry *curr = this->next;
- addIfReady(curr->inst);
+ // Make sure curr isn't NULL. Because this instruction is being
+ // removed from a dependency list, it must have been placed there at
+ // an earlier time. The dependency chain should not be empty,
+ // unless the instruction dependent upon it is already ready.
+ if (curr == NULL) {
+ return;
+ }
- dependGraph[dest_reg].next = curr->next;
+ // Find the instruction to remove within the dependency linked list.
+ while(curr->inst != inst_to_remove)
+ {
+ prev = curr;
+ curr = curr->next;
- DependencyEntry::mem_alloc_counter--;
+ assert(curr != NULL);
+ }
- curr->inst = NULL;
+ // Now remove this instruction from the list.
+ prev->next = curr->next;
- delete curr;
- }
+ --mem_alloc_counter;
- // Reset the head node now that all of its dependents have been woken
- // up.
- dependGraph[dest_reg].next = NULL;
- dependGraph[dest_reg].inst = NULL;
+ // Could push this off to the destructor of DependencyEntry
+ curr->inst = NULL;
- // Mark the scoreboard as having that register ready.
- regScoreboard[dest_reg] = true;
- }
+ delete curr;
}
template <class Impl>
}
}
-template <class Impl>
-void
-InstructionQueue<Impl>::DependencyEntry::insert(DynInstPtr &new_inst)
-{
- //Add this new, dependent instruction at the head of the dependency
- //chain.
-
- // First create the entry that will be added to the head of the
- // dependency chain.
- DependencyEntry *new_entry = new DependencyEntry;
- new_entry->next = this->next;
- new_entry->inst = new_inst;
-
- // Then actually add it to the chain.
- this->next = new_entry;
-
- ++mem_alloc_counter;
-}
-
-template <class Impl>
-void
-InstructionQueue<Impl>::DependencyEntry::remove(DynInstPtr &inst_to_remove)
-{
- DependencyEntry *prev = this;
- DependencyEntry *curr = this->next;
-
- // Make sure curr isn't NULL. Because this instruction is being
- // removed from a dependency list, it must have been placed there at
- // an earlier time. The dependency chain should not be empty,
- // unless the instruction dependent upon it is already ready.
- if (curr == NULL) {
- return;
- }
-
- // Find the instruction to remove within the dependency linked list.
- while(curr->inst != inst_to_remove)
- {
- prev = curr;
- curr = curr->next;
-
- assert(curr != NULL);
- }
-
- // Now remove this instruction from the list.
- prev->next = curr->next;
-
- --mem_alloc_counter;
-
- // Could push this off to the destructor of DependencyEntry
- curr->inst = NULL;
-
- delete curr;
-}
-
template <class Impl>
void
InstructionQueue<Impl>::addIfReady(DynInstPtr &inst)
cprintf("Ready branch list size: %i\n", readyBranchInsts.size());
-// cprintf("Ready memory list size: %i\n", readyMemInsts.size());
-
cprintf("Ready misc list size: %i\n", readyMiscInsts.size());
cprintf("Squashed list size: %i\n", squashedInsts.size());
cprintf("\n");
}
-
-#endif // __INST_QUEUE_IMPL_HH__
-#ifndef __MEM_DEP_UNIT_HH__
-#define __MEM_DEP_UNIT_HH__
+#ifndef __CPU_BETA_CPU_MEM_DEP_UNIT_HH__
+#define __CPU_BETA_CPU_MEM_DEP_UNIT_HH__
-#include <set>
#include <map>
+#include <set>
-#include "cpu/inst_seq.hh"
#include "base/statistics.hh"
+#include "cpu/inst_seq.hh"
/**
* Memory dependency unit class. This holds the memory dependence predictor.
void insertNonSpec(DynInstPtr &inst);
+ // Will want to make this operation relatively fast. Right now it
+ // is somewhat slow.
+ DynInstPtr &top();
+
+ void pop();
+
void regsReady(DynInstPtr &inst);
void nonSpecInstReady(DynInstPtr &inst);
void violation(DynInstPtr &store_inst, DynInstPtr &violating_load);
- // Will want to make this operation relatively fast. Right now it
- // kind of sucks.
- DynInstPtr &top();
-
- void pop();
-
inline bool empty()
{ return readyInsts.empty(); }
}
};
-
- private:
inline void moveToReady(dep_it_t &woken_inst);
- private:
/** List of instructions that have passed through rename, yet are still
* waiting on either a memory dependence to resolve or source registers to
* become available before they can issue.
Stats::Scalar<> conflictingStores;
};
-#endif
+#endif // __CPU_BETA_CPU_MEM_DEP_UNIT_HH__
-#ifndef __RAS_HH__
-#define __RAS_HH__
+#ifndef __CPU_BETA_CPU_RAS_HH__
+#define __CPU_BETA_CPU_RAS_HH__
// For Addr type.
#include "arch/alpha/isa_traits.hh"
private:
inline void incrTos()
- { tos = (tos + 1) % numEntries; }
+ { if (++tos == numEntries) tos = 0; }
inline void decrTos()
{ tos = (tos == 0 ? numEntries - 1 : tos - 1); }
unsigned tos;
};
-#endif // __RAS_HH__
+#endif // __CPU_BETA_CPU_RAS_HH__
#include "cpu/beta_cpu/comm.hh"
#ifdef FULL_SYSTEM
-#include "kern/kernel_stats.hh"
#include "arch/alpha/ev5.hh"
+#include "kern/kernel_stats.hh"
using namespace EV5;
#endif
// Things that are in the ifdef FULL_SYSTEM are pretty dependent on the ISA,
// and should go in the AlphaFullCPU.
-extern void debug_break();
-
template <class Impl>
class PhysRegFile
{
/** Miscellaneous register file. */
MiscRegFile miscRegs;
- Addr pc; // program counter
- Addr npc; // next-cycle program counter
+ /** Program counter. */
+ Addr pc;
+
+ /** Next-cycle program counter. */
+ Addr npc;
#ifdef FULL_SYSTEM
private:
// write entire quad w/ no side-effect
old = ipr[idx];
ipr[idx] = val;
-// kernelStats.context(old, val);
break;
case ISA::IPR_DTB_PTE:
// only write least significant five bits - interrupt level
ipr[idx] = val & 0x1f;
-// kernelStats.swpipl(ipr[idx]);
break;
case ISA::IPR_DTB_CM:
-// if (val & 0x18)
-// kernelStats->mode(Kernel::user);
-// else
-// kernelStats->mode(Kernel::kernel);
case ISA::IPR_ICM:
// only write two mode bits - processor mode
DPRINTF(Rename, "Rename: Done squashing, going to running.\n");
_status = Running;
+ rename();
} else {
doSquash();
}
#endif
}
- // Perhaps put this outside of this function, since this will
- // happen regardless of whether or not the stage is blocked or
- // squashing.
- // Read from the time buffer any necessary data.
- // Read registers that are freed, and add them to the freelist.
- // This is unnecessary due to the history buffer (assuming the history
- // buffer works properly).
-/*
- while(!fromCommit->commitInfo.freeRegs.empty())
- {
- PhysRegIndex freed_reg = fromCommit->commitInfo.freeRegs.back();
- DPRINTF(Rename, "Rename: Adding freed register %i to freelist.\n",
- (int)freed_reg);
- freeList->addReg(freed_reg);
-
- fromCommit->commitInfo.freeRegs.pop_back();
- }
-*/
-
}
template<class Impl>
unsigned _choice_predictor_size,
unsigned _choice_ctr_bits,
unsigned _instShiftAmt)
- : local_predictor_size(_local_predictor_size),
- local_ctr_bits(_local_ctr_bits),
- local_history_table_size(_local_history_table_size),
- local_history_bits(_local_history_bits),
- global_predictor_size(_global_predictor_size),
- global_ctr_bits(_global_ctr_bits),
- global_history_bits(_global_history_bits),
- choice_predictor_size(_global_predictor_size),
- choice_ctr_bits(_choice_ctr_bits),
+ : localPredictorSize(_local_predictor_size),
+ localCtrBits(_local_ctr_bits),
+ localHistoryTableSize(_local_history_table_size),
+ localHistoryBits(_local_history_bits),
+ globalPredictorSize(_global_predictor_size),
+ globalCtrBits(_global_ctr_bits),
+ globalHistoryBits(_global_history_bits),
+ choicePredictorSize(_global_predictor_size),
+ choiceCtrBits(_choice_ctr_bits),
instShiftAmt(_instShiftAmt)
{
//Should do checks here to make sure sizes are correct (powers of 2)
//Setup the array of counters for the local predictor
- local_ctrs = new SatCounter[local_predictor_size];
+ localCtrs = new SatCounter[localPredictorSize];
- for (int i = 0; i < local_predictor_size; ++i)
- local_ctrs[i].setBits(local_ctr_bits);
+ for (int i = 0; i < localPredictorSize; ++i)
+ localCtrs[i].setBits(localCtrBits);
//Setup the history table for the local table
- local_history_table = new unsigned[local_history_table_size];
+ localHistoryTable = new unsigned[localHistoryTableSize];
- for (int i = 0; i < local_history_table_size; ++i)
- local_history_table[i] = 0;
+ for (int i = 0; i < localHistoryTableSize; ++i)
+ localHistoryTable[i] = 0;
// Setup the local history mask
- localHistoryMask = (1 << local_history_bits) - 1;
+ localHistoryMask = (1 << localHistoryBits) - 1;
//Setup the array of counters for the global predictor
- global_ctrs = new SatCounter[global_predictor_size];
+ globalCtrs = new SatCounter[globalPredictorSize];
- for (int i = 0; i < global_predictor_size; ++i)
- global_ctrs[i].setBits(global_ctr_bits);
+ for (int i = 0; i < globalPredictorSize; ++i)
+ globalCtrs[i].setBits(globalCtrBits);
//Clear the global history
- global_history = 0;
+ globalHistory = 0;
// Setup the global history mask
- globalHistoryMask = (1 << global_history_bits) - 1;
+ globalHistoryMask = (1 << globalHistoryBits) - 1;
//Setup the array of counters for the choice predictor
- choice_ctrs = new SatCounter[choice_predictor_size];
+ choiceCtrs = new SatCounter[choicePredictorSize];
- for (int i = 0; i < choice_predictor_size; ++i)
- choice_ctrs[i].setBits(choice_ctr_bits);
+ for (int i = 0; i < choicePredictorSize; ++i)
+ choiceCtrs[i].setBits(choiceCtrBits);
- threshold = (1 << (local_ctr_bits - 1)) - 1;
+ threshold = (1 << (localCtrBits - 1)) - 1;
threshold = threshold / 2;
}
unsigned
TournamentBP::calcLocHistIdx(Addr &branch_addr)
{
- return (branch_addr >> instShiftAmt) & (local_history_table_size - 1);
+ return (branch_addr >> instShiftAmt) & (localHistoryTableSize - 1);
}
inline
void
TournamentBP::updateHistoriesTaken(unsigned local_history_idx)
{
- global_history = (global_history << 1) | 1;
- global_history = global_history & globalHistoryMask;
+ globalHistory = (globalHistory << 1) | 1;
+ globalHistory = globalHistory & globalHistoryMask;
- local_history_table[local_history_idx] =
- (local_history_table[local_history_idx] << 1) | 1;
+ localHistoryTable[local_history_idx] =
+ (localHistoryTable[local_history_idx] << 1) | 1;
}
inline
void
TournamentBP::updateHistoriesNotTaken(unsigned local_history_idx)
{
- global_history = (global_history << 1);
- global_history = global_history & globalHistoryMask;
+ globalHistory = (globalHistory << 1);
+ globalHistory = globalHistory & globalHistoryMask;
- local_history_table[local_history_idx] =
- (local_history_table[local_history_idx] << 1);
+ localHistoryTable[local_history_idx] =
+ (localHistoryTable[local_history_idx] << 1);
}
bool
//Lookup in the local predictor to get its branch prediction
local_history_idx = calcLocHistIdx(branch_addr);
- local_predictor_idx = local_history_table[local_history_idx]
+ local_predictor_idx = localHistoryTable[local_history_idx]
& localHistoryMask;
- local_prediction = local_ctrs[local_predictor_idx].read();
+ local_prediction = localCtrs[local_predictor_idx].read();
//Lookup in the global predictor to get its branch prediction
- global_prediction = global_ctrs[global_history].read();
+ global_prediction = globalCtrs[globalHistory].read();
//Lookup in the choice predictor to see which one to use
- choice_prediction = choice_ctrs[global_history].read();
+ choice_prediction = choiceCtrs[globalHistory].read();
//@todo Put a threshold value in for the three predictors that can
// be set through the constructor (so this isn't hard coded).
if (global_prediction > threshold) {
updateHistoriesTaken(local_history_idx);
- assert(global_history < global_predictor_size &&
- local_history_idx < local_predictor_size);
+ assert(globalHistory < globalPredictorSize &&
+ local_history_idx < localPredictorSize);
- global_ctrs[global_history].increment();
- local_ctrs[local_history_idx].increment();
+ globalCtrs[globalHistory].increment();
+ localCtrs[local_history_idx].increment();
return true;
} else {
updateHistoriesNotTaken(local_history_idx);
- assert(global_history < global_predictor_size &&
- local_history_idx < local_predictor_size);
+ assert(globalHistory < globalPredictorSize &&
+ local_history_idx < localPredictorSize);
- global_ctrs[global_history].decrement();
- local_ctrs[local_history_idx].decrement();
+ globalCtrs[globalHistory].decrement();
+ localCtrs[local_history_idx].decrement();
return false;
}
if (local_prediction > threshold) {
updateHistoriesTaken(local_history_idx);
- assert(global_history < global_predictor_size &&
- local_history_idx < local_predictor_size);
+ assert(globalHistory < globalPredictorSize &&
+ local_history_idx < localPredictorSize);
- global_ctrs[global_history].increment();
- local_ctrs[local_history_idx].increment();
+ globalCtrs[globalHistory].increment();
+ localCtrs[local_history_idx].increment();
return true;
} else {
updateHistoriesNotTaken(local_history_idx);
- assert(global_history < global_predictor_size &&
- local_history_idx < local_predictor_size);
+ assert(globalHistory < globalPredictorSize &&
+ local_history_idx < localPredictorSize);
- global_ctrs[global_history].decrement();
- local_ctrs[local_history_idx].decrement();
+ globalCtrs[globalHistory].decrement();
+ localCtrs[local_history_idx].decrement();
return false;
}
bool global_pred_taken;
// Load the correct global history into the register.
- global_history = correct_gh;
+ globalHistory = correct_gh;
// Get the local predictor's current prediction, remove the incorrect
// update, and update the local predictor
local_history_idx = calcLocHistIdx(branch_addr);
- local_predictor_idx = local_history_table[local_history_idx];
+ local_predictor_idx = localHistoryTable[local_history_idx];
local_predictor_idx = (local_predictor_idx >> 1) & localHistoryMask;
- local_prediction = local_ctrs[local_predictor_idx].read();
+ local_prediction = localCtrs[local_predictor_idx].read();
local_pred_taken = local_prediction > threshold;
//Get the global predictor's current prediction, and update the
//global predictor
- global_prediction = global_ctrs[global_history].read();
+ global_prediction = globalCtrs[globalHistory].read();
global_pred_taken = global_prediction > threshold;
//Update the choice predictor to tell it which one was correct
//If the local prediction matches the actual outcome, decrement
//the counter. Otherwise increment the counter.
if (local_pred_taken == taken) {
- choice_ctrs[global_history].decrement();
+ choiceCtrs[globalHistory].decrement();
} else {
- choice_ctrs[global_history].increment();
+ choiceCtrs[globalHistory].increment();
}
}
if (taken) {
- assert(global_history < global_predictor_size &&
- local_predictor_idx < local_predictor_size);
+ assert(globalHistory < globalPredictorSize &&
+ local_predictor_idx < localPredictorSize);
- local_ctrs[local_predictor_idx].increment();
- global_ctrs[global_history].increment();
+ localCtrs[local_predictor_idx].increment();
+ globalCtrs[globalHistory].increment();
- global_history = (global_history << 1) | 1;
- global_history = global_history & globalHistoryMask;
+ globalHistory = (globalHistory << 1) | 1;
+ globalHistory = globalHistory & globalHistoryMask;
- local_history_table[local_history_idx] |= 1;
+ localHistoryTable[local_history_idx] |= 1;
}
else {
- assert(global_history < global_predictor_size &&
- local_predictor_idx < local_predictor_size);
+ assert(globalHistory < globalPredictorSize &&
+ local_predictor_idx < localPredictorSize);
- local_ctrs[local_predictor_idx].decrement();
- global_ctrs[global_history].decrement();
+ localCtrs[local_predictor_idx].decrement();
+ globalCtrs[globalHistory].decrement();
- global_history = (global_history << 1);
- global_history = global_history & globalHistoryMask;
+ globalHistory = (globalHistory << 1);
+ globalHistory = globalHistory & globalHistoryMask;
- local_history_table[local_history_idx] &= ~1;
+ localHistoryTable[local_history_idx] &= ~1;
}
}
*/
void update(Addr &branch_addr, unsigned global_history, bool taken);
- inline unsigned readGlobalHist() { return global_history; }
+ inline unsigned readGlobalHist() { return globalHistory; }
private:
inline void updateHistoriesNotTaken(unsigned local_history_idx);
/** Local counters. */
- SatCounter *local_ctrs;
+ SatCounter *localCtrs;
/** Size of the local predictor. */
- unsigned local_predictor_size;
+ unsigned localPredictorSize;
/** Number of bits of the local predictor's counters. */
- unsigned local_ctr_bits;
+ unsigned localCtrBits;
/** Array of local history table entries. */
- unsigned *local_history_table;
+ unsigned *localHistoryTable;
/** Size of the local history table. */
- unsigned local_history_table_size;
+ unsigned localHistoryTableSize;
/** Number of bits for each entry of the local history table.
* @todo Doesn't this come from the size of the local predictor?
*/
- unsigned local_history_bits;
+ unsigned localHistoryBits;
/** Mask to get the proper local history. */
unsigned localHistoryMask;
/** Array of counters that make up the global predictor. */
- SatCounter *global_ctrs;
+ SatCounter *globalCtrs;
/** Size of the global predictor. */
- unsigned global_predictor_size;
+ unsigned globalPredictorSize;
/** Number of bits of the global predictor's counters. */
- unsigned global_ctr_bits;
+ unsigned globalCtrBits;
/** Global history register. */
- unsigned global_history;
+ unsigned globalHistory;
/** Number of bits for the global history. */
- unsigned global_history_bits;
+ unsigned globalHistoryBits;
/** Mask to get the proper global history. */
unsigned globalHistoryMask;
/** Array of counters that make up the choice predictor. */
- SatCounter *choice_ctrs;
+ SatCounter *choiceCtrs;
/** Size of the choice predictor (identical to the global predictor). */
- unsigned choice_predictor_size;
+ unsigned choicePredictorSize;
/** Number of bits of the choice predictor's counters. */
- unsigned choice_ctr_bits;
+ unsigned choiceCtrBits;
/** Number of bits to shift the instruction over to get rid of the word
* offset.