*
*/
+#include "config/the_isa.hh"
#include "cpu/inorder/resources/fetch_seq_unit.hh"
#include "cpu/inorder/resource_pool.hh"
+#include "debug/InOrderFetchSeq.hh"
+#include "debug/InOrderStall.hh"
using namespace std;
using namespace TheISA;
using namespace ThePipeline;
// Builds the fetch sequence unit: forwards the resource parameters to the
// Resource base class, records the size of one machine instruction, and
// resets the per-thread PC and squash bookkeeping.
FetchSeqUnit::FetchSeqUnit(std::string res_name, int res_id, int res_width,
- int res_latency, InOrderCPU *_cpu, ThePipeline::Params *params)
+ Cycles res_latency, InOrderCPU *_cpu,
+ ThePipeline::Params *params)
: Resource(res_name, res_id, res_width, res_latency, _cpu),
instSize(sizeof(MachInst))
{
- for (int tid = 0; tid < ThePipeline::MaxThreads; tid++) {
- delaySlotInfo[tid].numInsts = 0;
- delaySlotInfo[tid].targetReady = false;
-
// Every thread starts with an invalid PC; pcValid is only set to true
// elsewhere in this file (e.g. in activateThread()).
+ for (ThreadID tid = 0; tid < ThePipeline::MaxThreads; tid++) {
pcValid[tid] = false;
pcBlockStage[tid] = 0;
+ //@todo: Use CPU's squashSeqNum here instead of maintaining our own
+ // state
// NOTE(review): (InstSeqNum)-1 looks like a "no squash recorded" sentinel;
// confirm InstSeqNum is unsigned so this is the largest value.
squashSeqNum[tid] = (InstSeqNum)-1;
lastSquashCycle[tid] = 0;
}
}
// Releases the FetchSeqEvent array that init() allocates with new[].
// NOTE(review): the ResourceRequest objects init() stores in reqs[] are not
// freed here; presumably the Resource base class owns them -- confirm.
+FetchSeqUnit::~FetchSeqUnit()
+{
+ delete [] resourceEvent;
+}
+
// Allocates the per-slot state for this resource: an array of `width`
// FetchSeqEvent objects and one ResourceRequest per slot, then calls
// initSlots().
void
FetchSeqUnit::init()
{
resourceEvent = new FetchSeqEvent[width];
// One request object is created up front for each of the `width` slots;
// execute() later looks them up by slot number.
+ for (int i = 0; i < width; i++) {
+ reqs[i] = new ResourceRequest(this);
+ }
+
initSlots();
}
// Services the request held in slot `slot_num`.
//
// An instruction that already carries a fault is passed along untouched.
// Otherwise the request's command selects the work:
//  - AssignNextPC:   give the instruction the thread's current PC and a new
//                    sequence number, then advance the thread's PC.
//  - UpdateTargetPC: react to a control instruction's prediction (block on
//                    an unpredicted return, or set up a squash when the
//                    branch is predicted taken).
//
// NOTE(review): the end of the switch statement and of this function is not
// visible in this excerpt.
void
FetchSeqUnit::execute(int slot_num)
{
- // After this is working, change this to a reinterpret cast
- // for performance considerations
- ResourceRequest* fs_req = reqMap[slot_num];
+ ResourceRequest* fs_req = reqs[slot_num];
DynInstPtr inst = fs_req->inst;
- int tid = inst->readTid();
+ ThreadID tid = inst->readTid();
int stage_num = fs_req->getStageNum();
- int seq_num = inst->seqNum;
- fs_req->fault = NoFault;
// A faulted instruction gets no PC assignment or target update; the request
// is simply completed so the instruction can move on.
+ if (inst->fault != NoFault) {
+ DPRINTF(InOrderFetchSeq,
+ "[tid:%i]: [sn:%i]: Detected %s fault @ %x. Forwarding to "
+ "next stage.\n", tid, inst->seqNum, inst->fault->name(),
+ inst->pcState());
+ fs_req->done();
+ return;
+ }
switch (fs_req->cmd)
{
case AssignNextPC:
{
- if (pcValid[tid]) {
-
- if (delaySlotInfo[tid].targetReady &&
- delaySlotInfo[tid].numInsts == 0) {
- // Set PC to target
- PC[tid] = delaySlotInfo[tid].targetAddr; //next_PC
- nextPC[tid] = PC[tid] + instSize; //next_NPC
- nextNPC[tid] = PC[tid] + (2 * instSize);
+ DPRINTF(InOrderFetchSeq, "[tid:%i]: Current PC is %s\n", tid,
+ pc[tid]);
- delaySlotInfo[tid].targetReady = false;
-
- DPRINTF(InOrderFetchSeq, "[tid:%i]: Setting PC to delay slot target\n",tid);
- }
-
- inst->setPC(PC[tid]);
- inst->setNextPC(PC[tid] + instSize);
- inst->setNextNPC(PC[tid] + (instSize * 2));
// With a valid PC: copy it into the instruction, use its instruction
// address as the memory address, and move the thread's PC forward.
+ if (pcValid[tid]) {
+ inst->pcState(pc[tid]);
+ inst->setMemAddr(pc[tid].instAddr());
- inst->setPredTarg(inst->readNextNPC());
+ // Advance to next PC (typically PC + 4)
+ pc[tid].advance();
- inst->setMemAddr(PC[tid]);
inst->setSeqNum(cpu->getAndIncrementInstSeq(tid));
- DPRINTF(InOrderFetchSeq, "[tid:%i]: Assigning [sn:%i] to PC %08p\n", tid,
- inst->seqNum, inst->readPC());
-
- if (delaySlotInfo[tid].numInsts > 0) {
- --delaySlotInfo[tid].numInsts;
-
- // It's OK to set PC to target of branch
- if (delaySlotInfo[tid].numInsts == 0) {
- delaySlotInfo[tid].targetReady = true;
- }
-
- DPRINTF(InOrderFetchSeq, "[tid:%i]: %i delay slot inst(s) left to"
- " process.\n", tid, delaySlotInfo[tid].numInsts);
- }
-
- PC[tid] = nextPC[tid];
- nextPC[tid] = nextNPC[tid];
- nextNPC[tid] += instSize;
+ DPRINTF(InOrderFetchSeq, "[tid:%i]: Assigning [sn:%i] to "
+ "PC %s\n", tid, inst->seqNum, inst->pcState());
fs_req->done();
} else {
// No valid PC for this thread: log a stall and finish the request with
// done(false) (presumably "not completed" -- confirm against
// ResourceRequest::done).
DPRINTF(InOrderStall, "STALL: [tid:%i]: NPC not valid\n", tid);
- fs_req->setCompleted(false);
+ fs_req->done(false);
}
}
break;
case UpdateTargetPC:
{
+ assert(!inst->isCondDelaySlot() &&
+ "Not Handling Conditional Delay Slot");
+
// Only control instructions change fetch state here. A control
// instruction that is neither an unpredicted return nor predicted taken
// falls through both branches below with no action.
if (inst->isControl()) {
- // If it's a return, then we must wait for resolved address.
if (inst->isReturn() && !inst->predTaken()) {
- cpu->pipelineStage[stage_num]->toPrevStages->stageBlock[stage_num][tid] = true;
+ // If it's a return, then we must wait for resolved address.
+ // The predictor will mark a return as "not taken"
+ // if there is no RAS entry
+ DPRINTF(InOrderFetchSeq, "[tid:%d]: Setting block signal "
+ "for stage %i.\n",
+ tid, stage_num);
+ cpu->pipelineStage[stage_num]->
+ toPrevStages->stageBlock[stage_num][tid] = true;
// Invalidate the PC and remember which stage was blocked; squash()
// uses pcBlockStage to send the matching unblock signal.
pcValid[tid] = false;
pcBlockStage[tid] = stage_num;
- } else if (inst->isCondDelaySlot() && !inst->predTaken()) {
- // Not-Taken AND Conditional Control
- DPRINTF(InOrderFetchSeq, "[tid:%i]: [sn:%i]: [PC:%08p] Predicted Not-Taken Cond. "
- "Delay inst. Skipping delay slot and Updating PC to %08p\n",
- tid, inst->seqNum, inst->readPC(), inst->readPredTarg());
-
- DPRINTF(InOrderFetchSeq, "[tid:%i] Setting up squash to start from stage %i, after [sn:%i].\n",
- tid, stage_num, seq_num);
-
- inst->bdelaySeqNum = seq_num;
- inst->squashingStage = stage_num;
-
- squashAfterInst(inst, stage_num, tid);
- } else if (!inst->isCondDelaySlot() && !inst->predTaken()) {
- // Not-Taken Control
- DPRINTF(InOrderFetchSeq, "[tid:%i]: [sn:%i]: Predicted Not-Taken Control "
- "inst. updating PC to %08p\n", tid, inst->seqNum,
- inst->readNextPC());
-
- ++delaySlotInfo[tid].numInsts;
- delaySlotInfo[tid].targetReady = false;
- delaySlotInfo[tid].targetAddr = inst->readNextNPC();
-
} else if (inst->predTaken()) {
- // Taken Control
- ++delaySlotInfo[tid].numInsts;
- delaySlotInfo[tid].targetReady = false;
- delaySlotInfo[tid].targetAddr = inst->readPredTarg();
-
- DPRINTF(InOrderFetchSeq, "[tid:%i]: [sn:%i] Updating delay slot target "
- "to PC %08p\n", tid, inst->seqNum, inst->readPredTarg());
-
- // Set-Up Squash Through-Out Pipeline
- DPRINTF(InOrderFetchSeq, "[tid:%i] Setting up squash to start from stage %i, after [sn:%i].\n",
- tid, stage_num, seq_num + 1);
- inst->bdelaySeqNum = seq_num + 1;
- inst->squashingStage = stage_num;
-
- // Do Squashing
- squashAfterInst(inst, stage_num, tid);
+ // Taken Control
+ inst->setSquashInfo(stage_num);
+ setupSquash(inst, stage_num, tid);
+
+ DPRINTF(InOrderFetchSeq, "[tid:%i] Setting up squash to "
+ "start from stage %i, after [sn:%i].\n",
+ tid, stage_num, inst->squashSeqNum);
}
} else {
- DPRINTF(InOrderFetchSeq, "[tid:%i]: [sn:%i]: Ignoring branch target update "
- "since then is not a control instruction.\n", tid, inst->seqNum);
+ DPRINTF(InOrderFetchSeq, "[tid:%i]: [sn:%i]: Ignoring branch "
+ "target update since then is not a control "
+ "instruction.\n", tid, inst->seqNum);
}
// UpdateTargetPC always completes the request, whichever path was taken.
fs_req->done();
}
}
// Removed helper: it squashed the given pipeline stage on behalf of a branch
// and then scheduled a SquashAll event on the resource pool. The
// predicted-taken path in execute() calls setupSquash() in its place.
-inline void
-FetchSeqUnit::squashAfterInst(DynInstPtr inst, int stage_num, unsigned tid)
-{
- // Squash In Pipeline Stage
- cpu->pipelineStage[stage_num]->squashDueToBranch(inst, tid);
-
- // Squash inside current resource, so if there needs to be fetching on same cycle
- // the fetch information will be correct.
- // squash(inst, stage_num, inst->bdelaySeqNum, tid);
-
- // Schedule Squash Through-out Resource Pool
- cpu->resPool->scheduleEvent((InOrderCPU::CPUEventType)ResourcePool::SquashAll, inst, 0);
-}
// Redirects the thread's fetch PC after a squash.
//
// inst            instruction that caused the squash
// squash_stage    pipeline stage the squash originates from
// squash_seq_num  sequence number recorded for this squash
// tid             thread being squashed
//
// A squash is ignored when one with an equal or lower sequence number was
// already recorded during the same tick. Otherwise the squash is recorded
// and, if the instruction has a static instruction attached, either the PC
// is invalidated (faulting instruction) or a new fetch PC is computed and
// any stage blocked on the PC is unblocked. Resource::squash() is called at
// the end in every case.
//
// NOTE(review): the closing brace of this function is not visible in this
// excerpt.
void
FetchSeqUnit::squash(DynInstPtr inst, int squash_stage,
- InstSeqNum squash_seq_num, unsigned tid)
+ InstSeqNum squash_seq_num, ThreadID tid)
{
- DPRINTF(InOrderFetchSeq, "[tid:%i]: Updating due to squash from stage %i.\n",
- tid, squash_stage);
-
- InstSeqNum done_seq_num = inst->bdelaySeqNum;
- Addr new_PC = inst->readPredTarg();
-
- if (squashSeqNum[tid] <= done_seq_num &&
- lastSquashCycle[tid] == curTick) {
- DPRINTF(InOrderFetchSeq, "[tid:%i]: Ignoring squash from stage %i, since"
- "there is an outstanding squash that is older.\n",
+ DPRINTF(InOrderFetchSeq, "[tid:%i]: Updating due to squash from %s (%s) "
+ "stage %i.\n", tid, inst->instName(), inst->pcState(),
+ squash_stage);
+
// Same tick and the recorded sequence number is not newer than this one:
// the earlier squash already covers it.
+ if (lastSquashCycle[tid] == curTick() &&
+ squashSeqNum[tid] <= squash_seq_num) {
+ DPRINTF(InOrderFetchSeq, "[tid:%i]: Ignoring squash from stage %i, "
+ "since there is an outstanding squash that is older.\n",
tid, squash_stage);
} else {
- squashSeqNum[tid] = done_seq_num;
- lastSquashCycle[tid] = curTick;
-
- // If The very next instruction number is the done seq. num,
- // then we haven't seen the delay slot yet ... if it isn't
- // the last done_seq_num then this is the delay slot inst.
- if (cpu->nextInstSeqNum(tid) != done_seq_num &&
- !inst->procDelaySlotOnMispred) {
- delaySlotInfo[tid].numInsts = 0;
- delaySlotInfo[tid].targetReady = false;
-
- // Reset PC
- PC[tid] = new_PC;
- nextPC[tid] = new_PC + instSize;
- nextNPC[tid] = new_PC + (2 * instSize);
-
- DPRINTF(InOrderFetchSeq, "[tid:%i]: Setting PC to %08p.\n",
- tid, PC[tid]);
- } else {
- delaySlotInfo[tid].numInsts = 1;
- delaySlotInfo[tid].targetReady = false;
- delaySlotInfo[tid].targetAddr = (inst->procDelaySlotOnMispred) ? inst->branchTarget() : new_PC;
-
- // Reset PC to Delay Slot Instruction
- if (inst->procDelaySlotOnMispred) {
- PC[tid] = new_PC;
- nextPC[tid] = new_PC + instSize;
- nextNPC[tid] = new_PC + (2 * instSize);
- }
// Record this squash so later squashes in the same tick can be compared
// against it.
+ squashSeqNum[tid] = squash_seq_num;
+ lastSquashCycle[tid] = curTick();
+
// Without a static instruction no PC update is attempted at all.
+ if (inst->staticInst) {
+ if (inst->fault != NoFault) {
+ // A Trap Caused This Fault and will update the pc state
+ // when done trapping
+ DPRINTF(InOrderFetchSeq, "[tid:%i] Blocking due to fault @ "
+ "[sn:%i].%s %s \n", tid, inst->seqNum,
+ inst->instName(), inst->pcState());
+ pcValid[tid] = false;
+ } else {
+ TheISA::PCState nextPC;
+ assert(inst->staticInst);
+ if (inst->isControl()) {
// Control instruction: resume fetching at its predicted target.
+ nextPC = inst->readPredTarg();
+
+ // If we are already fetching this PC then advance to next PC
+ // =======
+ // This should handle ISAs w/delay slots and annulled delay
+ // slots to figure out which is the next PC to fetch after
+ // a mispredict
+ DynInstPtr bdelay_inst = NULL;
+ ListIt bdelay_it;
// Locate the instruction that follows the branch: step the branch's
// own list iterator when it is on the instruction list, otherwise look
// up sequence number + 1.
+ if (inst->onInstList) {
+ bdelay_it = inst->getInstListIt();
+ bdelay_it++;
+ } else {
+ InstSeqNum branch_delay_num = inst->seqNum + 1;
+ bdelay_it = cpu->findInst(branch_delay_num, tid);
+ }
- }
+ if (bdelay_it != cpu->instList[tid].end()) {
+ bdelay_inst = (*bdelay_it);
+ }
- // Unblock Any Stages Waiting for this information to be updated ...
- if (!pcValid[tid]) {
- cpu->pipelineStage[pcBlockStage[tid]]->toPrevStages->stageUnblock[pcBlockStage[tid]][tid] = true;
- }
// If that following instruction already sits at the target address, give
// it the target PC state and fetch from the PC after it instead.
+ if (bdelay_inst) {
+ if (bdelay_inst->pc.instAddr() == nextPC.instAddr()) {
+ bdelay_inst->pc = nextPC;
+ advancePC(nextPC, inst->staticInst);
+ DPRINTF(InOrderFetchSeq, "Advanced PC to %s\n", nextPC);
+ }
+ }
+ } else {
// Non-control instruction: resume at the PC following it.
+ nextPC = inst->pcState();
+ advancePC(nextPC, inst->staticInst);
+ }
+
+
+ DPRINTF(InOrderFetchSeq, "[tid:%i]: Setting PC to %s.\n",
+ tid, nextPC);
+ pc[tid] = nextPC;
- pcValid[tid] = true;
+ // Unblock Any Stages Waiting for this information to be updated ...
+ if (!pcValid[tid]) {
+ DPRINTF(InOrderFetchSeq, "[tid:%d]: Setting unblock signal "
+ "for stage %i.\n",
+ tid, pcBlockStage[tid]);
+
+ // Need to use "fromNextStages" instead of "toPrevStages"
+ // because the timebuffer will already have advanced
+ // in the tick function and this squash function will happen after
+ // the tick
+ cpu->pipelineStage[pcBlockStage[tid]]->
+ fromNextStages->stageUnblock[pcBlockStage[tid]][tid] = true;
+ }
+
+ pcValid[tid] = true;
+ }
+ }
}
// The base class squash runs whether or not the PC was updated above.
Resource::squash(inst, squash_stage, squash_seq_num, tid);
// Fragment: the header of the enclosing function is not visible in this
// excerpt. The visible body casts `resource` to the owning FetchSeqUnit,
// then re-reads every thread's PC state from the CPU into that unit and
// marks each thread's PC valid.
FetchSeqUnit* fs_res = dynamic_cast<FetchSeqUnit*>(resource);
assert(fs_res);
- for (int i=0; i < MaxThreads; i++) {
- fs_res->PC[i] = fs_res->cpu->readPC(i);
- fs_res->nextPC[i] = fs_res->cpu->readNextPC(i);
- fs_res->nextNPC[i] = fs_res->cpu->readNextNPC(i);
- DPRINTF(InOrderFetchSeq, "[tid:%i]: Setting PC:%08p NPC:%08p NNPC:%08p.\n",
- fs_res->PC[i], fs_res->nextPC[i], fs_res->nextNPC[i]);
+ for (int i = 0; i < MaxThreads; i++) {
+ fs_res->pc[i] = fs_res->cpu->pcState(i);
// The format string has two conversions ([tid:%i] and %s), so the thread
// index has to be passed ahead of the PC; otherwise the PC is consumed by
// %i and %s has no argument.
+ DPRINTF(InOrderFetchSeq, "[tid:%i]: Setting PC: %s.\n",
+ i, fs_res->pc[i]);
fs_res->pcValid[i] = true;
}
-
- //cpu->fetchPriorityList.push_back(tid);
}
// Makes thread `tid` fetchable: marks its PC valid, loads the PC state from
// the CPU, and appends the thread to the CPU's fetch priority list.
void
-FetchSeqUnit::activateThread(unsigned tid)
+FetchSeqUnit::activateThread(ThreadID tid)
{
pcValid[tid] = true;
- PC[tid] = cpu->readPC(tid);
- nextPC[tid] = cpu->readNextPC(tid);
- nextNPC[tid] = cpu->readNextNPC(tid);
+ pc[tid] = cpu->pcState(tid);
cpu->fetchPriorityList.push_back(tid);
- DPRINTF(InOrderFetchSeq, "[tid:%i]: Reading PC:%08p NPC:%08p NNPC:%08p.\n",
- tid, PC[tid], nextPC[tid], nextNPC[tid]);
+ DPRINTF(InOrderFetchSeq, "[tid:%i]: Reading PC: %s.\n",
+ tid, pc[tid]);
}
// Stops fetching for thread `tid`: resets its PC-valid flag, block stage and
// squash bookkeeping to the same values the constructor assigns, and removes
// the thread from the CPU's fetch priority list if it is present.
void
-FetchSeqUnit::deactivateThread(unsigned tid)
+FetchSeqUnit::deactivateThread(ThreadID tid)
{
- delaySlotInfo[tid].numInsts = 0;
- delaySlotInfo[tid].targetReady = false;
-
pcValid[tid] = false;
pcBlockStage[tid] = 0;
squashSeqNum[tid] = (InstSeqNum)-1;
lastSquashCycle[tid] = 0;
// Only the first matching entry is erased; a thread that is not in the list
// is left alone.
- std::list<unsigned>::iterator thread_it = find(cpu->fetchPriorityList.begin(),
+ list<ThreadID>::iterator thread_it = find(cpu->fetchPriorityList.begin(),
cpu->fetchPriorityList.end(),
tid);
if (thread_it != cpu->fetchPriorityList.end())
cpu->fetchPriorityList.erase(thread_it);
}
+
// Suspending a thread is handled exactly like deactivating it.
+void
+FetchSeqUnit::suspendThread(ThreadID tid)
+{
+ deactivateThread(tid);
+}
+
// Re-synchronizes thread `tid` with the CPU after a trap: marks the PC valid
// and reloads it from the CPU's PC state. The `fault` and `inst` arguments
// are not used by this body.
+void
+FetchSeqUnit::trap(const Fault &fault, ThreadID tid, DynInstPtr inst)
+{
+ pcValid[tid] = true;
+ pc[tid] = cpu->pcState(tid);
+ DPRINTF(InOrderFetchSeq, "[tid:%i]: Trap updating to PC: "
+ "%s.\n", tid, pc[tid]);
+}
+
// Re-establishes the fetch PC of thread `tid` after a context switch caused
// by `inst`. The starting point is the thread's lastBranchPC when its
// lastGradIsBranch flag is set (presumably "last graduated instruction was a
// branch" -- confirm), otherwise the PC of `inst`; that PC is then advanced
// using inst's static instruction and the thread's PC is marked valid.
+void
+FetchSeqUnit::updateAfterContextSwitch(DynInstPtr inst, ThreadID tid)
+{
+ pcValid[tid] = true;
+
+ if (cpu->thread[tid]->lastGradIsBranch) {
+ /** This function assumes that the instruction causing the context
+ * switch was right after the branch. Thus, if it's not, then
+ * we are updating incorrectly here
+ */
+ assert(cpu->nextInstAddr(tid) == inst->instAddr());
+ pc[tid] = cpu->thread[tid]->lastBranchPC;
+ } else {
+ pc[tid] = inst->pcState();
+ }
// This assert only guards the advancePC() call below; `inst` has already
// been dereferenced above.
+ assert(inst->staticInst);
+ advancePC(pc[tid], inst->staticInst);
+
+ DPRINTF(InOrderFetchSeq, "[tid:%i]: Updating PCs due to Context Switch."
+ "Assigning PC: %s.\n", tid, pc[tid]);
+}