2 * Copyright (c) 2004-2006 The Regents of The University of Michigan
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are
7 * met: redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer;
9 * redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution;
12 * neither the name of the copyright holders nor the names of its
13 * contributors may be used to endorse or promote products derived from
14 * this software without specific prior written permission.
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 #include "cpu/o3/decode.hh"
// Constructs the decode stage from the CPU parameters: copies the
// inter-stage delays, the decode width and the thread count, then resets
// each thread's status, stall flags and delay-slot squash flag, and sizes
// the skid buffer.
// NOTE(review): this listing omits short lines (braces, the template header,
// possibly short statements), so the body shown here may be incomplete.
34 DefaultDecode<Impl>::DefaultDecode(Params *params)
35 : renameToDecodeDelay(params->renameToDecodeDelay),
36 iewToDecodeDelay(params->iewToDecodeDelay),
37 commitToDecodeDelay(params->commitToDecodeDelay),
38 fetchToDecodeDelay(params->fetchToDecodeDelay),
39 decodeWidth(params->decodeWidth),
40 numThreads(params->numberOfThreads)
44 // Setup status, make sure stall signals are clear.
45 for (int i = 0; i < numThreads; ++i) {
46 decodeStatus[i] = Idle;
48 stalls[i].rename = false;
49 stalls[i].iew = false;
50 stalls[i].commit = false;
52 squashAfterDelaySlot[i] = false;
// Skid buffer limit = (fetch-to-decode delay * fetch width) + decode width.
// Presumably this covers everything fetch can still deliver while a block
// signal propagates back, plus one decode group -- TODO confirm.
55 // @todo: Make into a parameter
56 skidBufferMax = (fetchToDecodeDelay * params->fetchWidth) + decodeWidth;
// Returns this stage's name: the owning CPU's name with ".decode" appended.
61 DefaultDecode<Impl>::name() const
63 return cpu->name() + ".decode";
// Registers the decode stage's statistics. Each visible chain assigns a name
// of the form name() + ".DECODE:<Stat>", a human-readable description, and a
// prereq.
// NOTE(review): the object each ".name(...)" chain is invoked on sits on a
// line that is not visible in this listing; presumably it is the same counter
// that is passed to .prereq() -- TODO confirm.
68 DefaultDecode<Impl>::regStats()
71 .name(name() + ".DECODE:IdleCycles")
72 .desc("Number of cycles decode is idle")
73 .prereq(decodeIdleCycles);
75 .name(name() + ".DECODE:BlockedCycles")
76 .desc("Number of cycles decode is blocked")
77 .prereq(decodeBlockedCycles);
79 .name(name() + ".DECODE:RunCycles")
80 .desc("Number of cycles decode is running")
81 .prereq(decodeRunCycles);
83 .name(name() + ".DECODE:UnblockCycles")
84 .desc("Number of cycles decode is unblocking")
85 .prereq(decodeUnblockCycles);
87 .name(name() + ".DECODE:SquashCycles")
88 .desc("Number of cycles decode is squashing")
89 .prereq(decodeSquashCycles);
91 .name(name() + ".DECODE:BranchResolved")
92 .desc("Number of times decode resolved a branch")
93 .prereq(decodeBranchResolved);
95 .name(name() + ".DECODE:BranchMispred")
96 .desc("Number of times decode detected a branch misprediction")
97 .prereq(decodeBranchMispred);
99 .name(name() + ".DECODE:ControlMispred")
100 .desc("Number of times decode detected an instruction incorrectly"
101 " predicted as a control")
102 .prereq(decodeControlMispred);
104 .name(name() + ".DECODE:DecodedInsts")
105 .desc("Number of instructions handled by decode")
106 .prereq(decodeDecodedInsts);
108 .name(name() + ".DECODE:SquashedInsts")
109 .desc("Number of squashed instructions handled by decode")
110 .prereq(decodeSquashedInsts);
// Receives the CPU pointer for this stage and emits a debug trace.
// NOTE(review): the statement that stores cpu_ptr is not visible in this
// listing; presumably it is assigned to the `cpu` member -- TODO confirm.
115 DefaultDecode<Impl>::setCPU(O3CPU *cpu_ptr)
117 DPRINTF(Decode, "Setting CPU pointer.\n");
// Hooks decode up to the backwards-communication time buffer: one wire at
// offset 0 for writing to fetch, and one read wire per later stage at a
// negative offset equal to that stage's delay to decode.
// NOTE(review): the statement that stores tb_ptr is not visible in this
// listing; presumably it is assigned to `timeBuffer` -- TODO confirm.
123 DefaultDecode<Impl>::setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr)
125 DPRINTF(Decode, "Setting time buffer pointer.\n");
128 // Setup wire to write information back to fetch.
129 toFetch = timeBuffer->getWire(0);
131 // Create wires to get information from proper places in time buffer.
132 fromRename = timeBuffer->getWire(-renameToDecodeDelay);
133 fromIEW = timeBuffer->getWire(-iewToDecodeDelay);
134 fromCommit = timeBuffer->getWire(-commitToDecodeDelay);
// Stores the decode queue pointer and obtains the offset-0 wire that decode
// uses to hand instructions to rename.
139 DefaultDecode<Impl>::setDecodeQueue(TimeBuffer<DecodeStruct> *dq_ptr)
141 DPRINTF(Decode, "Setting decode queue pointer.\n");
142 decodeQueue = dq_ptr;
144 // Setup wire to write information to proper place in decode queue.
145 toRename = decodeQueue->getWire(0);
// Obtains the wire from which decode reads fetch's output, delayed by
// fetchToDecodeDelay.
// NOTE(review): the statement that stores fq_ptr is not visible in this
// listing; presumably it is assigned to `fetchQueue` -- TODO confirm.
150 DefaultDecode<Impl>::setFetchQueue(TimeBuffer<FetchStruct> *fq_ptr)
152 DPRINTF(Decode, "Setting fetch queue pointer.\n");
155 // Setup wire to read information from fetch queue.
156 fromFetch = fetchQueue->getWire(-fetchToDecodeDelay);
// Stores the pointer to the list of active thread ids; the pointer itself is
// kept (no copy of the list is made here).
161 DefaultDecode<Impl>::setActiveThreads(std::list<unsigned> *at_ptr)
163 DPRINTF(Decode, "Setting active threads list pointer.\n");
164 activeThreads = at_ptr;
// Drain request: decode immediately notifies the CPU via signalDrained().
// NOTE(review): the return type and return statement are not visible in this
// listing.
167 template <class Impl>
169 DefaultDecode<Impl>::drain()
171 // Decode is done draining at any time.
172 cpu->signalDrained();
// Resets the stage: every thread goes back to Idle with all stall flags
// cleared, the per-thread instruction queue and skid buffer are emptied, and
// the "wrote to time buffer" flag is cleared.
176 template <class Impl>
178 DefaultDecode<Impl>::takeOverFrom()
182 // Be sure to reset state and clear out any old instructions.
183 for (int i = 0; i < numThreads; ++i) {
184 decodeStatus[i] = Idle;
186 stalls[i].rename = false;
187 stalls[i].iew = false;
188 stalls[i].commit = false;
// NOTE(review): the bodies of the two draining loops below are not visible
// in this listing; presumably each pops one element per iteration.
189 while (!insts[i].empty())
191 while (!skidBuffer[i].empty())
195 wroteToTimeBuffer = false;
// Checks whether thread `tid` is stalled by rename, IEW or commit, testing
// them in that order and tracing the first stall source found.
// NOTE(review): the lines that set and return ret_val inside each branch are
// not visible in this listing; presumably ret_val becomes true in every
// branch and is returned at the end -- TODO confirm.
// NOTE(review): "fom" in the three trace messages looks like a typo for
// "from".
200 DefaultDecode<Impl>::checkStall(unsigned tid) const
202 bool ret_val = false;
204 if (stalls[tid].rename) {
205 DPRINTF(Decode,"[tid:%i]: Stall fom Rename stage detected.\n", tid);
207 } else if (stalls[tid].iew) {
208 DPRINTF(Decode,"[tid:%i]: Stall fom IEW stage detected.\n", tid);
210 } else if (stalls[tid].commit) {
211 DPRINTF(Decode,"[tid:%i]: Stall fom Commit stage detected.\n", tid);
// Returns true when the current fetch wire carries at least one instruction.
220 DefaultDecode<Impl>::fetchInstsValid()
222 return fromFetch->size > 0;
// Moves thread `tid` into the Blocked state and, when it was not already
// Blocked, raises the decodeBlock signal to fetch and records time-buffer
// activity.
// NOTE(review): the skid-buffer insertion call and the return statements are
// not visible in this listing.
227 DefaultDecode<Impl>::block(unsigned tid)
229 DPRINTF(Decode, "[tid:%u]: Blocking.\n", tid);
231 // Add the current inputs to the skid buffer so they can be
232 // reprocessed when this stage unblocks.
235 // If the decode status is blocked or unblocking then decode has not yet
236 // signalled fetch to unblock. In that case, there is no need to tell
238 if (decodeStatus[tid] != Blocked) {
239 // Set the status to Blocked.
240 decodeStatus[tid] = Blocked;
// NOTE(review): the status was just overwritten with Blocked above, so as
// written the `!= Unblocking` test below is always true and decodeBlock is
// sent even when the thread was previously Unblocking. If the intent in the
// comment above is to skip the signal in that case, the previous status
// would need to be captured before the assignment -- confirm.
242 if (decodeStatus[tid] != Unblocking) {
243 toFetch->decodeBlock[tid] = true;
244 wroteToTimeBuffer = true;
// Attempts to finish unblocking thread `tid`. Once its skid buffer is empty,
// decode raises decodeUnblock to fetch, records time-buffer activity and
// switches the thread to Running; otherwise it only traces that unblocking
// is still in progress.
// NOTE(review): the return statements are not visible in this listing.
255 DefaultDecode<Impl>::unblock(unsigned tid)
257 // Decode is done unblocking only if the skid buffer is empty.
258 if (skidBuffer[tid].empty()) {
259 DPRINTF(Decode, "[tid:%u]: Done unblocking.\n", tid);
260 toFetch->decodeUnblock[tid] = true;
261 wroteToTimeBuffer = true;
263 decodeStatus[tid] = Running;
267 DPRINTF(Decode, "[tid:%u]: Currently unblocking.\n", tid);
// Squash triggered by a branch misprediction that decode itself detected on
// `inst`. Reports the mispredict to fetch (done sequence number, next PC,
// taken/not-taken), unblocks fetch if decode was Blocked/Unblocking, marks
// younger incoming instructions of this thread as squashed, clears the
// thread's pending instruction queue and skid buffer, and asks the CPU to
// remove instructions up to the squash sequence number.
274 DefaultDecode<Impl>::squash(DynInstPtr &inst, unsigned tid)
276 DPRINTF(Decode, "[tid:%i]: Squashing due to incorrect branch prediction "
277 "detected at decode.\n", tid);
279 // Send back mispredict information.
280 toFetch->decodeInfo[tid].branchMispredict = true;
281 toFetch->decodeInfo[tid].predIncorrect = true;
282 toFetch->decodeInfo[tid].doneSeqNum = inst->seqNum;
283 toFetch->decodeInfo[tid].squash = true;
284 toFetch->decodeInfo[tid].nextPC = inst->branchTarget();
// NOTE(review): the #else / #endif lines of this conditional are not visible
// in this listing. The two branchTaken computations and the two
// squash_seq_num declarations presumably belong to different arms (delay-slot
// vs. non-delay-slot ISAs), since both could not share one scope.
285 #if ISA_HAS_DELAY_SLOT
286 toFetch->decodeInfo[tid].branchTaken = inst->readNextNPC() !=
287 (inst->readNextPC() + sizeof(TheISA::MachInst));
289 toFetch->decodeInfo[tid].bdelayDoneSeqNum = bdelayDoneSeqNum[tid];
290 squashAfterDelaySlot[tid] = false;
292 InstSeqNum squash_seq_num = bdelayDoneSeqNum[tid];
294 toFetch->decodeInfo[tid].branchTaken =
295 inst->readNextPC() != (inst->readPC() + sizeof(TheISA::MachInst));
297 InstSeqNum squash_seq_num = inst->seqNum;
300 // Might have to tell fetch to unblock.
301 if (decodeStatus[tid] == Blocked ||
302 decodeStatus[tid] == Unblocking) {
303 toFetch->decodeUnblock[tid] = 1;
306 // Set status to squashing.
307 decodeStatus[tid] = Squashing;
// Only instructions of this thread that are younger than the squash point
// (seqNum > squash_seq_num) are marked squashed.
309 for (int i=0; i<fromFetch->size; i++) {
310 if (fromFetch->insts[i]->threadNumber == tid &&
311 fromFetch->insts[i]->seqNum > squash_seq_num) {
312 fromFetch->insts[i]->setSquashed();
316 // Clear the instruction list and skid buffer in case they have any
318 while (!insts[tid].empty()) {
320 #if ISA_HAS_DELAY_SLOT
321 if (insts[tid].front()->seqNum <= squash_seq_num) {
// NOTE(review): the adjacent literals "%i insts" and "left in decode.\n"
// concatenate without a space, so the trace prints "instsleft".
322 DPRINTF(Decode, "[tid:%i]: Cannot remove incoming decode "
323 "instructions before delay slot [sn:%i]. %i insts"
324 "left in decode.\n", tid, squash_seq_num,
332 while (!skidBuffer[tid].empty()) {
334 #if ISA_HAS_DELAY_SLOT
335 if (skidBuffer[tid].front()->seqNum <= squash_seq_num) {
336 DPRINTF(Decode, "[tid:%i]: Cannot remove skidBuffer "
337 "instructions before delay slot [sn:%i]. %i insts"
338 "left in decode.\n", tid, squash_seq_num,
343 skidBuffer[tid].pop();
346 // Squash instructions up until this one
347 cpu->removeInstsUntil(squash_seq_num, tid);
// Squashes everything decode holds for thread `tid`: unblocks fetch if
// decode was Blocked/Unblocking, switches the thread to Squashing, marks all
// of this thread's incoming fetch instructions as squashed, and empties the
// thread's instruction queue and skid buffer.
// NOTE(review): the else-branch structure, the squash_count updates and the
// return statement are not visible in this listing.
352 DefaultDecode<Impl>::squash(unsigned tid)
354 DPRINTF(Decode, "[tid:%i]: Squashing.\n",tid);
356 if (decodeStatus[tid] == Blocked ||
357 decodeStatus[tid] == Unblocking) {
359 // In syscall emulation, we can have both a block and a squash due
360 // to a syscall in the same cycle. This would cause both signals to
361 // be high. This shouldn't happen in full system.
362 // @todo: Determine if this still happens.
363 if (toFetch->decodeBlock[tid]) {
364 toFetch->decodeBlock[tid] = 0;
366 toFetch->decodeUnblock[tid] = 1;
369 toFetch->decodeUnblock[tid] = 1;
373 // Set status to squashing.
374 decodeStatus[tid] = Squashing;
376 // Go through incoming instructions from fetch and squash them.
377 unsigned squash_count = 0;
379 for (int i=0; i<fromFetch->size; i++) {
380 if (fromFetch->insts[i]->threadNumber == tid) {
381 fromFetch->insts[i]->setSquashed();
386 // Clear the instruction list and skid buffer in case they have any
388 while (!insts[tid].empty()) {
392 while (!skidBuffer[tid].empty()) {
393 skidBuffer[tid].pop();
// Moves thread `tid`'s pending incoming instructions into its skid buffer
// so they can be decoded later, then checks that the skid buffer has not
// grown past skidBufferMax.
// NOTE(review): the statement that removes the front element from insts[tid]
// is not visible in this listing; presumably a pop() follows the front()
// read, otherwise the loop would not terminate.
401 DefaultDecode<Impl>::skidInsert(unsigned tid)
403 DynInstPtr inst = NULL;
405 while (!insts[tid].empty()) {
406 inst = insts[tid].front();
410 assert(tid == inst->threadNumber);
412 DPRINTF(Decode,"Inserting [sn:%lli] PC:%#x into decode skidBuffer %i\n",
413 inst->seqNum, inst->readPC(), inst->threadNumber);
415 skidBuffer[tid].push(inst);
418 // @todo: Eventually need to enforce this by not letting a thread
419 // fetch past its skidbuffer
420 assert(skidBuffer[tid].size() <= skidBufferMax);
// Walks the active thread list and inspects each thread's skid buffer.
// NOTE(review): the return statements are not visible in this listing;
// presumably it returns false on the first non-empty skid buffer and true
// otherwise -- TODO confirm.
425 DefaultDecode<Impl>::skidsEmpty()
427 std::list<unsigned>::iterator threads = (*activeThreads).begin();
429 while (threads != (*activeThreads).end()) {
430 if (!skidBuffer[*threads++].empty())
// Recomputes the stage-level activity status from the per-thread statuses:
// if any active thread is Unblocking and the stage is Inactive, the stage is
// activated on the CPU; if none is Unblocking and the stage is Active, it is
// deactivated.
// NOTE(review): the assignments to _status are not visible in this listing.
439 DefaultDecode<Impl>::updateStatus()
441 bool any_unblocking = false;
// NOTE(review): `threads` is initialised to begin() and then immediately
// assigned begin() again; the second assignment looks redundant.
443 std::list<unsigned>::iterator threads = (*activeThreads).begin();
445 threads = (*activeThreads).begin();
447 while (threads != (*activeThreads).end()) {
448 unsigned tid = *threads++;
450 if (decodeStatus[tid] == Unblocking) {
451 any_unblocking = true;
456 // Decode will have activity if it's unblocking.
457 if (any_unblocking) {
458 if (_status == Inactive) {
461 DPRINTF(Activity, "Activating stage.\n");
463 cpu->activateStage(O3CPU::DecodeIdx);
466 // If it's not unblocking, then decode will not have any internal
467 // activity. Switch it to inactive.
468 if (_status == Active) {
470 DPRINTF(Activity, "Deactivating stage.\n");
472 cpu->deactivateStage(O3CPU::DecodeIdx);
// Distributes this cycle's instructions from fetch into the per-thread
// queues, indexed by each instruction's threadNumber. All per-thread queues
// are asserted to be empty on entry.
477 template <class Impl>
479 DefaultDecode<Impl>::sortInsts()
481 int insts_from_fetch = fromFetch->size;
483 for (int i=0; i < numThreads; i++)
484 assert(insts[i].empty());
486 for (int i = 0; i < insts_from_fetch; ++i) {
487 insts[fromFetch->insts[i]->threadNumber].push(fromFetch->insts[i]);
// Latches the block/unblock signals from rename, IEW and commit into the
// per-thread stall flags for `tid`. A block signal sets the matching flag;
// an unblock signal clears it and asserts that the flag was set beforehand.
493 DefaultDecode<Impl>::readStallSignals(unsigned tid)
495 if (fromRename->renameBlock[tid]) {
496 stalls[tid].rename = true;
499 if (fromRename->renameUnblock[tid]) {
500 assert(stalls[tid].rename);
501 stalls[tid].rename = false;
504 if (fromIEW->iewBlock[tid]) {
505 stalls[tid].iew = true;
508 if (fromIEW->iewUnblock[tid]) {
509 assert(stalls[tid].iew);
510 stalls[tid].iew = false;
513 if (fromCommit->commitBlock[tid]) {
514 stalls[tid].commit = true;
517 if (fromCommit->commitUnblock[tid]) {
518 assert(stalls[tid].commit);
519 stalls[tid].commit = false;
// Per-cycle status update for thread `tid`. After refreshing the stall
// flags it checks, in order: a squash from commit, an ongoing ROB squash,
// stall conditions, a Blocked thread whose stalls have cleared (moves to
// Unblocking), and a Squashing thread with no further squash (moves to
// Running).
// NOTE(review): the actions taken on a commit squash and on a detected stall,
// and all return statements, are not visible in this listing.
523 template <class Impl>
525 DefaultDecode<Impl>::checkSignalsAndUpdate(unsigned tid)
527 // Check if there's a squash signal, squash if there is.
528 // Check stall signals, block if necessary.
529 // If status was blocked
530 // Check if stall conditions have passed
531 // if so then go to unblocking
532 // If status was Squashing
533 // check if squashing is not high. Switch to running this cycle.
535 // Update the per thread stall statuses.
536 readStallSignals(tid);
538 // Check squash signals from commit.
539 if (fromCommit->commitInfo[tid].squash) {
541 DPRINTF(Decode, "[tid:%u]: Squashing instructions due to squash "
542 "from commit.\n", tid);
549 // Check ROB squash signals from commit.
550 if (fromCommit->commitInfo[tid].robSquashing) {
551 DPRINTF(Decode, "[tid:%u]: ROB is still squashing.\n", tid);
553 // Continue to squash.
554 decodeStatus[tid] = Squashing;
559 if (checkStall(tid)) {
563 if (decodeStatus[tid] == Blocked) {
564 DPRINTF(Decode, "[tid:%u]: Done blocking, switching to unblocking.\n",
567 decodeStatus[tid] = Unblocking;
574 if (decodeStatus[tid] == Squashing) {
575 // Switch status to running if decode isn't being told to block or
576 // squash this cycle.
577 DPRINTF(Decode, "[tid:%u]: Done squashing, switching to running.\n",
580 decodeStatus[tid] = Running;
585 // If we've reached this point, we have not gotten any signals that
586 // cause decode to change its status. Decode remains the same as before.
// Runs one cycle of the decode stage: clears the time-buffer activity flag,
// then for every active thread updates its status from incoming signals and
// calls decode(); finally reports activity to the CPU if anything was
// written to a time buffer this cycle.
// NOTE(review): several short statements between the visible lines are
// missing from this listing, so additional per-cycle setup may exist.
592 DefaultDecode<Impl>::tick()
594 wroteToTimeBuffer = false;
596 bool status_change = false;
600 std::list<unsigned>::iterator threads = (*activeThreads).begin();
604 //Check stall and squash signals.
605 while (threads != (*activeThreads).end()) {
606 unsigned tid = *threads++;
608 DPRINTF(Decode,"Processing [tid:%i]\n",tid);
// status_change accumulates across threads: once true it stays true.
609 status_change = checkSignalsAndUpdate(tid) || status_change;
611 decode(status_change, tid);
618 if (wroteToTimeBuffer) {
619 DPRINTF(Activity, "Activity this cycle.\n");
621 cpu->activityThisCycle();
// Per-thread decode step. Counts a blocked or squashing cycle when the
// thread is in that state; otherwise handles the Running/Idle and Unblocking
// cases. In the Unblocking case it asserts that a skid buffer is non-empty
// and ORs the result of unblock(tid) into status_change (an in/out
// reference parameter).
// NOTE(review): the calls that actually decode instructions and that insert
// into the skid buffer are not visible in this listing.
627 DefaultDecode<Impl>::decode(bool &status_change, unsigned tid)
629 // If status is Running or idle,
630 // call decodeInsts()
631 // If status is Unblocking,
632 // buffer any instructions coming from fetch
633 // continue trying to empty skid buffer
634 // check if stall conditions have passed
636 if (decodeStatus[tid] == Blocked) {
637 ++decodeBlockedCycles;
638 } else if (decodeStatus[tid] == Squashing) {
639 ++decodeSquashCycles;
642 // Decode should try to decode as many instructions as its bandwidth
643 // will allow, as long as it is not currently blocked.
644 if (decodeStatus[tid] == Running ||
645 decodeStatus[tid] == Idle) {
646 DPRINTF(Decode, "[tid:%u]: Not blocked, so attempting to run "
650 } else if (decodeStatus[tid] == Unblocking) {
651 // Make sure that the skid buffer has something in it if the
652 // status is unblocking.
653 assert(!skidsEmpty());
655 // If the status was unblocking, then instructions from the skid
656 // buffer were used. Remove those instructions and handle
657 // the rest of unblocking.
660 if (fetchInstsValid()) {
661 // Add the current inputs to the skid buffer so they can be
662 // reprocessed when this stage unblocks.
666 status_change = unblock(tid) || status_change;
// Decodes instructions for thread `tid`. The source is the skid buffer when
// the thread is Unblocking and the per-thread incoming queue otherwise.
// Instructions are popped one at a time while any remain and rename's slot
// index is below decodeWidth; squashed ones are counted and skipped, the
// rest are written to the rename wire. Along the way decode validates branch
// predictions and may trigger a squash.
// NOTE(review): many short lines (braces, declarations, continue/break,
// counter updates, #else/#endif) are missing from this listing, so the
// control flow shown here is incomplete.
670 template <class Impl>
672 DefaultDecode<Impl>::decodeInsts(unsigned tid)
674 // Instructions can come either from the skid buffer or the list of
675 // instructions coming from fetch, depending on decode's status.
676 int insts_available = decodeStatus[tid] == Unblocking ?
677 skidBuffer[tid].size() : insts[tid].size();
679 if (insts_available == 0) {
680 DPRINTF(Decode, "[tid:%u] Nothing to do, breaking out"
682 // Should I change the status to idle?
685 } else if (decodeStatus[tid] == Unblocking) {
686 DPRINTF(Decode, "[tid:%u] Unblocking, removing insts from skid "
688 ++decodeUnblockCycles;
689 } else if (decodeStatus[tid] == Running) {
// Bound by reference so that pops below consume from the real queue.
695 std::queue<DynInstPtr>
696 &insts_to_decode = decodeStatus[tid] == Unblocking ?
697 skidBuffer[tid] : insts[tid];
699 DPRINTF(Decode, "[tid:%u]: Sending instruction to rename.\n",tid);
701 while (insts_available > 0 && toRenameIndex < decodeWidth) {
702 assert(!insts_to_decode.empty());
704 inst = insts_to_decode.front();
706 insts_to_decode.pop();
708 DPRINTF(Decode, "[tid:%u]: Processing instruction [sn:%lli] with "
710 tid, inst->seqNum, inst->readPC());
712 if (inst->isSquashed()) {
713 DPRINTF(Decode, "[tid:%u]: Instruction %i with PC %#x is "
714 "squashed, skipping.\n",
715 tid, inst->seqNum, inst->readPC());
717 ++decodeSquashedInsts;
724 // Also check if instructions have no source registers. Mark
725 // them as ready to issue at any time. Not sure if this check
726 // should exist here or at a later stage; however it doesn't matter
727 // too much for function correctness.
728 if (inst->numSrcRegs() == 0) {
732 // This current instruction is valid, so add it into the decode
733 // queue. The next instruction may not be valid, so check to
734 // see if branches were predicted correctly.
735 toRename->insts[toRenameIndex] = inst;
739 ++decodeDecodedInsts;
742 // Ensure that if it was predicted as a branch, it really is a
744 if (inst->readPredTaken() && !inst->isControl()) {
745 DPRINTF(Decode, "PredPC : %#x != NextPC: %#x\n",inst->predPC,
// NOTE(review): panic() comes before the counter increment and squash below;
// if panic() does not return, those statements are unreachable -- confirm
// whether that is intended.
748 panic("Instruction predicted as a branch!");
750 ++decodeControlMispred;
752 // Might want to set some sort of boolean and just do
753 // a check at the end
754 squash(inst, inst->threadNumber);
759 // Go ahead and compute any PC-relative branches.
760 if (inst->isDirectCtrl() && inst->isUncondCtrl()) {
761 ++decodeBranchResolved;
763 if (inst->branchTarget() != inst->readPredPC()) {
764 ++decodeBranchMispred;
766 // Might want to set some sort of boolean and just do
767 // a check at the end
// Without delay slots: squash immediately and correct the instruction's
// predicted target to the computed branch target.
768 #if !ISA_HAS_DELAY_SLOT
769 squash(inst, inst->threadNumber);
770 Addr target = inst->branchTarget();
771 inst->setPredTarg(target, target + sizeof(TheISA::MachInst));
774 // If mispredicted as taken, then ignore delay slot
775 // instruction... else keep delay slot and squash
776 // after it is sent to rename
777 if (inst->readPredTaken() && inst->isCondDelaySlot()) {
778 DPRINTF(Decode, "[tid:%i]: Conditional delay slot inst."
779 "[sn:%i] PC %#x mispredicted as taken.\n", tid,
780 inst->seqNum, inst->PC);
781 bdelayDoneSeqNum[tid] = inst->seqNum;
782 squash(inst, inst->threadNumber);
783 Addr target = inst->branchTarget();
784 inst->setPredTarg(target,
785 target + sizeof(TheISA::MachInst));
788 DPRINTF(Decode, "[tid:%i]: Misprediction detected at "
789 "[sn:%i] PC %#x, will squash after delay slot "
790 "inst. is sent to Rename\n",
791 tid, inst->seqNum, inst->PC);
// Defer the squash: remember the branch and let the instruction with the
// next sequence number (seqNum + 1) through first.
792 bdelayDoneSeqNum[tid] = inst->seqNum + 1;
793 squashAfterDelaySlot[tid] = true;
794 squashInst[tid] = inst;
// Deferred squash requested earlier: squash on behalf of the remembered
// branch (squashInst[tid]) and correct its predicted target.
801 if (squashAfterDelaySlot[tid]) {
802 assert(!inst->isSquashed());
803 squash(squashInst[tid], squashInst[tid]->threadNumber);
804 Addr target = squashInst[tid]->branchTarget();
805 squashInst[tid]->setPredTarg(target,
806 target + sizeof(TheISA::MachInst));
807 assert(!inst->isSquashed());
812 // If we didn't process all instructions, then we will need to block
813 // and put all those instructions into the skid buffer.
814 if (!insts_to_decode.empty()) {
818 // Record that decode has written to the time buffer for activity
821 wroteToTimeBuffer = true;