From dbeabedaf0f8d9ec0ea3331db2e44b1add53f79f Mon Sep 17 00:00:00 2001 From: "Nilay Vaish ext:(%2C%20Timothy%20Jones%20%3Ctimothy.jones%40cl.cam.ac.uk%3E)" Date: Thu, 24 Jan 2013 12:28:51 -0600 Subject: [PATCH] branch predictor: move out of o3 and inorder cpus This patch moves the branch predictor files in the o3 and inorder directories to src/cpu/pred. This allows sharing the branch predictor across different cpu models. This patch was originally posted by Timothy Jones in July 2010 but never made it to the repository. --HG-- rename : src/cpu/o3/bpred_unit.cc => src/cpu/pred/bpred_unit.cc rename : src/cpu/o3/bpred_unit.hh => src/cpu/pred/bpred_unit.hh rename : src/cpu/o3/bpred_unit_impl.hh => src/cpu/pred/bpred_unit_impl.hh rename : src/cpu/o3/sat_counter.hh => src/cpu/pred/sat_counter.hh --- configs/common/O3_ARM_v7a.py | 7 +- src/cpu/BaseCPU.py | 3 + src/cpu/inorder/InOrderCPU.py | 21 +- src/cpu/inorder/SConscript | 1 - src/cpu/inorder/inorder_cpu_builder.cc | 6 - src/cpu/inorder/resources/bpred_unit.cc | 463 ------------------ src/cpu/inorder/resources/bpred_unit.hh | 267 ---------- src/cpu/inorder/resources/branch_predictor.cc | 25 +- src/cpu/inorder/resources/branch_predictor.hh | 4 +- src/cpu/o3/O3CPU.py | 20 +- src/cpu/o3/SConscript | 1 - src/cpu/o3/cpu_policy.hh | 5 - src/cpu/o3/deriv.cc | 2 - src/cpu/o3/fetch.hh | 4 +- src/cpu/o3/fetch_impl.hh | 21 +- src/cpu/o3/sat_counter.cc | 57 --- src/cpu/pred/2bit_local.cc | 47 +- src/cpu/pred/2bit_local.hh | 26 +- src/cpu/pred/BranchPredictor.py | 54 ++ src/cpu/pred/SConscript | 4 + src/cpu/{o3 => pred}/bpred_unit.cc | 21 +- src/cpu/{o3 => pred}/bpred_unit.hh | 79 ++- src/cpu/{o3 => pred}/bpred_unit_impl.hh | 399 ++++++++------- src/cpu/pred/btb.hh | 6 +- src/cpu/pred/ras.hh | 6 +- src/cpu/{o3 => pred}/sat_counter.hh | 6 +- src/cpu/pred/tournament.cc | 37 +- src/cpu/pred/tournament.hh | 31 +- 28 files changed, 433 insertions(+), 1190 deletions(-) delete mode 100644 src/cpu/inorder/resources/bpred_unit.cc delete mode 100644 src/cpu/inorder/resources/bpred_unit.hh delete mode 100644 src/cpu/o3/sat_counter.cc create mode 100644 src/cpu/pred/BranchPredictor.py rename src/cpu/{o3 => pred}/bpred_unit.cc (75%) rename src/cpu/{o3 => pred}/bpred_unit.hh (86%) rename src/cpu/{o3 => pred}/bpred_unit_impl.hh (54%) rename src/cpu/{o3 => pred}/sat_counter.hh (96%) diff --git a/configs/common/O3_ARM_v7a.py b/configs/common/O3_ARM_v7a.py index f76128ae6..c85ba2c6c 100644 --- a/configs/common/O3_ARM_v7a.py +++ b/configs/common/O3_ARM_v7a.py @@ -87,8 +87,8 @@ class O3_ARM_v7a_FUP(FUPool): FUList = [O3_ARM_v7a_Simple_Int(), O3_ARM_v7a_Complex_Int(), O3_ARM_v7a_Load(), O3_ARM_v7a_Store(), O3_ARM_v7a_FP()] - -class O3_ARM_v7a_3(DerivO3CPU): +# Tournament Branch Predictor +class O3_ARM_v7a_BP(BranchPredictor): predType = "tournament" localCtrBits = 2 localHistoryTableSize = 64 @@ -102,6 +102,8 @@ class O3_ARM_v7a_3(DerivO3CPU): BTBTagSize = 18 RASSize = 16 instShiftAmt = 2 + +class O3_ARM_v7a_3(DerivO3CPU): LQEntries = 16 SQEntries = 16 LSQDepCheckShift = 0 @@ -142,6 +144,7 @@ class O3_ARM_v7a_3(DerivO3CPU): numROBEntries = 40 switched_out = False + branchPred = O3_ARM_v7a_BP() # Instruction Cache class O3_ARM_v7a_ICache(BaseCache): diff --git a/src/cpu/BaseCPU.py b/src/cpu/BaseCPU.py index 900a23991..759bc0881 100644 --- a/src/cpu/BaseCPU.py +++ b/src/cpu/BaseCPU.py @@ -51,6 +51,7 @@ from Bus import CoherentBus from InstTracer import InstTracer from ExeTracer import ExeTracer from MemObject import MemObject +from BranchPredictor import BranchPredictor default_tracer = ExeTracer() @@ -184,6 +185,8 @@ class BaseCPU(MemObject): dcache_port = MasterPort("Data Port") _cached_ports = ['icache_port', 'dcache_port'] + branchPred = Param.BranchPredictor(NULL, "Branch Predictor") + if buildEnv['TARGET_ISA'] in ['x86', 'arm']: _cached_ports += ["itb.walker.port", "dtb.walker.port"] diff --git a/src/cpu/inorder/InOrderCPU.py b/src/cpu/inorder/InOrderCPU.py index 811549bae..3285d50ce 100644 --- a/src/cpu/inorder/InOrderCPU.py +++ b/src/cpu/inorder/InOrderCPU.py @@ -29,6 +29,7 @@ from m5.params import * from m5.proxy import * from BaseCPU import BaseCPU +from BranchPredictor import BranchPredictor class ThreadModel(Enum): vals = ['Single', 'SMT', 'SwitchOnCacheMiss'] @@ -46,24 +47,6 @@ class InOrderCPU(BaseCPU): fetchBuffSize = Param.Unsigned(4, "Fetch Buffer Size (Number of Cache Blocks Stored)") memBlockSize = Param.Unsigned(64, "Memory Block Size") - predType = Param.String("tournament", "Branch predictor type ('local', 'tournament')") - localPredictorSize = Param.Unsigned(2048, "Size of local predictor") - localCtrBits = Param.Unsigned(2, "Bits per counter") - localHistoryTableSize = Param.Unsigned(2048, "Size of local history table") - localHistoryBits = Param.Unsigned(11, "Bits for the local history") - globalPredictorSize = Param.Unsigned(8192, "Size of global predictor") - globalCtrBits = Param.Unsigned(2, "Bits per counter") - globalHistoryBits = Param.Unsigned(13, "Bits of history") - choicePredictorSize = Param.Unsigned(8192, "Size of choice predictor") - choiceCtrBits = Param.Unsigned(2, "Bits of choice counters") - - BTBEntries = Param.Unsigned(4096, "Number of BTB entries") - BTBTagSize = Param.Unsigned(16, "Size of the BTB tags, in bits") - - RASSize = Param.Unsigned(16, "RAS size") - - instShiftAmt = Param.Unsigned(2, "Number of bits to shift instructions by") - stageTracing = Param.Bool(False, "Enable tracing of each stage in CPU") multLatency = Param.Cycles(1, "Latency for Multiply Operations") @@ -76,3 +59,5 @@ class InOrderCPU(BaseCPU): div24RepeatRate = Param.Cycles(1, "Repeat Rate for 24-bit Divide Operations") div32Latency = Param.Cycles(1, "Latency for 32-bit Divide Operations") div32RepeatRate = Param.Cycles(1, "Repeat Rate for 32-bit Divide Operations") + + branchPred = BranchPredictor(numThreads = Parent.numThreads) diff --git a/src/cpu/inorder/SConscript b/src/cpu/inorder/SConscript index 94fb5ae7f..c5406444c 100644 --- a/src/cpu/inorder/SConscript +++ b/src/cpu/inorder/SConscript @@ -71,7 +71,6 @@ if 'InOrderCPU' in env['CPU_MODELS']: Source('resource.cc') Source('resources/agen_unit.cc') Source('resources/execution_unit.cc') - Source('resources/bpred_unit.cc') Source('resources/branch_predictor.cc') Source('resources/cache_unit.cc') Source('resources/fetch_unit.cc') diff --git a/src/cpu/inorder/inorder_cpu_builder.cc b/src/cpu/inorder/inorder_cpu_builder.cc index bde5b1e94..569652bd2 100644 --- a/src/cpu/inorder/inorder_cpu_builder.cc +++ b/src/cpu/inorder/inorder_cpu_builder.cc @@ -57,11 +57,5 @@ InOrderCPUParams::create() } numThreads = actual_num_threads; - - instShiftAmt = 2; - return new InOrderCPU(this); } - - - diff --git a/src/cpu/inorder/resources/bpred_unit.cc b/src/cpu/inorder/resources/bpred_unit.cc deleted file mode 100644 index eff40566d..000000000 --- a/src/cpu/inorder/resources/bpred_unit.cc +++ /dev/null @@ -1,463 +0,0 @@ - -/* - * Copyright (c) 2004-2005 The Regents of The University of Michigan - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer; - * redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution; - * neither the name of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Authors: Kevin Lim - */ - -#include -#include - -#include "arch/utility.hh" -#include "base/trace.hh" -#include "config/the_isa.hh" -#include "cpu/inorder/resources/bpred_unit.hh" -#include "debug/InOrderBPred.hh" -#include "debug/Resource.hh" - -using namespace std; -using namespace ThePipeline; - -BPredUnit::BPredUnit(Resource *_res, ThePipeline::Params *params) - : res(_res), - BTB(params->BTBEntries, params->BTBTagSize, params->instShiftAmt) -{ - // Setup the selected predictor. - if (params->predType == "local") { - localBP = new LocalBP(params->localPredictorSize, - params->localCtrBits, - params->instShiftAmt); - predictor = Local; - } else if (params->predType == "tournament") { - tournamentBP = new TournamentBP(params->localCtrBits, - params->localHistoryTableSize, - params->localHistoryBits, - params->globalPredictorSize, - params->globalHistoryBits, - params->globalCtrBits, - params->choicePredictorSize, - params->choiceCtrBits, - params->instShiftAmt); - predictor = Tournament; - } else { - fatal("Invalid BP selected!"); - } - - for (int i=0; i < ThePipeline::MaxThreads; i++) - RAS[i].init(params->RASSize); - - instSize = sizeof(TheISA::MachInst); -} - -std::string -BPredUnit::name() -{ - return res->name(); -} - -void -BPredUnit::regStats() -{ - lookups - .name(name() + ".lookups") - .desc("Number of BP lookups") - ; - - condPredicted - .name(name() + ".condPredicted") - .desc("Number of conditional branches predicted") - ; - - condIncorrect - .name(name() + ".condIncorrect") - .desc("Number of conditional branches incorrect") - ; - - BTBLookups - .name(name() + ".BTBLookups") - .desc("Number of BTB lookups") - ; - - BTBHits - .name(name() + ".BTBHits") - .desc("Number of BTB hits") - ; - - BTBHitPct - .name(name() + ".BTBHitPct") - .desc("BTB Hit Percentage") - .precision(6); - BTBHitPct = (BTBHits / BTBLookups) * 100; - - usedRAS - .name(name() + ".usedRAS") - .desc("Number of times the RAS was used to get a target.") - ; - - RASIncorrect - .name(name() + ".RASInCorrect") - .desc("Number of incorrect RAS predictions.") - ; -} - - -void -BPredUnit::switchOut() -{ - // Clear any state upon switch out. - for (int i = 0; i < ThePipeline::MaxThreads; ++i) { - squash(0, i); - } -} - - -void -BPredUnit::takeOverFrom() -{ - // Can reset all predictor state, but it's not necessarily better - // than leaving it be. -/* - for (int i = 0; i < ThePipeline::MaxThreads; ++i) - RAS[i].reset(); - - BP.reset(); - BTB.reset(); -*/ -} - - -bool -BPredUnit::predict(DynInstPtr &inst, TheISA::PCState &predPC, ThreadID tid) -{ - // See if branch predictor predicts taken. - // If so, get its target addr either from the BTB or the RAS. - // Save off record of branch stuff so the RAS can be fixed - // up once it's done. - - using TheISA::MachInst; - - int asid = inst->asid; - bool pred_taken = false; - TheISA::PCState target; - - ++lookups; - DPRINTF(InOrderBPred, "[tid:%i] [sn:%i] %s ... PC %s doing branch " - "prediction\n", tid, inst->seqNum, - inst->staticInst->disassemble(inst->instAddr()), - inst->pcState()); - - - void *bp_history = NULL; - - if (inst->isUncondCtrl()) { - DPRINTF(InOrderBPred, "[tid:%i] Unconditional control.\n", - tid); - pred_taken = true; - // Tell the BP there was an unconditional branch. - BPUncond(bp_history); - - if (inst->isReturn() && RAS[tid].empty()) { - DPRINTF(InOrderBPred, "[tid:%i] RAS is empty, predicting " - "false.\n", tid); - pred_taken = false; - } - } else { - ++condPredicted; - - pred_taken = BPLookup(predPC.instAddr(), bp_history); - } - - PredictorHistory predict_record(inst->seqNum, predPC, pred_taken, - bp_history, tid); - - // Now lookup in the BTB or RAS. - if (pred_taken) { - if (inst->isReturn()) { - ++usedRAS; - - // If it's a function return call, then look up the address - // in the RAS. - TheISA::PCState rasTop = RAS[tid].top(); - target = TheISA::buildRetPC(inst->pcState(), rasTop); - - // Record the top entry of the RAS, and its index. - predict_record.usedRAS = true; - predict_record.RASIndex = RAS[tid].topIdx(); - predict_record.rasTarget = rasTop; - - assert(predict_record.RASIndex < 16); - - RAS[tid].pop(); - - DPRINTF(InOrderBPred, "[tid:%i]: Instruction %s is a return, " - "RAS predicted target: %s, RAS index: %i.\n", - tid, inst->pcState(), target, - predict_record.RASIndex); - } else { - ++BTBLookups; - - if (inst->isCall()) { - - RAS[tid].push(inst->pcState()); - - // Record that it was a call so that the top RAS entry can - // be popped off if the speculation is incorrect. - predict_record.wasCall = true; - - DPRINTF(InOrderBPred, "[tid:%i]: Instruction %s was a call" - ", adding %s to the RAS index: %i.\n", - tid, inst->pcState(), predPC, - RAS[tid].topIdx()); - } - - if (inst->isCall() && - inst->isUncondCtrl() && - inst->isDirectCtrl()) { - target = inst->branchTarget(); - } else if (BTB.valid(predPC.instAddr(), asid)) { - ++BTBHits; - - // If it's not a return, use the BTB to get the target addr. - target = BTB.lookup(predPC.instAddr(), asid); - - DPRINTF(InOrderBPred, "[tid:%i]: [asid:%i] Instruction %s " - "predicted target is %s.\n", - tid, asid, inst->pcState(), target); - } else { - DPRINTF(InOrderBPred, "[tid:%i]: BTB doesn't have a " - "valid entry, predicting false.\n",tid); - pred_taken = false; - } - } - } - - if (pred_taken) { - // Set the PC and the instruction's predicted target. - predPC = target; - } - DPRINTF(InOrderBPred, "[tid:%i]: [sn:%i]: Setting Predicted PC to %s.\n", - tid, inst->seqNum, predPC); - - predHist[tid].push_front(predict_record); - - DPRINTF(InOrderBPred, "[tid:%i] [sn:%i] pushed onto front of predHist " - "...predHist.size(): %i\n", - tid, inst->seqNum, predHist[tid].size()); - - return pred_taken; -} - - -void -BPredUnit::update(const InstSeqNum &done_sn, ThreadID tid) -{ - DPRINTF(Resource, "BranchPred: [tid:%i]: Commiting branches until sequence" - "number %lli.\n", tid, done_sn); - - while (!predHist[tid].empty() && - predHist[tid].back().seqNum <= done_sn) { - // Update the branch predictor with the correct results. - BPUpdate(predHist[tid].back().pc.instAddr(), - predHist[tid].back().predTaken, - predHist[tid].back().bpHistory, - false); - - predHist[tid].pop_back(); - } -} - - -void -BPredUnit::squash(const InstSeqNum &squashed_sn, ThreadID tid, ThreadID asid) -{ - History &pred_hist = predHist[tid]; - - while (!pred_hist.empty() && - pred_hist.front().seqNum > squashed_sn) { - if (pred_hist.front().usedRAS) { - DPRINTF(InOrderBPred, "BranchPred: [tid:%i]: Restoring top of RAS " - "to: %i, target: %s.\n", - tid, - pred_hist.front().RASIndex, - pred_hist.front().rasTarget); - - RAS[tid].restore(pred_hist.front().RASIndex, - pred_hist.front().rasTarget); - - } else if (pred_hist.front().wasCall) { - DPRINTF(InOrderBPred, "BranchPred: [tid:%i]: Removing speculative " - "entry added to the RAS.\n",tid); - - RAS[tid].pop(); - } - - // This call should delete the bpHistory. - BPSquash(pred_hist.front().bpHistory); - - pred_hist.pop_front(); - } - -} - - -void -BPredUnit::squash(const InstSeqNum &squashed_sn, - const TheISA::PCState &corrTarget, - bool actually_taken, - ThreadID tid, - ThreadID asid) -{ - // Now that we know that a branch was mispredicted, we need to undo - // all the branches that have been seen up until this branch and - // fix up everything. - - History &pred_hist = predHist[tid]; - - ++condIncorrect; - - DPRINTF(InOrderBPred, "[tid:%i]: Squashing from sequence number %i, " - "setting target to %s.\n", - tid, squashed_sn, corrTarget); - - squash(squashed_sn, tid); - - // If there's a squash due to a syscall, there may not be an entry - // corresponding to the squash. In that case, don't bother trying to - // fix up the entry. - if (!pred_hist.empty()) { - HistoryIt hist_it = pred_hist.begin(); - //HistoryIt hist_it = find(pred_hist.begin(), pred_hist.end(), - // squashed_sn); - - //assert(hist_it != pred_hist.end()); - if (pred_hist.front().seqNum != squashed_sn) { - DPRINTF(InOrderBPred, "Front sn %i != Squash sn %i\n", - pred_hist.front().seqNum, squashed_sn); - - assert(pred_hist.front().seqNum == squashed_sn); - } - - - if ((*hist_it).usedRAS) { - ++RASIncorrect; - } - - BPUpdate((*hist_it).pc.instAddr(), actually_taken, - pred_hist.front().bpHistory, true); - - // only update BTB on branch taken right??? - if (actually_taken) - BTB.update((*hist_it).pc.instAddr(), corrTarget, asid); - - DPRINTF(InOrderBPred, "[tid:%i]: Removing history for [sn:%i] " - "PC %s.\n", tid, (*hist_it).seqNum, (*hist_it).pc); - - pred_hist.erase(hist_it); - - DPRINTF(InOrderBPred, "[tid:%i]: predHist.size(): %i\n", tid, - predHist[tid].size()); - - } else { - DPRINTF(InOrderBPred, "[tid:%i]: [sn:%i] pred_hist empty, can't " - "update.\n", tid, squashed_sn); - } -} - - -void -BPredUnit::BPUncond(void * &bp_history) -{ - // Only the tournament predictor cares about unconditional branches. - if (predictor == Tournament) { - tournamentBP->uncondBr(bp_history); - } -} - - -void -BPredUnit::BPSquash(void *bp_history) -{ - if (predictor == Local) { - localBP->squash(bp_history); - } else if (predictor == Tournament) { - tournamentBP->squash(bp_history); - } else { - panic("Predictor type is unexpected value!"); - } -} - - -bool -BPredUnit::BPLookup(Addr inst_PC, void * &bp_history) -{ - if (predictor == Local) { - return localBP->lookup(inst_PC, bp_history); - } else if (predictor == Tournament) { - return tournamentBP->lookup(inst_PC, bp_history); - } else { - panic("Predictor type is unexpected value!"); - } -} - - -void -BPredUnit::BPUpdate(Addr inst_PC, bool taken, void *bp_history, bool squashed) -{ - if (predictor == Local) { - localBP->update(inst_PC, taken, bp_history); - } else if (predictor == Tournament) { - tournamentBP->update(inst_PC, taken, bp_history, squashed); - } else { - panic("Predictor type is unexpected value!"); - } -} - - -void -BPredUnit::dump() -{ - /*typename History::iterator pred_hist_it; - - for (int i = 0; i < ThePipeline::MaxThreads; ++i) { - if (!predHist[i].empty()) { - pred_hist_it = predHist[i].begin(); - - cprintf("predHist[%i].size(): %i\n", i, predHist[i].size()); - - while (pred_hist_it != predHist[i].end()) { - cprintf("[sn:%lli], PC:%#x, tid:%i, predTaken:%i, " - "bpHistory:%#x\n", - (*pred_hist_it).seqNum, (*pred_hist_it).PC, - (*pred_hist_it).tid, (*pred_hist_it).predTaken, - (*pred_hist_it).bpHistory); - pred_hist_it++; - } - - cprintf("\n"); - } - }*/ -} diff --git a/src/cpu/inorder/resources/bpred_unit.hh b/src/cpu/inorder/resources/bpred_unit.hh deleted file mode 100644 index b5d12d2db..000000000 --- a/src/cpu/inorder/resources/bpred_unit.hh +++ /dev/null @@ -1,267 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Regents of The University of Michigan - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer; - * redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution; - * neither the name of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Authors: Kevin Lim - * Korey Sewell - */ - -#ifndef __CPU_INORDER_BPRED_UNIT_HH__ -#define __CPU_INORDER_BPRED_UNIT_HH__ - -#include - -#include "arch/isa_traits.hh" -#include "base/statistics.hh" -#include "cpu/inorder/inorder_dyn_inst.hh" -#include "cpu/inorder/pipeline_traits.hh" -#include "cpu/inorder/resource.hh" -#include "cpu/pred/2bit_local.hh" -#include "cpu/pred/btb.hh" -#include "cpu/pred/ras.hh" -#include "cpu/pred/tournament.hh" -#include "cpu/inst_seq.hh" -#include "params/InOrderCPU.hh" - -/** - * Basically a wrapper class to hold both the branch predictor - * and the BTB. - */ -class BPredUnit -{ - private: - - enum PredType { - Local, - Tournament - }; - - PredType predictor; - - public: - - /** - * @param params The params object, that has the size of the BP and BTB. - */ - BPredUnit(Resource *_res, ThePipeline::Params *params); - - std::string name(); - - /** - * Registers statistics. - */ - void regStats(); - - void switchOut(); - - void takeOverFrom(); - - /** - * Predicts whether or not the instruction is a taken branch, and the - * target of the branch if it is taken. - * @param inst The branch instruction. - * @param predPC The predicted PC is passed back through this parameter. - * @param tid The thread id. - * @return Returns if the branch is taken or not. - */ - bool predict(ThePipeline::DynInstPtr &inst, - TheISA::PCState &predPC, ThreadID tid); - - // @todo: Rename this function. - void BPUncond(void * &bp_history); - - /** - * Tells the branch predictor to commit any updates until the given - * sequence number. - * @param done_sn The sequence number to commit any older updates up until. - * @param tid The thread id. - */ - void update(const InstSeqNum &done_sn, ThreadID tid); - - /** - * Squashes all outstanding updates until a given sequence number. - * @param squashed_sn The sequence number to squash any younger updates up - * until. - * @param tid The thread id. - */ - void squash(const InstSeqNum &squashed_sn, ThreadID tid, - ThreadID asid = 0); - - /** - * Squashes all outstanding updates until a given sequence number, and - * corrects that sn's update with the proper address and taken/not taken. - * @param squashed_sn The sequence number to squash any younger updates up - * until. - * @param corrTarget The correct branch target. - * @param actually_taken The correct branch direction. - * @param tid The thread id. - */ - void squash(const InstSeqNum &squashed_sn, - const TheISA::PCState &corrTarget, bool actually_taken, - ThreadID tid, ThreadID asid = 0); - - /** - * @param bp_history Pointer to the history object. The predictor - * will need to update any state and delete the object. - */ - void BPSquash(void *bp_history); - - /** - * Looks up a given PC in the BP to see if it is taken or not taken. - * @param inst_PC The PC to look up. - * @param bp_history Pointer that will be set to an object that - * has the branch predictor state associated with the lookup. - * @return Whether the branch is taken or not taken. - */ - bool BPLookup(Addr instPC, void * &bp_history); - - /** - * Looks up a given PC in the BTB to see if a matching entry exists. - * @param inst_PC The PC to look up. - * @return Whether the BTB contains the given PC. - */ - bool BTBValid(Addr &inst_PC) - { return BTB.valid(inst_PC, 0); } - - /** - * Looks up a given PC in the BTB to get the predicted target. - * @param inst_PC The PC to look up. - * @return The address of the target of the branch. - */ - TheISA::PCState BTBLookup(Addr instPC) - { return BTB.lookup(instPC, 0); } - - /** - * Updates the BP with taken/not taken information. - * @param instPC The branch's PC that will be updated. - * @param taken Whether the branch was taken or not taken. - * @param bp_history Pointer to the branch predictor state that is - * associated with the branch lookup that is being updated. - * @param squashed if the branch in question was squashed or not - * @todo Make this update flexible enough to handle a global predictor. - */ - void BPUpdate(Addr instPC, bool taken, void *bp_history, bool squashed); - - /** - * Updates the BTB with the target of a branch. - * @param inst_PC The branch's PC that will be updated. - * @param target_PC The branch's target that will be added to the BTB. - */ - void BTBUpdate(Addr instPC, const TheISA::PCState &targetPC) - { BTB.update(instPC, targetPC, 0); } - - void dump(); - - private: - int instSize; - Resource *res; - - struct PredictorHistory { - /** - * Makes a predictor history struct that contains any - * information needed to update the predictor, BTB, and RAS. - */ - PredictorHistory(const InstSeqNum &seq_num, - const TheISA::PCState &instPC, bool pred_taken, - void *bp_history, ThreadID _tid) - : seqNum(seq_num), pc(instPC), rasTarget(0), RASIndex(0), - tid(_tid), predTaken(pred_taken), usedRAS(0), wasCall(0), - bpHistory(bp_history) - {} - - /** The sequence number for the predictor history entry. */ - InstSeqNum seqNum; - - /** The PC associated with the sequence number. */ - TheISA::PCState pc; - - /** The RAS target (only valid if a return). */ - TheISA::PCState rasTarget; - - /** The RAS index of the instruction (only valid if a call). */ - unsigned RASIndex; - - /** The thread id. */ - ThreadID tid; - - /** Whether or not it was predicted taken. */ - bool predTaken; - - /** Whether or not the RAS was used. */ - bool usedRAS; - - /** Whether or not the instruction was a call. */ - bool wasCall; - - /** Pointer to the history object passed back from the branch - * predictor. It is used to update or restore state of the - * branch predictor. - */ - void *bpHistory; - }; - - typedef std::list History; - typedef History::iterator HistoryIt; - - /** - * The per-thread predictor history. This is used to update the predictor - * as instructions are committed, or restore it to the proper state after - * a squash. - */ - History predHist[ThePipeline::MaxThreads]; - - /** The local branch predictor. */ - LocalBP *localBP; - - /** The tournament branch predictor. */ - TournamentBP *tournamentBP; - - /** The BTB. */ - DefaultBTB BTB; - - /** The per-thread return address stack. */ - ReturnAddrStack RAS[ThePipeline::MaxThreads]; - - /** Stat for number of BP lookups. */ - Stats::Scalar lookups; - /** Stat for number of conditional branches predicted. */ - Stats::Scalar condPredicted; - /** Stat for number of conditional branches predicted incorrectly. */ - Stats::Scalar condIncorrect; - /** Stat for number of BTB lookups. */ - Stats::Scalar BTBLookups; - /** Stat for number of BTB hits. */ - Stats::Scalar BTBHits; - /** Stat for number of times the BTB is correct. */ - Stats::Scalar BTBCorrect; - /** Stat for number of times the RAS is used to get a target. */ - Stats::Scalar usedRAS; - /** Stat for number of times the RAS is incorrect. */ - Stats::Scalar RASIncorrect; - Stats::Formula BTBHitPct; -}; - -#endif // __CPU_INORDER_BPRED_UNIT_HH__ diff --git a/src/cpu/inorder/resources/branch_predictor.cc b/src/cpu/inorder/resources/branch_predictor.cc index 004cf8b63..50d3847ba 100644 --- a/src/cpu/inorder/resources/branch_predictor.cc +++ b/src/cpu/inorder/resources/branch_predictor.cc @@ -44,7 +44,7 @@ BranchPredictor::BranchPredictor(std::string res_name, int res_id, InOrderCPU *_cpu, ThePipeline::Params *params) : Resource(res_name, res_id, res_width, res_latency, _cpu), - branchPred(this, params) + branchPred(params->branchPred) { instSize = sizeof(MachInst); } @@ -61,8 +61,6 @@ BranchPredictor::regStats() .desc("Number of Branches Predicted As Not Taken (False)."); Resource::regStats(); - - branchPred.regStats(); } void @@ -97,6 +95,7 @@ BranchPredictor::execute(int slot_num) DPRINTF(InOrderStage, "[tid:%u]: [sn:%i]: squashed, " "skipping prediction \n", tid, inst->seqNum); } else { + TheISA::PCState instPC = inst->pcState(); TheISA::PCState pred_PC = inst->pcState(); TheISA::advancePC(pred_PC, inst->staticInst); @@ -104,7 +103,9 @@ BranchPredictor::execute(int slot_num) // If not, the pred_PC be updated to pc+8 // If predicted, the pred_PC will be updated to new target // value - bool predict_taken = branchPred.predict(inst, pred_PC, tid); + bool predict_taken = branchPred->predictInOrder( + inst->staticInst, inst->seqNum, + inst->asid, instPC, pred_PC, tid); if (predict_taken) { DPRINTF(InOrderBPred, "[tid:%i]: [sn:%i]: Branch " @@ -119,8 +120,8 @@ BranchPredictor::execute(int slot_num) inst->setBranchPred(predict_taken); } - //@todo: Check to see how hw_rei is handled here...how does PC,NPC get - // updated to compare mispredict against??? + //@todo: Check to see how hw_rei is handled here...how does + //PC,NPC get updated to compare mispredict against??? inst->setPredTarg(pred_PC); DPRINTF(InOrderBPred, "[tid:%i]: [sn:%i]: %s Predicted PC is " "%s.\n", tid, seq_num, inst->instName(), pred_PC); @@ -143,7 +144,7 @@ BranchPredictor::execute(int slot_num) tid, seq_num); - branchPred.update(seq_num, tid); + branchPred->update(seq_num, tid); } bpred_req->done(); @@ -165,18 +166,16 @@ BranchPredictor::squash(DynInstPtr inst, int squash_stage, // update due to branch resolution if (squash_stage >= ThePipeline::BackEndStartStage) { - branchPred.squash(bpred_squash_num, - inst->pcState(), - inst->pcState().branching(), - tid); + branchPred->squash(bpred_squash_num, inst->pcState(), + inst->pcState().branching(), tid); } else { // update due to predicted taken branch - branchPred.squash(bpred_squash_num, tid); + branchPred->squash(bpred_squash_num, tid); } } void BranchPredictor::instGraduated(InstSeqNum seq_num, ThreadID tid) { - branchPred.update(seq_num, tid); + branchPred->update(seq_num, tid); } diff --git a/src/cpu/inorder/resources/branch_predictor.hh b/src/cpu/inorder/resources/branch_predictor.hh index dde340ce7..e2b7fba52 100644 --- a/src/cpu/inorder/resources/branch_predictor.hh +++ b/src/cpu/inorder/resources/branch_predictor.hh @@ -36,7 +36,7 @@ #include #include -#include "cpu/inorder/resources/bpred_unit.hh" +#include "cpu/pred/bpred_unit.hh" #include "cpu/inorder/cpu.hh" #include "cpu/inorder/inorder_dyn_inst.hh" #include "cpu/inorder/pipeline_traits.hh" @@ -70,7 +70,7 @@ class BranchPredictor : public Resource { /** List of instructions this resource is currently * processing. */ - BPredUnit branchPred; + BPredUnit *branchPred; int instSize; diff --git a/src/cpu/o3/O3CPU.py b/src/cpu/o3/O3CPU.py index 5fec3c547..4f720a8f6 100644 --- a/src/cpu/o3/O3CPU.py +++ b/src/cpu/o3/O3CPU.py @@ -32,6 +32,7 @@ from m5.proxy import * from BaseCPU import BaseCPU from FUPool import * from O3Checker import O3Checker +from BranchPredictor import BranchPredictor class DerivO3CPU(BaseCPU): type = 'DerivO3CPU' @@ -84,22 +85,6 @@ class DerivO3CPU(BaseCPU): backComSize = Param.Unsigned(5, "Time buffer size for backwards communication") forwardComSize = Param.Unsigned(5, "Time buffer size for forward communication") - predType = Param.String("tournament", "Branch predictor type ('local', 'tournament')") - localPredictorSize = Param.Unsigned(2048, "Size of local predictor") - localCtrBits = Param.Unsigned(2, "Bits per counter") - localHistoryTableSize = Param.Unsigned(2048, "Size of local history table") - localHistoryBits = Param.Unsigned(11, "Bits for the local history") - globalPredictorSize = Param.Unsigned(8192, "Size of global predictor") - globalCtrBits = Param.Unsigned(2, "Bits per counter") - globalHistoryBits = Param.Unsigned(13, "Bits of history") - choicePredictorSize = Param.Unsigned(8192, "Size of choice predictor") - choiceCtrBits = Param.Unsigned(2, "Bits of choice counters") - - BTBEntries = Param.Unsigned(4096, "Number of BTB entries") - BTBTagSize = Param.Unsigned(16, "Size of the BTB tags, in bits") - - RASSize = Param.Unsigned(16, "RAS size") - LQEntries = Param.Unsigned(32, "Number of load queue entries") SQEntries = Param.Unsigned(32, "Number of store queue entries") LSQDepCheckShift = Param.Unsigned(4, "Number of places to shift addr before check") @@ -118,8 +103,6 @@ class DerivO3CPU(BaseCPU): numIQEntries = Param.Unsigned(64, "Number of instruction queue entries") numROBEntries = Param.Unsigned(192, "Number of reorder buffer entries") - instShiftAmt = Param.Unsigned(2, "Number of bits to shift instructions by") - smtNumFetchingThreads = Param.Unsigned(1, "SMT Number of Fetching Threads") smtFetchPolicy = Param.String('SingleThread', "SMT Fetch policy") smtLSQPolicy = Param.String('Partitioned', "SMT LSQ Sharing Policy") @@ -130,6 +113,7 @@ class DerivO3CPU(BaseCPU): smtROBThreshold = Param.Int(100, "SMT ROB Threshold Sharing Parameter") smtCommitPolicy = Param.String('RoundRobin', "SMT Commit Policy") + branchPred = BranchPredictor(numThreads = Parent.numThreads) needsTSO = Param.Bool(buildEnv['TARGET_ISA'] == 'x86', "Enable TSO Memory model") diff --git a/src/cpu/o3/SConscript b/src/cpu/o3/SConscript index e97f4527b..50b5a8ea4 100755 --- a/src/cpu/o3/SConscript +++ b/src/cpu/o3/SConscript @@ -43,7 +43,6 @@ if 'O3CPU' in env['CPU_MODELS']: SimObject('O3CPU.py') Source('base_dyn_inst.cc') - Source('bpred_unit.cc') Source('commit.cc') Source('cpu.cc') Source('deriv.cc') diff --git a/src/cpu/o3/cpu_policy.hh b/src/cpu/o3/cpu_policy.hh index ed0c31133..eea49ad52 100644 --- a/src/cpu/o3/cpu_policy.hh +++ b/src/cpu/o3/cpu_policy.hh @@ -31,7 +31,6 @@ #ifndef __CPU_O3_CPU_POLICY_HH__ #define __CPU_O3_CPU_POLICY_HH__ -#include "cpu/o3/bpred_unit.hh" #include "cpu/o3/comm.hh" #include "cpu/o3/commit.hh" #include "cpu/o3/decode.hh" @@ -60,10 +59,6 @@ template struct SimpleCPUPolicy { - /** Typedef for the branch prediction unit (which includes the BP, - * RAS, and BTB). - */ - typedef ::BPredUnit BPredUnit; /** Typedef for the register file. Most classes assume a unified * physical register file. */ diff --git a/src/cpu/o3/deriv.cc b/src/cpu/o3/deriv.cc index fb10934d5..9f41b989f 100644 --- a/src/cpu/o3/deriv.cc +++ b/src/cpu/o3/deriv.cc @@ -65,7 +65,5 @@ DerivO3CPUParams::create() else smtFetchPolicy = smtFetchPolicy; - instShiftAmt = 2; - return new DerivO3CPU(this); } diff --git a/src/cpu/o3/fetch.hh b/src/cpu/o3/fetch.hh index fb17a9247..23245d496 100644 --- a/src/cpu/o3/fetch.hh +++ b/src/cpu/o3/fetch.hh @@ -49,6 +49,7 @@ #include "base/statistics.hh" #include "config/the_isa.hh" #include "cpu/pc_event.hh" +#include "cpu/pred/bpred_unit.hh" #include "cpu/timebuf.hh" #include "cpu/translation.hh" #include "mem/packet.hh" @@ -76,7 +77,6 @@ class DefaultFetch typedef typename Impl::O3CPU O3CPU; /** Typedefs from the CPU policy. */ - typedef typename CPUPol::BPredUnit BPredUnit; typedef typename CPUPol::FetchStruct FetchStruct; typedef typename CPUPol::TimeStruct TimeStruct; @@ -405,7 +405,7 @@ class DefaultFetch typename TimeBuffer::wire toDecode; /** BPredUnit. */ - BPredUnit branchPred; + BPredUnit *branchPred; TheISA::PCState pc[Impl::MaxThreads]; diff --git a/src/cpu/o3/fetch_impl.hh b/src/cpu/o3/fetch_impl.hh index f531203d9..07033fc8a 100644 --- a/src/cpu/o3/fetch_impl.hh +++ b/src/cpu/o3/fetch_impl.hh @@ -73,7 +73,6 @@ using namespace std; template DefaultFetch::DefaultFetch(O3CPU *_cpu, DerivO3CPUParams *params) : cpu(_cpu), - branchPred(params), decodeToFetchDelay(params->decodeToFetchDelay), renameToFetchDelay(params->renameToFetchDelay), iewToFetchDelay(params->iewToFetchDelay), @@ -129,6 +128,8 @@ DefaultFetch::DefaultFetch(O3CPU *_cpu, DerivO3CPUParams *params) cacheData[i] = NULL; decoder[i] = new TheISA::Decoder; } + + branchPred = params->branchPred; } template @@ -259,8 +260,6 @@ DefaultFetch::regStats() .desc("Number of inst fetches per cycle") .flags(Stats::total); fetchRate = fetchedInsts / cpu->numCycles; - - branchPred.regStats(); } template @@ -437,7 +436,7 @@ DefaultFetch::drainSanityCheck() const assert(fetchStatus[i] == Idle || stalls[i].drain); } - branchPred.drainSanityCheck(); + branchPred->drainSanityCheck(); } template @@ -470,7 +469,6 @@ DefaultFetch::takeOverFrom() assert(cpu->getInstPort().isConnected()); resetStage(); - branchPred.takeOverFrom(); } template @@ -537,7 +535,8 @@ DefaultFetch::lookupAndUpdateNextPC( } ThreadID tid = inst->threadNumber; - predict_taken = branchPred.predict(inst, nextPC, tid); + predict_taken = branchPred->predict(inst->staticInst, inst->seqNum, + nextPC, tid); if (predict_taken) { DPRINTF(Fetch, "[tid:%i]: [sn:%i]: Branch predicted to be taken to %s.\n", @@ -990,12 +989,12 @@ DefaultFetch::checkSignalsAndUpdate(ThreadID tid) // invalid state we generated in after sequence number if (fromCommit->commitInfo[tid].mispredictInst && fromCommit->commitInfo[tid].mispredictInst->isControl()) { - branchPred.squash(fromCommit->commitInfo[tid].doneSeqNum, + branchPred->squash(fromCommit->commitInfo[tid].doneSeqNum, fromCommit->commitInfo[tid].pc, fromCommit->commitInfo[tid].branchTaken, tid); } else { - branchPred.squash(fromCommit->commitInfo[tid].doneSeqNum, + branchPred->squash(fromCommit->commitInfo[tid].doneSeqNum, tid); } @@ -1003,7 +1002,7 @@ DefaultFetch::checkSignalsAndUpdate(ThreadID tid) } else if (fromCommit->commitInfo[tid].doneSeqNum) { // Update the branch predictor if it wasn't a squashed instruction // that was broadcasted. - branchPred.update(fromCommit->commitInfo[tid].doneSeqNum, tid); + branchPred->update(fromCommit->commitInfo[tid].doneSeqNum, tid); } // Check ROB squash signals from commit. @@ -1023,12 +1022,12 @@ DefaultFetch::checkSignalsAndUpdate(ThreadID tid) // Update the branch predictor. if (fromDecode->decodeInfo[tid].branchMispredict) { - branchPred.squash(fromDecode->decodeInfo[tid].doneSeqNum, + branchPred->squash(fromDecode->decodeInfo[tid].doneSeqNum, fromDecode->decodeInfo[tid].nextPC, fromDecode->decodeInfo[tid].branchTaken, tid); } else { - branchPred.squash(fromDecode->decodeInfo[tid].doneSeqNum, + branchPred->squash(fromDecode->decodeInfo[tid].doneSeqNum, tid); } diff --git a/src/cpu/o3/sat_counter.cc b/src/cpu/o3/sat_counter.cc deleted file mode 100644 index 68d3ef627..000000000 --- a/src/cpu/o3/sat_counter.cc +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Copyright (c) 2005 The Regents of The University of Michigan - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer; - * redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution; - * neither the name of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Authors: Kevin Lim - */ - -#include "base/misc.hh" -#include "cpu/o3/sat_counter.hh" - -SatCounter::SatCounter() - : initialVal(0), counter(0) -{ -} - -SatCounter::SatCounter(unsigned bits) - : initialVal(0), maxVal((1 << bits) - 1), counter(0) -{ -} - -SatCounter::SatCounter(unsigned bits, uint8_t initial_val) - : initialVal(initialVal), maxVal((1 << bits) - 1), counter(initial_val) -{ - // Check to make sure initial value doesn't exceed the max counter value. - if (initial_val > maxVal) { - fatal("BP: Initial counter value exceeds max size."); - } -} - -void -SatCounter::setBits(unsigned bits) -{ - maxVal = (1 << bits) - 1; -} diff --git a/src/cpu/pred/2bit_local.cc b/src/cpu/pred/2bit_local.cc index 4d18c419b..0fd0a10d3 100644 --- a/src/cpu/pred/2bit_local.cc +++ b/src/cpu/pred/2bit_local.cc @@ -34,12 +34,11 @@ #include "cpu/pred/2bit_local.hh" #include "debug/Fetch.hh" -LocalBP::LocalBP(unsigned _localPredictorSize, - unsigned _localCtrBits, - unsigned _instShiftAmt) - : localPredictorSize(_localPredictorSize), - localCtrBits(_localCtrBits), - instShiftAmt(_instShiftAmt) +LocalBP::LocalBP(const Params *params) + : BPredUnit(params), + localPredictorSize(params->localPredictorSize), + localCtrBits(params->localCtrBits), + instShiftAmt(params->instShiftAmt) { if (!isPowerOf2(localPredictorSize)) { fatal("Invalid local predictor size!\n"); @@ -54,20 +53,20 @@ LocalBP::LocalBP(unsigned _localPredictorSize, // Setup the index mask. indexMask = localPredictorSets - 1; - DPRINTF(Fetch, "Branch predictor: index mask: %#x\n", indexMask); + DPRINTF(Fetch, "index mask: %#x\n", indexMask); // Setup the array of counters for the local predictor. localCtrs.resize(localPredictorSets); for (unsigned i = 0; i < localPredictorSets; ++i) - localCtrs[i].setBits(_localCtrBits); + localCtrs[i].setBits(localCtrBits); - DPRINTF(Fetch, "Branch predictor: local predictor size: %i\n", + DPRINTF(Fetch, "local predictor size: %i\n", localPredictorSize); - DPRINTF(Fetch, "Branch predictor: local counter bits: %i\n", localCtrBits); + DPRINTF(Fetch, "local counter bits: %i\n", localCtrBits); - DPRINTF(Fetch, "Branch predictor: instruction shift amount: %i\n", + DPRINTF(Fetch, "instruction shift amount: %i\n", instShiftAmt); } @@ -80,7 +79,7 @@ LocalBP::reset() } void -LocalBP::BTBUpdate(Addr &branch_addr, void * &bp_history) +LocalBP::btbUpdate(Addr branch_addr, void * &bp_history) { // Place holder for a function that is called to update predictor history when // a BTB entry is invalid or not found. @@ -88,18 +87,18 @@ LocalBP::BTBUpdate(Addr &branch_addr, void * &bp_history) bool -LocalBP::lookup(Addr &branch_addr, void * &bp_history) +LocalBP::lookup(Addr branch_addr, void * &bp_history) { bool taken; uint8_t counter_val; unsigned local_predictor_idx = getLocalIndex(branch_addr); - DPRINTF(Fetch, "Branch predictor: Looking up index %#x\n", + DPRINTF(Fetch, "Looking up index %#x\n", local_predictor_idx); counter_val = localCtrs[local_predictor_idx].read(); - DPRINTF(Fetch, "Branch predictor: prediction is %i.\n", + DPRINTF(Fetch, "prediction is %i.\n", (int)counter_val); taken = getPrediction(counter_val); @@ -107,10 +106,10 @@ LocalBP::lookup(Addr &branch_addr, void * &bp_history) #if 0 // Speculative update. if (taken) { - DPRINTF(Fetch, "Branch predictor: Branch updated as taken.\n"); + DPRINTF(Fetch, "Branch updated as taken.\n"); localCtrs[local_predictor_idx].increment(); } else { - DPRINTF(Fetch, "Branch predictor: Branch updated as not taken.\n"); + DPRINTF(Fetch, "Branch updated as not taken.\n"); localCtrs[local_predictor_idx].decrement(); } #endif @@ -119,7 +118,7 @@ LocalBP::lookup(Addr &branch_addr, void * &bp_history) } void -LocalBP::update(Addr &branch_addr, bool taken, void *bp_history) +LocalBP::update(Addr branch_addr, bool taken, void *bp_history, bool squashed) { assert(bp_history == NULL); unsigned local_predictor_idx; @@ -127,14 +126,13 @@ LocalBP::update(Addr &branch_addr, bool taken, void *bp_history) // Update the local predictor. local_predictor_idx = getLocalIndex(branch_addr); - DPRINTF(Fetch, "Branch predictor: Looking up index %#x\n", - local_predictor_idx); + DPRINTF(Fetch, "Looking up index %#x\n", local_predictor_idx); if (taken) { - DPRINTF(Fetch, "Branch predictor: Branch updated as taken.\n"); + DPRINTF(Fetch, "Branch updated as taken.\n"); localCtrs[local_predictor_idx].increment(); } else { - DPRINTF(Fetch, "Branch predictor: Branch updated as not taken.\n"); + DPRINTF(Fetch, "Branch updated as not taken.\n"); localCtrs[local_predictor_idx].decrement(); } } @@ -153,3 +151,8 @@ LocalBP::getLocalIndex(Addr &branch_addr) { return (branch_addr >> instShiftAmt) & indexMask; } + +void +LocalBP::uncondBranch(void *&bp_history) +{ +} diff --git a/src/cpu/pred/2bit_local.hh b/src/cpu/pred/2bit_local.hh index 01a0b64db..23683cc67 100644 --- a/src/cpu/pred/2bit_local.hh +++ b/src/cpu/pred/2bit_local.hh @@ -38,15 +38,17 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Authors: Kevin Lim + * Timothy M. Jones */ -#ifndef __CPU_O3_2BIT_LOCAL_PRED_HH__ -#define __CPU_O3_2BIT_LOCAL_PRED_HH__ +#ifndef __CPU_PRED_2BIT_LOCAL_PRED_HH__ +#define __CPU_PRED_2BIT_LOCAL_PRED_HH__ #include #include "base/types.hh" -#include "cpu/o3/sat_counter.hh" +#include "cpu/pred/bpred_unit.hh" +#include "cpu/pred/sat_counter.hh" /** * Implements a local predictor that uses the PC to index into a table of @@ -55,17 +57,15 @@ * predictor state that needs to be recorded or updated; the update can be * determined solely by the branch being taken or not taken. */ -class LocalBP +class LocalBP : public BPredUnit { public: /** * Default branch predictor constructor. - * @param localPredictorSize Size of the local predictor. - * @param localCtrBits Number of bits per counter. - * @param instShiftAmt Offset amount for instructions to ignore alignment. */ - LocalBP(unsigned localPredictorSize, unsigned localCtrBits, - unsigned instShiftAmt); + LocalBP(const Params *params); + + virtual void uncondBranch(void * &bp_history); /** * Looks up the given address in the branch predictor and returns @@ -74,7 +74,7 @@ class LocalBP * @param bp_history Pointer to any bp history state. * @return Whether or not the branch is taken. */ - bool lookup(Addr &branch_addr, void * &bp_history); + bool lookup(Addr branch_addr, void * &bp_history); /** * Updates the branch predictor to Not Taken if a BTB entry is @@ -83,14 +83,14 @@ class LocalBP * @param bp_history Pointer to any bp history state. * @return Whether or not the branch is taken. */ - void BTBUpdate(Addr &branch_addr, void * &bp_history); + void btbUpdate(Addr branch_addr, void * &bp_history); /** * Updates the branch predictor with the actual result of a branch. * @param branch_addr The address of the branch to update. * @param taken Whether or not the branch was taken. */ - void update(Addr &branch_addr, bool taken, void *bp_history); + void update(Addr branch_addr, bool taken, void *bp_history, bool squashed); void squash(void *bp_history) { assert(bp_history == NULL); } @@ -128,4 +128,4 @@ class LocalBP unsigned indexMask; }; -#endif // __CPU_O3_2BIT_LOCAL_PRED_HH__ +#endif // __CPU_PRED_2BIT_LOCAL_PRED_HH__ diff --git a/src/cpu/pred/BranchPredictor.py b/src/cpu/pred/BranchPredictor.py new file mode 100644 index 000000000..21001b360 --- /dev/null +++ b/src/cpu/pred/BranchPredictor.py @@ -0,0 +1,54 @@ +# Copyright (c) 2012 Mark D. Hill and David A. Wood +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# Authors: Nilay Vaish + +from m5.SimObject import SimObject +from m5.params import * + +class BranchPredictor(SimObject): + type = 'BranchPredictor' + cxx_class = 'BPredUnit' + cxx_header = "cpu/pred/bpred_unit.hh" + + numThreads = Param.Unsigned(1, "Number of threads") + predType = Param.String("tournament", + "Branch predictor type ('local', 'tournament')") + localPredictorSize = Param.Unsigned(2048, "Size of local predictor") + localCtrBits = Param.Unsigned(2, "Bits per counter") + localHistoryTableSize = Param.Unsigned(2048, "Size of local history table") + localHistoryBits = Param.Unsigned(11, "Bits for the local history") + globalPredictorSize = Param.Unsigned(8192, "Size of global predictor") + globalCtrBits = Param.Unsigned(2, "Bits per counter") + globalHistoryBits = Param.Unsigned(13, "Bits of history") + choicePredictorSize = Param.Unsigned(8192, "Size of choice predictor") + choiceCtrBits = Param.Unsigned(2, "Bits of choice counters") + + BTBEntries = Param.Unsigned(4096, "Number of BTB entries") + BTBTagSize = Param.Unsigned(16, "Size of the BTB tags, in bits") + + RASSize = Param.Unsigned(16, "RAS size") + instShiftAmt = Param.Unsigned(2, "Number of bits to shift instructions by") diff --git a/src/cpu/pred/SConscript b/src/cpu/pred/SConscript index 742c132c7..d30a7a04d 100644 --- a/src/cpu/pred/SConscript +++ b/src/cpu/pred/SConscript @@ -31,8 +31,12 @@ Import('*') if 'InOrderCPU' in env['CPU_MODELS'] or 'O3CPU' in env['CPU_MODELS']: + SimObject('BranchPredictor.py') + + Source('bpred_unit.cc') Source('2bit_local.cc') Source('btb.cc') Source('ras.cc') Source('tournament.cc') DebugFlag('FreeList') + DebugFlag('Branch') diff --git a/src/cpu/o3/bpred_unit.cc b/src/cpu/pred/bpred_unit.cc similarity index 75% rename from src/cpu/o3/bpred_unit.cc rename to src/cpu/pred/bpred_unit.cc index 08fd4e8ea..52a77119c 100644 --- a/src/cpu/o3/bpred_unit.cc +++ b/src/cpu/pred/bpred_unit.cc @@ -1,5 +1,7 @@ /* * Copyright (c) 2004-2006 The Regents of The University of Michigan + * Copyright (c) 2010 The University of Edinburgh + * Copyright (c) 2012 Mark D. Hill and David A. Wood * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -26,9 +28,22 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Authors: Kevin Lim + * Timothy M. Jones */ -#include "cpu/o3/bpred_unit_impl.hh" -#include "cpu/o3/isa_specific.hh" +#include "cpu/pred/2bit_local.hh" +#include "cpu/pred/bpred_unit_impl.hh" +#include "cpu/pred/tournament.hh" -template class BPredUnit; +BPredUnit * +BranchPredictorParams::create() +{ + // Setup the selected predictor. + if (predType == "local") { + return new LocalBP(this); + } else if (predType == "tournament") { + return new TournamentBP(this); + } else { + fatal("Invalid BP selected!"); + } +} diff --git a/src/cpu/o3/bpred_unit.hh b/src/cpu/pred/bpred_unit.hh similarity index 86% rename from src/cpu/o3/bpred_unit.hh rename to src/cpu/pred/bpred_unit.hh index 3f83f9bef..95f9a3573 100644 --- a/src/cpu/o3/bpred_unit.hh +++ b/src/cpu/pred/bpred_unit.hh @@ -1,5 +1,6 @@ /* * Copyright (c) 2011-2012 ARM Limited + * Copyright (c) 2010 The University of Edinburgh * All rights reserved * * The license below extends only to copyright in the software and shall @@ -38,50 +39,37 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Authors: Kevin Lim + * Korey Sewell + * Timothy M. Jones + * Nilay Vaish */ -#ifndef __CPU_O3_BPRED_UNIT_HH__ -#define __CPU_O3_BPRED_UNIT_HH__ +#ifndef __CPU_PRED_BPRED_UNIT_HH__ +#define __CPU_PRED_BPRED_UNIT_HH__ #include #include "base/statistics.hh" #include "base/types.hh" -#include "cpu/pred/2bit_local.hh" #include "cpu/pred/btb.hh" #include "cpu/pred/ras.hh" -#include "cpu/pred/tournament.hh" #include "cpu/inst_seq.hh" - -struct DerivO3CPUParams; +#include "cpu/static_inst.hh" +#include "params/BranchPredictor.hh" +#include "sim/sim_object.hh" /** * Basically a wrapper class to hold both the branch predictor * and the BTB. */ -template -class BPredUnit +class BPredUnit : public SimObject { - private: - typedef typename Impl::DynInstPtr DynInstPtr; - - enum PredType { - Local, - Tournament - }; - - PredType predictor; - - const std::string _name; - public: - + typedef BranchPredictorParams Params; /** * @param params The params object, that has the size of the BP and BTB. */ - BPredUnit(DerivO3CPUParams *params); - - const std::string &name() const { return _name; } + BPredUnit(const Params *p); /** * Registers statistics. @@ -91,9 +79,6 @@ class BPredUnit /** Perform sanity checks after a drain. */ void drainSanityCheck() const; - /** Take over execution from another CPU's thread. */ - void takeOverFrom(); - /** * Predicts whether or not the instruction is a taken branch, and the * target of the branch if it is taken. @@ -102,10 +87,14 @@ class BPredUnit * @param tid The thread id. * @return Returns if the branch is taken or not. */ - bool predict(DynInstPtr &inst, TheISA::PCState &pc, ThreadID tid); + bool predict(StaticInstPtr &inst, const InstSeqNum &seqNum, + TheISA::PCState &pc, ThreadID tid); + bool predictInOrder(StaticInstPtr &inst, const InstSeqNum &seqNum, + int asid, TheISA::PCState &instPC, TheISA::PCState &predPC, + ThreadID tid); // @todo: Rename this function. - void BPUncond(void * &bp_history); + virtual void uncondBranch(void * &bp_history) = 0; /** * Tells the branch predictor to commit any updates until the given @@ -140,7 +129,7 @@ class BPredUnit * @param bp_history Pointer to the history object. The predictor * will need to update any state and delete the object. */ - void BPSquash(void *bp_history); + virtual void squash(void *bp_history) = 0; /** * Looks up a given PC in the BP to see if it is taken or not taken. @@ -149,7 +138,7 @@ class BPredUnit * has the branch predictor state associated with the lookup. * @return Whether the branch is taken or not taken. */ - bool BPLookup(Addr instPC, void * &bp_history); + virtual bool lookup(Addr instPC, void * &bp_history) = 0; /** * If a branch is not taken, because the BTB address is invalid or missing, @@ -159,7 +148,7 @@ class BPredUnit * @param bp_history Pointer that will be set to an object that * has the branch predictor state associated with the lookup. */ - void BPBTBUpdate(Addr instPC, void * &bp_history); + virtual void btbUpdate(Addr instPC, void * &bp_history) = 0; /** * Looks up a given PC in the BTB to see if a matching entry exists. @@ -187,7 +176,8 @@ class BPredUnit * squash operation. * @todo Make this update flexible enough to handle a global predictor. */ - void BPUpdate(Addr instPC, bool taken, void *bp_history, bool squashed); + virtual void update(Addr instPC, bool taken, void *bp_history, + bool squashed) = 0; /** * Updates the BTB with the target of a branch. @@ -210,7 +200,7 @@ class BPredUnit ThreadID _tid) : seqNum(seq_num), pc(instPC), bpHistory(bp_history), RASTarget(0), RASIndex(0), tid(_tid), predTaken(pred_taken), usedRAS(0), pushedRAS(0), - wasCall(0), wasReturn(0), validBTB(0) + wasCall(0), wasReturn(0) {} bool operator==(const PredictorHistory &entry) const { @@ -252,31 +242,26 @@ class BPredUnit /** Whether or not the instruction was a return. */ bool wasReturn; - /** Whether or not the instruction had a valid BTB entry. */ - bool validBTB; }; typedef std::list History; - typedef typename History::iterator HistoryIt; + typedef History::iterator HistoryIt; + + /** Number of the threads for which the branch history is maintained. */ + uint32_t numThreads; /** * The per-thread predictor history. This is used to update the predictor * as instructions are committed, or restore it to the proper state after * a squash. */ - History predHist[Impl::MaxThreads]; - - /** The local branch predictor. */ - LocalBP *localBP; - - /** The tournament branch predictor. */ - TournamentBP *tournamentBP; + History *predHist; /** The BTB. */ DefaultBTB BTB; /** The per-thread return address stack. */ - ReturnAddrStack RAS[Impl::MaxThreads]; + ReturnAddrStack *RAS; /** Stat for number of BP lookups. */ Stats::Scalar lookups; @@ -290,10 +275,12 @@ class BPredUnit Stats::Scalar BTBHits; /** Stat for number of times the BTB is correct. */ Stats::Scalar BTBCorrect; + /** Stat for percent times an entry in BTB found. */ + Stats::Formula BTBHitPct; /** Stat for number of times the RAS is used to get a target. */ Stats::Scalar usedRAS; /** Stat for number of times the RAS is incorrect. */ Stats::Scalar RASIncorrect; }; -#endif // __CPU_O3_BPRED_UNIT_HH__ +#endif // __CPU_PRED_BPRED_UNIT_HH__ diff --git a/src/cpu/o3/bpred_unit_impl.hh b/src/cpu/pred/bpred_unit_impl.hh similarity index 54% rename from src/cpu/o3/bpred_unit_impl.hh rename to src/cpu/pred/bpred_unit_impl.hh index 43e801710..c3d1b7e9b 100644 --- a/src/cpu/o3/bpred_unit_impl.hh +++ b/src/cpu/pred/bpred_unit_impl.hh @@ -1,5 +1,7 @@ /* * Copyright (c) 2011-2012 ARM Limited + * Copyright (c) 2010 The University of Edinburgh + * Copyright (c) 2012 Mark D. Hill and David A. Wood * All rights reserved * * The license below extends only to copyright in the software and shall @@ -47,45 +49,26 @@ #include "arch/utility.hh" #include "base/trace.hh" #include "config/the_isa.hh" -#include "cpu/o3/bpred_unit.hh" -#include "debug/Fetch.hh" -#include "params/DerivO3CPU.hh" +#include "cpu/pred/bpred_unit.hh" +#include "debug/Branch.hh" -template -BPredUnit::BPredUnit(DerivO3CPUParams *params) - : _name(params->name + ".BPredUnit"), +BPredUnit::BPredUnit(const Params *params) + : SimObject(params), BTB(params->BTBEntries, params->BTBTagSize, params->instShiftAmt) { - // Setup the selected predictor. - if (params->predType == "local") { - localBP = new LocalBP(params->localPredictorSize, - params->localCtrBits, - params->instShiftAmt); - predictor = Local; - } else if (params->predType == "tournament") { - tournamentBP = new TournamentBP(params->localCtrBits, - params->localHistoryTableSize, - params->localHistoryBits, - params->globalPredictorSize, - params->globalHistoryBits, - params->globalCtrBits, - params->choicePredictorSize, - params->choiceCtrBits, - params->instShiftAmt); - predictor = Tournament; - } else { - fatal("Invalid BP selected!"); - } + numThreads = params->numThreads; - for (int i=0; i < Impl::MaxThreads; i++) + predHist = new History[numThreads]; + + RAS = new ReturnAddrStack[numThreads]; + for (int i=0; i < numThreads; i++) RAS[i].init(params->RASSize); } -template void -BPredUnit::regStats() +BPredUnit::regStats() { lookups .name(name() + ".lookups") @@ -118,6 +101,12 @@ BPredUnit::regStats() "work properly.") ; + BTBHitPct + .name(name() + ".BTBHitPct") + .desc("BTB Hit Percentage") + .precision(6); + BTBHitPct = (BTBHits / BTBLookups) * 100; + usedRAS .name(name() + ".usedRAS") .desc("Number of times the RAS was used to get a target.") @@ -129,34 +118,18 @@ BPredUnit::regStats() ; } -template void -BPredUnit::drainSanityCheck() const +BPredUnit::drainSanityCheck() const { // We shouldn't have any outstanding requests when we resume from // a drained system. - for (int i = 0; i < Impl::MaxThreads; ++i) + for (int i = 0; i < numThreads; ++i) assert(predHist[i].empty()); } -template -void -BPredUnit::takeOverFrom() -{ - // Can reset all predictor state, but it's not necessarily better - // than leaving it be. -/* - for (int i = 0; i < Impl::MaxThreads; ++i) - RAS[i].reset(); - - BP.reset(); - BTB.reset(); -*/ -} - -template bool -BPredUnit::predict(DynInstPtr &inst, TheISA::PCState &pc, ThreadID tid) +BPredUnit::predict(StaticInstPtr &inst, const InstSeqNum &seqNum, + TheISA::PCState &pc, ThreadID tid) { // See if branch predictor predicts taken. // If so, get its target addr either from the BTB or the RAS. @@ -171,24 +144,22 @@ BPredUnit::predict(DynInstPtr &inst, TheISA::PCState &pc, ThreadID tid) void *bp_history = NULL; if (inst->isUncondCtrl()) { - DPRINTF(Fetch, "BranchPred: [tid:%i]: Unconditional control.\n", tid); + DPRINTF(Branch, "[tid:%i]: Unconditional control.\n", tid); pred_taken = true; // Tell the BP there was an unconditional branch. - BPUncond(bp_history); + uncondBranch(bp_history); } else { ++condPredicted; - pred_taken = BPLookup(pc.instAddr(), bp_history); + pred_taken = lookup(pc.instAddr(), bp_history); - DPRINTF(Fetch, "BranchPred:[tid:%i]: [sn:%i] Branch predictor" - " predicted %i for PC %s\n", - tid, inst->seqNum, pred_taken, inst->pcState()); + DPRINTF(Branch, "[tid:%i]: [sn:%i] Branch predictor" + " predicted %i for PC %s\n", tid, seqNum, pred_taken, pc); } - DPRINTF(Fetch, "BranchPred: [tid:%i]: [sn:%i] Creating prediction history " - "for PC %s\n", - tid, inst->seqNum, inst->pcState()); + DPRINTF(Branch, "[tid:%i]: [sn:%i] Creating prediction history " + "for PC %s\n", tid, seqNum, pc); - PredictorHistory predict_record(inst->seqNum, pc.instAddr(), + PredictorHistory predict_record(seqNum, pc.instAddr(), pred_taken, bp_history, tid); // Now lookup in the BTB or RAS. @@ -208,133 +179,245 @@ BPredUnit::predict(DynInstPtr &inst, TheISA::PCState &pc, ThreadID tid) RAS[tid].pop(); - DPRINTF(Fetch, "BranchPred: [tid:%i]: Instruction %s is a return, " + DPRINTF(Branch, "[tid:%i]: Instruction %s is a return, " "RAS predicted target: %s, RAS index: %i.\n", - tid, inst->pcState(), target, predict_record.RASIndex); + tid, pc, target, predict_record.RASIndex); } else { ++BTBLookups; if (inst->isCall()) { RAS[tid].push(pc); predict_record.pushedRAS = true; + // Record that it was a call so that the top RAS entry can // be popped off if the speculation is incorrect. predict_record.wasCall = true; - DPRINTF(Fetch, "BranchPred: [tid:%i]: Instruction %s was a " + DPRINTF(Branch, "[tid:%i]: Instruction %s was a " "call, adding %s to the RAS index: %i.\n", - tid, inst->pcState(), pc, RAS[tid].topIdx()); + tid, pc, pc, RAS[tid].topIdx()); } if (BTB.valid(pc.instAddr(), tid)) { ++BTBHits; - predict_record.validBTB = true; // If it's not a return, use the BTB to get the target addr. target = BTB.lookup(pc.instAddr(), tid); - DPRINTF(Fetch, "BranchPred: [tid:%i]: Instruction %s predicted" - " target is %s.\n", tid, inst->pcState(), target); + DPRINTF(Branch, "[tid:%i]: Instruction %s predicted" + " target is %s.\n", tid, pc, target); } else { - DPRINTF(Fetch, "BranchPred: [tid:%i]: BTB doesn't have a " + DPRINTF(Branch, "[tid:%i]: BTB doesn't have a " "valid entry.\n",tid); pred_taken = false; // The Direction of the branch predictor is altered because the // BTB did not have an entry // The predictor needs to be updated accordingly if (!inst->isCall() && !inst->isReturn()) { - BPBTBUpdate(pc.instAddr(), bp_history); - DPRINTF(Fetch, "BranchPred: [tid:%i]:[sn:%i] BPBTBUpdate" - " called for %s\n", - tid, inst->seqNum, inst->pcState()); + btbUpdate(pc.instAddr(), bp_history); + DPRINTF(Branch, "[tid:%i]:[sn:%i] btbUpdate" + " called for %s\n", tid, seqNum, pc); } else if (inst->isCall() && !inst->isUncondCtrl()) { RAS[tid].pop(); predict_record.pushedRAS = false; } - TheISA::advancePC(target, inst->staticInst); + TheISA::advancePC(target, inst); } - } } else { if (inst->isReturn()) { predict_record.wasReturn = true; } - TheISA::advancePC(target, inst->staticInst); + TheISA::advancePC(target, inst); } pc = target; predHist[tid].push_front(predict_record); - DPRINTF(Fetch, "BranchPred: [tid:%i]: [sn:%i]: History entry added." - "predHist.size(): %i\n", tid, inst->seqNum, predHist[tid].size()); + DPRINTF(Branch, "[tid:%i]: [sn:%i]: History entry added." + "predHist.size(): %i\n", tid, seqNum, predHist[tid].size()); + + return pred_taken; +} + +bool +BPredUnit::predictInOrder(StaticInstPtr &inst, const InstSeqNum &seqNum, + int asid, TheISA::PCState &instPC, + TheISA::PCState &predPC, ThreadID tid) +{ + // See if branch predictor predicts taken. + // If so, get its target addr either from the BTB or the RAS. + // Save off record of branch stuff so the RAS can be fixed + // up once it's done. + + using TheISA::MachInst; + + bool pred_taken = false; + TheISA::PCState target; + + ++lookups; + DPRINTF(Branch, "[tid:%i] [sn:%i] %s ... PC %s doing branch " + "prediction\n", tid, seqNum, + inst->disassemble(instPC.instAddr()), instPC); + + void *bp_history = NULL; + + if (inst->isUncondCtrl()) { + DPRINTF(Branch, "[tid:%i] Unconditional control.\n", tid); + pred_taken = true; + // Tell the BP there was an unconditional branch. + uncondBranch(bp_history); + + if (inst->isReturn() && RAS[tid].empty()) { + DPRINTF(Branch, "[tid:%i] RAS is empty, predicting " + "false.\n", tid); + pred_taken = false; + } + } else { + ++condPredicted; + + pred_taken = lookup(predPC.instAddr(), bp_history); + } + + PredictorHistory predict_record(seqNum, predPC.instAddr(), pred_taken, + bp_history, tid); + + // Now lookup in the BTB or RAS. + if (pred_taken) { + if (inst->isReturn()) { + ++usedRAS; + + // If it's a function return call, then look up the address + // in the RAS. + TheISA::PCState rasTop = RAS[tid].top(); + target = TheISA::buildRetPC(instPC, rasTop); + + // Record the top entry of the RAS, and its index. + predict_record.usedRAS = true; + predict_record.RASIndex = RAS[tid].topIdx(); + predict_record.RASTarget = rasTop; + + assert(predict_record.RASIndex < 16); + + RAS[tid].pop(); + + DPRINTF(Branch, "[tid:%i]: Instruction %s is a return, " + "RAS predicted target: %s, RAS index: %i.\n", + tid, instPC, target, + predict_record.RASIndex); + } else { + ++BTBLookups; + + if (inst->isCall()) { + + RAS[tid].push(instPC); + predict_record.pushedRAS = true; + + // Record that it was a call so that the top RAS entry can + // be popped off if the speculation is incorrect. + predict_record.wasCall = true; + + DPRINTF(Branch, "[tid:%i]: Instruction %s was a call" + ", adding %s to the RAS index: %i.\n", + tid, instPC, predPC, + RAS[tid].topIdx()); + } + + if (inst->isCall() && + inst->isUncondCtrl() && + inst->isDirectCtrl()) { + target = inst->branchTarget(instPC); + } else if (BTB.valid(predPC.instAddr(), asid)) { + ++BTBHits; + + // If it's not a return, use the BTB to get the target addr. + target = BTB.lookup(predPC.instAddr(), asid); + + DPRINTF(Branch, "[tid:%i]: [asid:%i] Instruction %s " + "predicted target is %s.\n", + tid, asid, instPC, target); + } else { + DPRINTF(Branch, "[tid:%i]: BTB doesn't have a " + "valid entry, predicting false.\n",tid); + pred_taken = false; + } + } + } + + if (pred_taken) { + // Set the PC and the instruction's predicted target. + predPC = target; + } + DPRINTF(Branch, "[tid:%i]: [sn:%i]: Setting Predicted PC to %s.\n", + tid, seqNum, predPC); + + predHist[tid].push_front(predict_record); + + DPRINTF(Branch, "[tid:%i] [sn:%i] pushed onto front of predHist " + "...predHist.size(): %i\n", + tid, seqNum, predHist[tid].size()); return pred_taken; } -template void -BPredUnit::update(const InstSeqNum &done_sn, ThreadID tid) +BPredUnit::update(const InstSeqNum &done_sn, ThreadID tid) { - DPRINTF(Fetch, "BranchPred: [tid:%i]: Committing branches until " + DPRINTF(Branch, "[tid:%i]: Committing branches until " "[sn:%lli].\n", tid, done_sn); while (!predHist[tid].empty() && predHist[tid].back().seqNum <= done_sn) { // Update the branch predictor with the correct results. - BPUpdate(predHist[tid].back().pc, - predHist[tid].back().predTaken, - predHist[tid].back().bpHistory, false); + update(predHist[tid].back().pc, predHist[tid].back().predTaken, + predHist[tid].back().bpHistory, false); predHist[tid].pop_back(); } } -template void -BPredUnit::squash(const InstSeqNum &squashed_sn, ThreadID tid) +BPredUnit::squash(const InstSeqNum &squashed_sn, ThreadID tid) { History &pred_hist = predHist[tid]; while (!pred_hist.empty() && pred_hist.front().seqNum > squashed_sn) { if (pred_hist.front().usedRAS) { - DPRINTF(Fetch, "BranchPred: [tid:%i]: Restoring top of RAS to: %i," + DPRINTF(Branch, "[tid:%i]: Restoring top of RAS to: %i," " target: %s.\n", tid, pred_hist.front().RASIndex, pred_hist.front().RASTarget); RAS[tid].restore(pred_hist.front().RASIndex, pred_hist.front().RASTarget); } else if(pred_hist.front().wasCall && pred_hist.front().pushedRAS) { - // Was a call but predicated false. Pop RAS here - DPRINTF(Fetch, "BranchPred: [tid: %i] Squashing" - " Call [sn:%i] PC: %s Popping RAS\n", tid, - pred_hist.front().seqNum, pred_hist.front().pc); - RAS[tid].pop(); - } + // Was a call but predicated false. Pop RAS here + DPRINTF(Branch, "[tid: %i] Squashing" + " Call [sn:%i] PC: %s Popping RAS\n", tid, + pred_hist.front().seqNum, pred_hist.front().pc); + RAS[tid].pop(); + } // This call should delete the bpHistory. - BPSquash(pred_hist.front().bpHistory); + squash(pred_hist.front().bpHistory); - DPRINTF(Fetch, "BranchPred: [tid:%i]: Removing history for [sn:%i] " + DPRINTF(Branch, "[tid:%i]: Removing history for [sn:%i] " "PC %s.\n", tid, pred_hist.front().seqNum, pred_hist.front().pc); pred_hist.pop_front(); - DPRINTF(Fetch, "[tid:%i]: predHist.size(): %i\n", + DPRINTF(Branch, "[tid:%i]: predHist.size(): %i\n", tid, predHist[tid].size()); } - } -template void -BPredUnit::squash(const InstSeqNum &squashed_sn, - const TheISA::PCState &corrTarget, - bool actually_taken, - ThreadID tid) +BPredUnit::squash(const InstSeqNum &squashed_sn, + const TheISA::PCState &corrTarget, + bool actually_taken, ThreadID tid) { // Now that we know that a branch was mispredicted, we need to undo // all the branches that have been seen up until this branch and @@ -351,9 +434,8 @@ BPredUnit::squash(const InstSeqNum &squashed_sn, ++condIncorrect; - DPRINTF(Fetch, "BranchPred: [tid:%i]: Squashing from sequence number %i, " - "setting target to %s.\n", - tid, squashed_sn, corrTarget); + DPRINTF(Branch, "[tid:%i]: Squashing from sequence number %i, " + "setting target to %s.\n", tid, squashed_sn, corrTarget); // Squash All Branches AFTER this mispredicted branch squash(squashed_sn, tid); @@ -369,7 +451,7 @@ BPredUnit::squash(const InstSeqNum &squashed_sn, //assert(hist_it != pred_hist.end()); if (pred_hist.front().seqNum != squashed_sn) { - DPRINTF(Fetch, "Front sn %i != Squash sn %i\n", + DPRINTF(Branch, "Front sn %i != Squash sn %i\n", pred_hist.front().seqNum, squashed_sn); assert(pred_hist.front().seqNum == squashed_sn); @@ -380,121 +462,60 @@ BPredUnit::squash(const InstSeqNum &squashed_sn, ++RASIncorrect; } - BPUpdate((*hist_it).pc, actually_taken, - pred_hist.front().bpHistory, true); + update((*hist_it).pc, actually_taken, + pred_hist.front().bpHistory, true); if (actually_taken) { if (hist_it->wasReturn && !hist_it->usedRAS) { - DPRINTF(Fetch, "BranchPred: [tid: %i] Incorrectly predicted" - " return [sn:%i] PC: %s\n", tid, hist_it->seqNum, - hist_it->pc); + DPRINTF(Branch, "[tid: %i] Incorrectly predicted" + " return [sn:%i] PC: %s\n", tid, hist_it->seqNum, + hist_it->pc); RAS[tid].pop(); } - DPRINTF(Fetch,"BranchPred: [tid: %i] BTB Update called for [sn:%i]" - " PC: %s\n", tid,hist_it->seqNum, hist_it->pc); + DPRINTF(Branch,"[tid: %i] BTB Update called for [sn:%i]" + " PC: %s\n", tid,hist_it->seqNum, hist_it->pc); BTB.update((*hist_it).pc, corrTarget, tid); } else { //Actually not Taken if (hist_it->usedRAS) { - DPRINTF(Fetch,"BranchPred: [tid: %i] Incorrectly predicted" - " return [sn:%i] PC: %s Restoring RAS\n", tid, - hist_it->seqNum, hist_it->pc); - DPRINTF(Fetch, "BranchPred: [tid:%i]: Restoring top of RAS" - " to: %i, target: %s.\n", tid, - hist_it->RASIndex, hist_it->RASTarget); + DPRINTF(Branch,"[tid: %i] Incorrectly predicted" + " return [sn:%i] PC: %s Restoring RAS\n", tid, + hist_it->seqNum, hist_it->pc); + DPRINTF(Branch, "[tid:%i]: Restoring top of RAS" + " to: %i, target: %s.\n", tid, + hist_it->RASIndex, hist_it->RASTarget); RAS[tid].restore(hist_it->RASIndex, hist_it->RASTarget); } else if (hist_it->wasCall && hist_it->pushedRAS) { //Was a Call but predicated false. Pop RAS here - DPRINTF(Fetch, "BranchPred: [tid: %i] Incorrectly predicted" - " Call [sn:%i] PC: %s Popping RAS\n", tid, - hist_it->seqNum, hist_it->pc); + DPRINTF(Branch, "[tid: %i] Incorrectly predicted" + " Call [sn:%i] PC: %s Popping RAS\n", tid, + hist_it->seqNum, hist_it->pc); RAS[tid].pop(); } } - DPRINTF(Fetch, "BranchPred: [tid:%i]: Removing history for [sn:%i]" - " PC %s Actually Taken: %i\n", tid, hist_it->seqNum, - hist_it->pc, actually_taken); + DPRINTF(Branch, "[tid:%i]: Removing history for [sn:%i]" + " PC %s Actually Taken: %i\n", tid, hist_it->seqNum, + hist_it->pc, actually_taken); pred_hist.erase(hist_it); - DPRINTF(Fetch, "[tid:%i]: predHist.size(): %i\n", tid, + DPRINTF(Branch, "[tid:%i]: predHist.size(): %i\n", tid, predHist[tid].size()); - } -} - -template -void -BPredUnit::BPUncond(void * &bp_history) -{ - // Only the tournament predictor cares about unconditional branches. - if (predictor == Tournament) { - tournamentBP->uncondBr(bp_history); - } -} - -template -void -BPredUnit::BPSquash(void *bp_history) -{ - if (predictor == Local) { - localBP->squash(bp_history); - } else if (predictor == Tournament) { - tournamentBP->squash(bp_history); - } else { - panic("Predictor type is unexpected value!"); - } -} - -template -bool -BPredUnit::BPLookup(Addr instPC, void * &bp_history) -{ - if (predictor == Local) { - return localBP->lookup(instPC, bp_history); - } else if (predictor == Tournament) { - return tournamentBP->lookup(instPC, bp_history); - } else { - panic("Predictor type is unexpected value!"); - } -} - -template -void -BPredUnit::BPBTBUpdate(Addr instPC, void * &bp_history) -{ - if (predictor == Local) { - return localBP->BTBUpdate(instPC, bp_history); - } else if (predictor == Tournament) { - return tournamentBP->BTBUpdate(instPC, bp_history); - } else { - panic("Predictor type is unexpected value!"); - } -} - -template -void -BPredUnit::BPUpdate(Addr instPC, bool taken, void *bp_history, - bool squashed) -{ - if (predictor == Local) { - localBP->update(instPC, taken, bp_history); - } else if (predictor == Tournament) { - tournamentBP->update(instPC, taken, bp_history, squashed); } else { - panic("Predictor type is unexpected value!"); + DPRINTF(Branch, "[tid:%i]: [sn:%i] pred_hist empty, can't " + "update.\n", tid, squashed_sn); } } -template void -BPredUnit::dump() +BPredUnit::dump() { HistoryIt pred_hist_it; - for (int i = 0; i < Impl::MaxThreads; ++i) { + for (int i = 0; i < numThreads; ++i) { if (!predHist[i].empty()) { pred_hist_it = predHist[i].begin(); diff --git a/src/cpu/pred/btb.hh b/src/cpu/pred/btb.hh index 814b23872..3a773e40d 100644 --- a/src/cpu/pred/btb.hh +++ b/src/cpu/pred/btb.hh @@ -28,8 +28,8 @@ * Authors: Kevin Lim */ -#ifndef __CPU_O3_BTB_HH__ -#define __CPU_O3_BTB_HH__ +#ifndef __CPU_PRED_BTB_HH__ +#define __CPU_PRED_BTB_HH__ #include "arch/types.hh" #include "base/misc.hh" @@ -127,4 +127,4 @@ class DefaultBTB unsigned tagShiftAmt; }; -#endif // __CPU_O3_BTB_HH__ +#endif // __CPU_PRED_BTB_HH__ diff --git a/src/cpu/pred/ras.hh b/src/cpu/pred/ras.hh index ab92b34c2..924543eac 100644 --- a/src/cpu/pred/ras.hh +++ b/src/cpu/pred/ras.hh @@ -28,8 +28,8 @@ * Authors: Kevin Lim */ -#ifndef __CPU_O3_RAS_HH__ -#define __CPU_O3_RAS_HH__ +#ifndef __CPU_PRED_RAS_HH__ +#define __CPU_PRED_RAS_HH__ #include @@ -99,4 +99,4 @@ class ReturnAddrStack unsigned tos; }; -#endif // __CPU_O3_RAS_HH__ +#endif // __CPU_PRED_RAS_HH__ diff --git a/src/cpu/o3/sat_counter.hh b/src/cpu/pred/sat_counter.hh similarity index 96% rename from src/cpu/o3/sat_counter.hh rename to src/cpu/pred/sat_counter.hh index 17ff8546b..1294a4e08 100644 --- a/src/cpu/o3/sat_counter.hh +++ b/src/cpu/pred/sat_counter.hh @@ -28,8 +28,8 @@ * Authors: Kevin Lim */ -#ifndef __CPU_O3_SAT_COUNTER_HH__ -#define __CPU_O3_SAT_COUNTER_HH__ +#ifndef __CPU_PRED_SAT_COUNTER_HH__ +#define __CPU_PRED_SAT_COUNTER_HH__ #include "base/misc.hh" #include "base/types.hh" @@ -114,4 +114,4 @@ class SatCounter uint8_t counter; }; -#endif // __CPU_O3_SAT_COUNTER_HH__ +#endif // __CPU_PRED_SAT_COUNTER_HH__ diff --git a/src/cpu/pred/tournament.cc b/src/cpu/pred/tournament.cc index f36d30ae5..52a05960f 100644 --- a/src/cpu/pred/tournament.cc +++ b/src/cpu/pred/tournament.cc @@ -44,24 +44,17 @@ #include "base/intmath.hh" #include "cpu/pred/tournament.hh" -TournamentBP::TournamentBP(unsigned _localCtrBits, - unsigned _localHistoryTableSize, - unsigned _localHistoryBits, - unsigned _globalPredictorSize, - unsigned _globalHistoryBits, - unsigned _globalCtrBits, - unsigned _choicePredictorSize, - unsigned _choiceCtrBits, - unsigned _instShiftAmt) - : localCtrBits(_localCtrBits), - localHistoryTableSize(_localHistoryTableSize), - localHistoryBits(_localHistoryBits), - globalPredictorSize(_globalPredictorSize), - globalCtrBits(_globalCtrBits), - globalHistoryBits(_globalHistoryBits), - choicePredictorSize(_choicePredictorSize), - choiceCtrBits(_choiceCtrBits), - instShiftAmt(_instShiftAmt) +TournamentBP::TournamentBP(const Params *params) + : BPredUnit(params), + localCtrBits(params->localCtrBits), + localHistoryTableSize(params->localHistoryTableSize), + localHistoryBits(params->localHistoryBits), + globalPredictorSize(params->globalPredictorSize), + globalCtrBits(params->globalCtrBits), + globalHistoryBits(params->globalHistoryBits), + choicePredictorSize(params->choicePredictorSize), + choiceCtrBits(params->choiceCtrBits), + instShiftAmt(params->instShiftAmt) { localPredictorSize = ULL(1) << localHistoryBits; @@ -178,7 +171,7 @@ TournamentBP::updateLocalHistNotTaken(unsigned local_history_idx) void -TournamentBP::BTBUpdate(Addr &branch_addr, void * &bp_history) +TournamentBP::btbUpdate(Addr branch_addr, void * &bp_history) { unsigned local_history_idx = calcLocHistIdx(branch_addr); //Update Global History to Not Taken (clear LSB) @@ -189,7 +182,7 @@ TournamentBP::BTBUpdate(Addr &branch_addr, void * &bp_history) } bool -TournamentBP::lookup(Addr &branch_addr, void * &bp_history) +TournamentBP::lookup(Addr branch_addr, void * &bp_history) { bool local_prediction; unsigned local_history_idx; @@ -249,7 +242,7 @@ TournamentBP::lookup(Addr &branch_addr, void * &bp_history) } void -TournamentBP::uncondBr(void * &bp_history) +TournamentBP::uncondBranch(void * &bp_history) { // Create BPHistory and pass it back to be recorded. BPHistory *history = new BPHistory; @@ -264,7 +257,7 @@ TournamentBP::uncondBr(void * &bp_history) } void -TournamentBP::update(Addr &branch_addr, bool taken, void *bp_history, +TournamentBP::update(Addr branch_addr, bool taken, void *bp_history, bool squashed) { unsigned local_history_idx; diff --git a/src/cpu/pred/tournament.hh b/src/cpu/pred/tournament.hh index 35cfd8455..39fff5bfb 100644 --- a/src/cpu/pred/tournament.hh +++ b/src/cpu/pred/tournament.hh @@ -38,15 +38,18 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Authors: Kevin Lim + * Timothy M. Jones + * Nilay Vaish */ -#ifndef __CPU_O3_TOURNAMENT_PRED_HH__ -#define __CPU_O3_TOURNAMENT_PRED_HH__ +#ifndef __CPU_PRED_TOURNAMENT_PRED_HH__ +#define __CPU_PRED_TOURNAMENT_PRED_HH__ #include #include "base/types.hh" -#include "cpu/o3/sat_counter.hh" +#include "cpu/pred/bpred_unit.hh" +#include "cpu/pred/sat_counter.hh" /** * Implements a tournament branch predictor, hopefully identical to the one @@ -57,21 +60,13 @@ * is speculatively updated, the rest are updated upon branches committing * or misspeculating. */ -class TournamentBP +class TournamentBP : public BPredUnit { public: /** * Default branch predictor constructor. */ - TournamentBP(unsigned localCtrBits, - unsigned localHistoryTableSize, - unsigned localHistoryBits, - unsigned globalPredictorSize, - unsigned globalHistoryBits, - unsigned globalCtrBits, - unsigned choicePredictorSize, - unsigned choiceCtrBits, - unsigned instShiftAmt); + TournamentBP(const Params *params); /** * Looks up the given address in the branch predictor and returns @@ -81,7 +76,7 @@ class TournamentBP * @param bp_history Pointer that will be set to the BPHistory object. * @return Whether or not the branch is taken. */ - bool lookup(Addr &branch_addr, void * &bp_history); + bool lookup(Addr branch_addr, void * &bp_history); /** * Records that there was an unconditional branch, and modifies @@ -89,7 +84,7 @@ class TournamentBP * global history stored in it. * @param bp_history Pointer that will be set to the BPHistory object. */ - void uncondBr(void * &bp_history); + void uncondBranch(void * &bp_history); /** * Updates the branch predictor to Not Taken if a BTB entry is * invalid or not found. @@ -97,7 +92,7 @@ class TournamentBP * @param bp_history Pointer to any bp history state. * @return Whether or not the branch is taken. */ - void BTBUpdate(Addr &branch_addr, void * &bp_history); + void btbUpdate(Addr branch_addr, void * &bp_history); /** * Updates the branch predictor with the actual result of a branch. * @param branch_addr The address of the branch to update. @@ -107,7 +102,7 @@ class TournamentBP * @param squashed is set when this function is called during a squash * operation. */ - void update(Addr &branch_addr, bool taken, void *bp_history, bool squashed); + void update(Addr branch_addr, bool taken, void *bp_history, bool squashed); /** * Restores the global branch history on a squash. @@ -250,4 +245,4 @@ class TournamentBP unsigned choiceThreshold; }; -#endif // __CPU_O3_TOURNAMENT_PRED_HH__ +#endif // __CPU_PRED_TOURNAMENT_PRED_HH__ -- 2.30.2