From 248bd2bb62861c8d77de4c8cfab6c6392bd85049 Mon Sep 17 00:00:00 2001 From: Kevin Lim Date: Thu, 25 May 2006 17:01:48 -0400 Subject: [PATCH] Various branch predictor fixes/cleanup. It works more correctly now and supports both local and tournament predictors. cpu/o3/2bit_local_pred.cc: Branch predictor cleanup/fixup. Rename this to LocalBP. cpu/o3/2bit_local_pred.hh: Rename to LocalBP, update to support changes to BPredUnit, include comments. cpu/o3/alpha_cpu_builder.cc: Support extra parameters to the branch predictor. Now it takes in a parameter to tell it which branch predictor it is using, the local or the tournament predictor. cpu/o3/alpha_params.hh: Add in extra parameter for the branch predictor type. cpu/o3/bpred_unit.cc: Branch predictor fixup/cleanup. Rename it to BPredUnit. cpu/o3/bpred_unit.hh: Branch predictor fixup/cleanup. Now supports both the local and tournament predictors, and stores the branch predictor update state. cpu/o3/bpred_unit_impl.hh: Branch predictor overhaul. Now supports both the local and tournament predictors. cpu/o3/cpu_policy.hh: cpu/ozone/ozone_impl.hh: cpu/ozone/simple_impl.hh: Reflect the class name change. cpu/o3/decode_impl.hh: Be sure to set the predicted target as well so we don't squash twice. cpu/o3/tournament_pred.cc: cpu/o3/tournament_pred.hh: Fixes to the tournament predictor. cpu/ozone/simple_params.hh: Include parameter for the branch predictor type. python/m5/objects/AlphaFullCPU.py: python/m5/objects/OzoneCPU.py: Include the parameter for the branch predictor type. --HG-- extra : convert_revision : 34afebb3b40b47accb12558e439ee4cb03df5e64 --- cpu/o3/2bit_local_pred.cc | 25 ++-- cpu/o3/2bit_local_pred.hh | 22 ++- cpu/o3/alpha_cpu_builder.cc | 3 + cpu/o3/alpha_params.hh | 1 + cpu/o3/bpred_unit.cc | 6 +- cpu/o3/bpred_unit.hh | 64 ++++++--- cpu/o3/bpred_unit_impl.hh | 182 ++++++++++++++++------- cpu/o3/cpu_policy.hh | 2 +- cpu/o3/decode_impl.hh | 1 + cpu/o3/tournament_pred.cc | 232 +++++++++++++++++------------- cpu/o3/tournament_pred.hh | 90 ++++++++++-- cpu/ozone/ozone_impl.hh | 2 +- cpu/ozone/simple_impl.hh | 2 +- cpu/ozone/simple_params.hh | 30 +++- python/m5/objects/AlphaFullCPU.py | 1 + python/m5/objects/OzoneCPU.py | 1 + 16 files changed, 465 insertions(+), 199 deletions(-) diff --git a/cpu/o3/2bit_local_pred.cc b/cpu/o3/2bit_local_pred.cc index c3fb2fdb8..33c417c88 100644 --- a/cpu/o3/2bit_local_pred.cc +++ b/cpu/o3/2bit_local_pred.cc @@ -30,9 +30,9 @@ #include "base/trace.hh" #include "cpu/o3/2bit_local_pred.hh" -DefaultBP::DefaultBP(unsigned _localPredictorSize, - unsigned _localCtrBits, - unsigned _instShiftAmt) +LocalBP::LocalBP(unsigned _localPredictorSize, + unsigned _localCtrBits, + unsigned _instShiftAmt) : localPredictorSize(_localPredictorSize), localCtrBits(_localCtrBits), instShiftAmt(_instShiftAmt) @@ -68,7 +68,7 @@ DefaultBP::DefaultBP(unsigned _localPredictorSize, } void -DefaultBP::reset() +LocalBP::reset() { for (int i = 0; i < localPredictorSets; ++i) { localCtrs[i].reset(); @@ -76,21 +76,21 @@ DefaultBP::reset() } bool -DefaultBP::lookup(Addr &branch_addr) +LocalBP::lookup(Addr &branch_addr, void * &bp_history) { bool taken; - uint8_t local_prediction; + uint8_t counter_val; unsigned local_predictor_idx = getLocalIndex(branch_addr); DPRINTF(Fetch, "Branch predictor: Looking up index %#x\n", local_predictor_idx); - local_prediction = localCtrs[local_predictor_idx].read(); + counter_val = localCtrs[local_predictor_idx].read(); DPRINTF(Fetch, "Branch predictor: prediction is %i.\n", - (int)local_prediction); + (int)counter_val); - taken = getPrediction(local_prediction); + taken = getPrediction(counter_val); #if 0 // Speculative update. @@ -107,8 +107,9 @@ DefaultBP::lookup(Addr &branch_addr) } void -DefaultBP::update(Addr &branch_addr, bool taken) +LocalBP::update(Addr &branch_addr, bool taken, void *bp_history) { + assert(bp_history == NULL); unsigned local_predictor_idx; // Update the local predictor. @@ -128,7 +129,7 @@ DefaultBP::update(Addr &branch_addr, bool taken) inline bool -DefaultBP::getPrediction(uint8_t &count) +LocalBP::getPrediction(uint8_t &count) { // Get the MSB of the count return (count >> (localCtrBits - 1)); @@ -136,7 +137,7 @@ DefaultBP::getPrediction(uint8_t &count) inline unsigned -DefaultBP::getLocalIndex(Addr &branch_addr) +LocalBP::getLocalIndex(Addr &branch_addr) { return (branch_addr >> instShiftAmt) & indexMask; } diff --git a/cpu/o3/2bit_local_pred.hh b/cpu/o3/2bit_local_pred.hh index cd65978ca..02595702b 100644 --- a/cpu/o3/2bit_local_pred.hh +++ b/cpu/o3/2bit_local_pred.hh @@ -35,7 +35,14 @@ #include -class DefaultBP +/** + * Implements a local predictor that uses the PC to index into a table of + * counters. Note that any time a pointer to the bp_history is given, it + * should be NULL using this predictor because it does not have any branch + * predictor state that needs to be recorded or updated; the update can be + * determined solely by the branch being taken or not taken. + */ +class LocalBP { public: /** @@ -44,28 +51,31 @@ class DefaultBP * @param localCtrBits Number of bits per counter. * @param instShiftAmt Offset amount for instructions to ignore alignment. */ - DefaultBP(unsigned localPredictorSize, unsigned localCtrBits, - unsigned instShiftAmt); + LocalBP(unsigned localPredictorSize, unsigned localCtrBits, + unsigned instShiftAmt); /** * Looks up the given address in the branch predictor and returns * a true/false value as to whether it is taken. * @param branch_addr The address of the branch to look up. + * @param bp_history Pointer to any bp history state. * @return Whether or not the branch is taken. */ - bool lookup(Addr &branch_addr); + bool lookup(Addr &branch_addr, void * &bp_history); /** * Updates the branch predictor with the actual result of a branch. * @param branch_addr The address of the branch to update. * @param taken Whether or not the branch was taken. */ - void update(Addr &branch_addr, bool taken); + void update(Addr &branch_addr, bool taken, void *bp_history); + + void squash(void *bp_history) + { assert(bp_history == NULL); } void reset(); private: - /** * Returns the taken/not taken prediction given the value of the * counter. diff --git a/cpu/o3/alpha_cpu_builder.cc b/cpu/o3/alpha_cpu_builder.cc index b0d812edc..08d42cd46 100644 --- a/cpu/o3/alpha_cpu_builder.cc +++ b/cpu/o3/alpha_cpu_builder.cc @@ -109,6 +109,7 @@ Param squashWidth; Param trapLatency; Param fetchTrapLatency; +Param predType; Param localPredictorSize; Param localCtrBits; Param localHistoryTableSize; @@ -234,6 +235,7 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(DerivAlphaFullCPU) INIT_PARAM_DFLT(trapLatency, "Number of cycles before the trap is handled", 6), INIT_PARAM_DFLT(fetchTrapLatency, "Number of cycles before the fetch trap is handled", 12), + INIT_PARAM(predType, "Type of branch predictor ('local', 'tournament')"), INIT_PARAM(localPredictorSize, "Size of local predictor"), INIT_PARAM(localCtrBits, "Bits per counter"), INIT_PARAM(localHistoryTableSize, "Size of local history table"), @@ -366,6 +368,7 @@ CREATE_SIM_OBJECT(DerivAlphaFullCPU) params->trapLatency = trapLatency; params->fetchTrapLatency = fetchTrapLatency; + params->predType = predType; params->localPredictorSize = localPredictorSize; params->localCtrBits = localCtrBits; params->localHistoryTableSize = localHistoryTableSize; diff --git a/cpu/o3/alpha_params.hh b/cpu/o3/alpha_params.hh index e3acf2c05..5eb00426d 100644 --- a/cpu/o3/alpha_params.hh +++ b/cpu/o3/alpha_params.hh @@ -127,6 +127,7 @@ class AlphaSimpleParams : public BaseFullCPU::Params // // Branch predictor (BP & BTB) // + std::string predType; unsigned localPredictorSize; unsigned localCtrBits; unsigned localHistoryTableSize; diff --git a/cpu/o3/bpred_unit.cc b/cpu/o3/bpred_unit.cc index 92344111f..e149b8073 100644 --- a/cpu/o3/bpred_unit.cc +++ b/cpu/o3/bpred_unit.cc @@ -32,6 +32,6 @@ #include "cpu/ozone/ozone_impl.hh" #include "cpu/ozone/simple_impl.hh" -template class TwobitBPredUnit; -template class TwobitBPredUnit; -template class TwobitBPredUnit; +template class BPredUnit; +template class BPredUnit; +template class BPredUnit; diff --git a/cpu/o3/bpred_unit.hh b/cpu/o3/bpred_unit.hh index b7814b2e9..93aae8f15 100644 --- a/cpu/o3/bpred_unit.hh +++ b/cpu/o3/bpred_unit.hh @@ -46,16 +46,25 @@ * and the BTB. */ template -class TwobitBPredUnit +class BPredUnit { - public: + private: typedef typename Impl::Params Params; typedef typename Impl::DynInstPtr DynInstPtr; + enum PredType { + Local, + Tournament + }; + + PredType predictor; + + public: + /** * @param params The params object, that has the size of the BP and BTB. */ - TwobitBPredUnit(Params *params); + BPredUnit(Params *params); /** * Registers statistics. @@ -76,6 +85,9 @@ class TwobitBPredUnit */ bool predict(DynInstPtr &inst, Addr &PC, unsigned tid); + // @todo: Rename this function. + void BPUncond(void * &bp_history); + /** * Tells the branch predictor to commit any updates until the given * sequence number. @@ -104,13 +116,20 @@ class TwobitBPredUnit void squash(const InstSeqNum &squashed_sn, const Addr &corr_target, bool actually_taken, unsigned tid); + /** + * @param bp_history Pointer to the history object. The predictor + * will need to update any state and delete the object. + */ + void BPSquash(void *bp_history); + /** * Looks up a given PC in the BP to see if it is taken or not taken. * @param inst_PC The PC to look up. + * @param bp_history Pointer that will be set to an object that + * has the branch predictor state associated with the lookup. * @return Whether the branch is taken or not taken. */ - bool BPLookup(Addr &inst_PC) - { return BP.lookup(inst_PC); } + bool BPLookup(Addr &inst_PC, void * &bp_history); /** * Looks up a given PC in the BTB to see if a matching entry exists. @@ -132,10 +151,11 @@ class TwobitBPredUnit * Updates the BP with taken/not taken information. * @param inst_PC The branch's PC that will be updated. * @param taken Whether the branch was taken or not taken. + * @param bp_history Pointer to the branch predictor state that is + * associated with the branch lookup that is being updated. * @todo Make this update flexible enough to handle a global predictor. */ - void BPUpdate(Addr &inst_PC, bool taken) - { BP.update(inst_PC, taken); } + void BPUpdate(Addr &inst_PC, bool taken, void *bp_history); /** * Updates the BTB with the target of a branch. @@ -145,18 +165,20 @@ class TwobitBPredUnit void BTBUpdate(Addr &inst_PC, Addr &target_PC) { BTB.update(inst_PC, target_PC,0); } + void dump(); + private: struct PredictorHistory { /** - * Makes a predictor history struct that contains a sequence number, - * the PC of its instruction, and whether or not it was predicted - * taken. + * Makes a predictor history struct that contains any + * information needed to update the predictor, BTB, and RAS. */ PredictorHistory(const InstSeqNum &seq_num, const Addr &inst_PC, - const bool pred_taken, const unsigned _tid) - : seqNum(seq_num), PC(inst_PC), RASTarget(0), globalHistory(0), + const bool pred_taken, void *bp_history, + const unsigned _tid) + : seqNum(seq_num), PC(inst_PC), RASTarget(0), RASIndex(0), tid(_tid), predTaken(pred_taken), usedRAS(0), - wasCall(0) + wasCall(0), bpHistory(bp_history) { } /** The sequence number for the predictor history entry. */ @@ -168,9 +190,6 @@ class TwobitBPredUnit /** The RAS target (only valid if a return). */ Addr RASTarget; - /** The global history at the time this entry was created. */ - unsigned globalHistory; - /** The RAS index of the instruction (only valid if a call). */ unsigned RASIndex; @@ -185,6 +204,12 @@ class TwobitBPredUnit /** Whether or not the instruction was a call. */ bool wasCall; + + /** Pointer to the history object passed back from the branch + * predictor. It is used to update or restore state of the + * branch predictor. + */ + void *bpHistory; }; typedef std::list History; @@ -196,8 +221,11 @@ class TwobitBPredUnit */ History predHist[Impl::MaxThreads]; - /** The branch predictor. */ - DefaultBP BP; + /** The local branch predictor. */ + LocalBP *localBP; + + /** The tournament branch predictor. */ + TournamentBP *tournamentBP; /** The BTB. */ DefaultBTB BTB; diff --git a/cpu/o3/bpred_unit_impl.hh b/cpu/o3/bpred_unit_impl.hh index c37df606b..1844c155e 100644 --- a/cpu/o3/bpred_unit_impl.hh +++ b/cpu/o3/bpred_unit_impl.hh @@ -36,21 +36,40 @@ using namespace std; template -TwobitBPredUnit::TwobitBPredUnit(Params *params) - : BP(params->localPredictorSize, - params->localCtrBits, - params->instShiftAmt), - BTB(params->BTBEntries, +BPredUnit::BPredUnit(Params *params) + : BTB(params->BTBEntries, params->BTBTagSize, params->instShiftAmt) { + // Setup the selected predictor. + if (params->predType == "local") { + localBP = new LocalBP(params->localPredictorSize, + params->localCtrBits, + params->instShiftAmt); + predictor = Local; + } else if (params->predType == "tournament") { + tournamentBP = new TournamentBP(params->localPredictorSize, + params->localCtrBits, + params->localHistoryTableSize, + params->localHistoryBits, + params->globalPredictorSize, + params->globalHistoryBits, + params->globalCtrBits, + params->choicePredictorSize, + params->choiceCtrBits, + params->instShiftAmt); + predictor = Tournament; + } else { + fatal("Invalid BP selected!"); + } + for (int i=0; i < Impl::MaxThreads; i++) RAS[i].init(params->RASSize); } template void -TwobitBPredUnit::regStats() +BPredUnit::regStats() { lookups .name(name() + ".BPredUnit.lookups") @@ -96,17 +115,20 @@ TwobitBPredUnit::regStats() template void -TwobitBPredUnit::switchOut() +BPredUnit::switchOut() { + // Clear any state upon switch out. for (int i = 0; i < Impl::MaxThreads; ++i) { - predHist[i].clear(); + squash(0, i); } } template void -TwobitBPredUnit::takeOverFrom() +BPredUnit::takeOverFrom() { + // Can reset all predictor state, but it's not necessarily better + // than leaving it be. /* for (int i = 0; i < Impl::MaxThreads; ++i) RAS[i].reset(); @@ -118,11 +140,10 @@ TwobitBPredUnit::takeOverFrom() template bool -TwobitBPredUnit::predict(DynInstPtr &inst, Addr &PC, unsigned tid) +BPredUnit::predict(DynInstPtr &inst, Addr &PC, unsigned tid) { // See if branch predictor predicts taken. // If so, get its target addr either from the BTB or the RAS. - // Once that's done, speculatively update the predictor? // Save off record of branch stuff so the RAS can be fixed // up once it's done. @@ -133,20 +154,25 @@ TwobitBPredUnit::predict(DynInstPtr &inst, Addr &PC, unsigned tid) ++lookups; + void *bp_history = NULL; + if (inst->isUncondCtrl()) { DPRINTF(Fetch, "BranchPred: [tid:%i] Unconditional control.\n", tid); pred_taken = true; + // Tell the BP there was an unconditional branch. + BPUncond(bp_history); } else { ++condPredicted; - pred_taken = BPLookup(PC); + pred_taken = BPLookup(PC, bp_history); DPRINTF(Fetch, "BranchPred: [tid:%i]: Branch predictor predicted %i " "for PC %#x\n", tid, pred_taken, inst->readPC()); } - PredictorHistory predict_record(inst->seqNum, PC, pred_taken, tid); + PredictorHistory predict_record(inst->seqNum, PC, pred_taken, + bp_history, tid); // Now lookup in the BTB or RAS. if (pred_taken) { @@ -187,7 +213,7 @@ TwobitBPredUnit::predict(DynInstPtr &inst, Addr &PC, unsigned tid) if (BTB.valid(PC, tid)) { ++BTBHits; - //If it's anything else, use the BTB to get the target addr. + // If it's not a return, use the BTB to get the target addr. target = BTB.lookup(PC, tid); DPRINTF(Fetch, "BranchPred: [tid:%i]: Instruction %#x predicted" @@ -221,7 +247,7 @@ TwobitBPredUnit::predict(DynInstPtr &inst, Addr &PC, unsigned tid) template void -TwobitBPredUnit::update(const InstSeqNum &done_sn, unsigned tid) +BPredUnit::update(const InstSeqNum &done_sn, unsigned tid) { DPRINTF(Fetch, "BranchPred: [tid:%i]: Commiting branches until sequence" "number %lli.\n", tid, done_sn); @@ -229,8 +255,9 @@ TwobitBPredUnit::update(const InstSeqNum &done_sn, unsigned tid) while (!predHist[tid].empty() && predHist[tid].back().seqNum <= done_sn) { // Update the branch predictor with the correct results. - BP.update(predHist[tid].back().PC, - predHist[tid].back().predTaken); + BPUpdate(predHist[tid].back().PC, + predHist[tid].back().predTaken, + predHist[tid].back().bpHistory); predHist[tid].pop_back(); } @@ -238,13 +265,13 @@ TwobitBPredUnit::update(const InstSeqNum &done_sn, unsigned tid) template void -TwobitBPredUnit::squash(const InstSeqNum &squashed_sn, unsigned tid) +BPredUnit::squash(const InstSeqNum &squashed_sn, unsigned tid) { History &pred_hist = predHist[tid]; while (!pred_hist.empty() && pred_hist.front().seqNum > squashed_sn) { - if (pred_hist.front().usedRAS) { + if (pred_hist.front().usedRAS) { DPRINTF(Fetch, "BranchPred: [tid:%i]: Restoring top of RAS to: %i," " target: %#x.\n", tid, @@ -255,12 +282,15 @@ TwobitBPredUnit::squash(const InstSeqNum &squashed_sn, unsigned tid) pred_hist.front().RASTarget); } else if (pred_hist.front().wasCall) { - DPRINTF(Fetch, "BranchPred: [tid:%i]: Removing speculative entry added " - "to the RAS.\n",tid); + DPRINTF(Fetch, "BranchPred: [tid:%i]: Removing speculative entry " + "added to the RAS.\n",tid); RAS[tid].pop(); } + // This call should delete the bpHistory. + BPSquash(pred_hist.front().bpHistory); + pred_hist.pop_front(); } @@ -268,10 +298,10 @@ TwobitBPredUnit::squash(const InstSeqNum &squashed_sn, unsigned tid) template void -TwobitBPredUnit::squash(const InstSeqNum &squashed_sn, - const Addr &corr_target, - const bool actually_taken, - unsigned tid) +BPredUnit::squash(const InstSeqNum &squashed_sn, + const Addr &corr_target, + const bool actually_taken, + unsigned tid) { // Now that we know that a branch was mispredicted, we need to undo // all the branches that have been seen up until this branch and @@ -285,40 +315,96 @@ TwobitBPredUnit::squash(const InstSeqNum &squashed_sn, "setting target to %#x.\n", tid, squashed_sn, corr_target); - while (!pred_hist.empty() && - pred_hist.front().seqNum > squashed_sn) { - if (pred_hist.front().usedRAS) { - DPRINTF(Fetch, "BranchPred: [tid:%i]: Restoring top of RAS to: %i, " - "target: %#x.\n", - tid, - pred_hist.front().RASIndex, - pred_hist.front().RASTarget); - - RAS[tid].restore(pred_hist.front().RASIndex, - pred_hist.front().RASTarget); - } else if (pred_hist.front().wasCall) { - DPRINTF(Fetch, "BranchPred: [tid:%i]: Removing speculative entry" - " added to the RAS.\n", tid); - - RAS[tid].pop(); - } - - pred_hist.pop_front(); - } + squash(squashed_sn, tid); // If there's a squash due to a syscall, there may not be an entry // corresponding to the squash. In that case, don't bother trying to // fix up the entry. if (!pred_hist.empty()) { - pred_hist.front().predTaken = actually_taken; - + assert(pred_hist.front().seqNum == squashed_sn); if (pred_hist.front().usedRAS) { ++RASIncorrect; } - BP.update(pred_hist.front().PC, actually_taken); + BPUpdate(pred_hist.front().PC, actually_taken, + pred_hist.front().bpHistory); BTB.update(pred_hist.front().PC, corr_target, tid); pred_hist.pop_front(); } } + +template +void +BPredUnit::BPUncond(void * &bp_history) +{ + // Only the tournament predictor cares about unconditional branches. + if (predictor == Tournament) { + tournamentBP->uncondBr(bp_history); + } +} + +template +void +BPredUnit::BPSquash(void *bp_history) +{ + if (predictor == Local) { + localBP->squash(bp_history); + } else if (predictor == Tournament) { + tournamentBP->squash(bp_history); + } else { + panic("Predictor type is unexpected value!"); + } +} + +template +bool +BPredUnit::BPLookup(Addr &inst_PC, void * &bp_history) +{ + if (predictor == Local) { + return localBP->lookup(inst_PC, bp_history); + } else if (predictor == Tournament) { + return tournamentBP->lookup(inst_PC, bp_history); + } else { + panic("Predictor type is unexpected value!"); + } +} + +template +void +BPredUnit::BPUpdate(Addr &inst_PC, bool taken, void *bp_history) +{ + if (predictor == Local) { + localBP->update(inst_PC, taken, bp_history); + } else if (predictor == Tournament) { + tournamentBP->update(inst_PC, taken, bp_history); + } else { + panic("Predictor type is unexpected value!"); + } +} + +template +void +BPredUnit::dump() +{ + typename History::iterator pred_hist_it; + + for (int i = 0; i < Impl::MaxThreads; ++i) { + if (!predHist[i].empty()) { + pred_hist_it = predHist[i].begin(); + + cprintf("predHist[%i].size(): %i\n", i, predHist[i].size()); + + while (pred_hist_it != predHist[i].end()) { + cprintf("[sn:%lli], PC:%#x, tid:%i, predTaken:%i, " + "bpHistory:%#x\n", + (*pred_hist_it).seqNum, (*pred_hist_it).PC, + (*pred_hist_it).tid, (*pred_hist_it).predTaken, + (*pred_hist_it).bpHistory); + pred_hist_it++; + } + + cprintf("\n"); + } + } +} diff --git a/cpu/o3/cpu_policy.hh b/cpu/o3/cpu_policy.hh index 52227013e..b4249b12d 100644 --- a/cpu/o3/cpu_policy.hh +++ b/cpu/o3/cpu_policy.hh @@ -51,7 +51,7 @@ template struct SimpleCPUPolicy { - typedef TwobitBPredUnit BPredUnit; + typedef BPredUnit BPredUnit; typedef PhysRegFile RegFile; typedef SimpleFreeList FreeList; typedef SimpleRenameMap RenameMap; diff --git a/cpu/o3/decode_impl.hh b/cpu/o3/decode_impl.hh index 2ed7ec6fc..8d84d46c8 100644 --- a/cpu/o3/decode_impl.hh +++ b/cpu/o3/decode_impl.hh @@ -721,6 +721,7 @@ DefaultDecode::decodeInsts(unsigned tid) // Might want to set some sort of boolean and just do // a check at the end squash(inst, inst->threadNumber); + inst->setPredTarg(inst->branchTarget()); break; } diff --git a/cpu/o3/tournament_pred.cc b/cpu/o3/tournament_pred.cc index 89da7b9f5..f8c95abd8 100644 --- a/cpu/o3/tournament_pred.cc +++ b/cpu/o3/tournament_pred.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004-2005 The Regents of The University of Michigan + * Copyright (c) 2004-2006 The Regents of The University of Michigan * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -26,6 +26,7 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +#include "base/intmath.hh" #include "cpu/o3/tournament_pred.hh" TournamentBP::TournamentBP(unsigned _localPredictorSize, @@ -49,7 +50,9 @@ TournamentBP::TournamentBP(unsigned _localPredictorSize, choiceCtrBits(_choiceCtrBits), instShiftAmt(_instShiftAmt) { - //Should do checks here to make sure sizes are correct (powers of 2) + if (!isPowerOf2(localPredictorSize)) { + fatal("Invalid local predictor size!\n"); + } //Setup the array of counters for the local predictor localCtrs.resize(localPredictorSize); @@ -57,6 +60,10 @@ TournamentBP::TournamentBP(unsigned _localPredictorSize, for (int i = 0; i < localPredictorSize; ++i) localCtrs[i].setBits(localCtrBits); + if (!isPowerOf2(localHistoryTableSize)) { + fatal("Invalid local history table size!\n"); + } + //Setup the history table for the local table localHistoryTable.resize(localHistoryTableSize); @@ -66,6 +73,10 @@ TournamentBP::TournamentBP(unsigned _localPredictorSize, // Setup the local history mask localHistoryMask = (1 << localHistoryBits) - 1; + if (!isPowerOf2(globalPredictorSize)) { + fatal("Invalid global predictor size!\n"); + } + //Setup the array of counters for the global predictor globalCtrs.resize(globalPredictorSize); @@ -77,12 +88,17 @@ TournamentBP::TournamentBP(unsigned _localPredictorSize, // Setup the global history mask globalHistoryMask = (1 << globalHistoryBits) - 1; + if (!isPowerOf2(choicePredictorSize)) { + fatal("Invalid choice predictor size!\n"); + } + //Setup the array of counters for the choice predictor choiceCtrs.resize(choicePredictorSize); for (int i = 0; i < choicePredictorSize; ++i) choiceCtrs[i].setBits(choiceCtrBits); + // @todo: Allow for different thresholds between the predictors. threshold = (1 << (localCtrBits - 1)) - 1; threshold = threshold / 2; } @@ -91,165 +107,185 @@ inline unsigned TournamentBP::calcLocHistIdx(Addr &branch_addr) { + // Get low order bits after removing instruction offset. return (branch_addr >> instShiftAmt) & (localHistoryTableSize - 1); } inline void -TournamentBP::updateHistoriesTaken(unsigned local_history_idx) +TournamentBP::updateGlobalHistTaken() { globalHistory = (globalHistory << 1) | 1; globalHistory = globalHistory & globalHistoryMask; - - localHistoryTable[local_history_idx] = - (localHistoryTable[local_history_idx] << 1) | 1; } inline void -TournamentBP::updateHistoriesNotTaken(unsigned local_history_idx) +TournamentBP::updateGlobalHistNotTaken() { globalHistory = (globalHistory << 1); globalHistory = globalHistory & globalHistoryMask; +} +inline +void +TournamentBP::updateLocalHistTaken(unsigned local_history_idx) +{ + localHistoryTable[local_history_idx] = + (localHistoryTable[local_history_idx] << 1) | 1; +} + +inline +void +TournamentBP::updateLocalHistNotTaken(unsigned local_history_idx) +{ localHistoryTable[local_history_idx] = (localHistoryTable[local_history_idx] << 1); } bool -TournamentBP::lookup(Addr &branch_addr) +TournamentBP::lookup(Addr &branch_addr, void * &bp_history) { - uint8_t local_prediction; + bool local_prediction; unsigned local_history_idx; unsigned local_predictor_idx; - uint8_t global_prediction; - uint8_t choice_prediction; + bool global_prediction; + bool choice_prediction; //Lookup in the local predictor to get its branch prediction local_history_idx = calcLocHistIdx(branch_addr); local_predictor_idx = localHistoryTable[local_history_idx] & localHistoryMask; - local_prediction = localCtrs[local_predictor_idx].read(); + local_prediction = localCtrs[local_predictor_idx].read() > threshold; //Lookup in the global predictor to get its branch prediction - global_prediction = globalCtrs[globalHistory].read(); + global_prediction = globalCtrs[globalHistory].read() > threshold; //Lookup in the choice predictor to see which one to use - choice_prediction = choiceCtrs[globalHistory].read(); - - //@todo Put a threshold value in for the three predictors that can - // be set through the constructor (so this isn't hard coded). - //Also should put some of this code into functions. - if (choice_prediction > threshold) { - if (global_prediction > threshold) { - updateHistoriesTaken(local_history_idx); - - assert(globalHistory < globalPredictorSize && - local_history_idx < localPredictorSize); - - globalCtrs[globalHistory].increment(); - localCtrs[local_history_idx].increment(); - + choice_prediction = choiceCtrs[globalHistory].read() > threshold; + + // Create BPHistory and pass it back to be recorded. + BPHistory *history = new BPHistory; + history->globalHistory = globalHistory; + history->localPredTaken = local_prediction; + history->globalPredTaken = global_prediction; + history->globalUsed = choice_prediction; + bp_history = (void *)history; + + assert(globalHistory < globalPredictorSize && + local_history_idx < localPredictorSize); + + // Commented code is for doing speculative update of counters and + // all histories. + if (choice_prediction) { + if (global_prediction) { +// updateHistoriesTaken(local_history_idx); +// globalCtrs[globalHistory].increment(); +// localCtrs[local_history_idx].increment(); + updateGlobalHistTaken(); return true; } else { - updateHistoriesNotTaken(local_history_idx); - - assert(globalHistory < globalPredictorSize && - local_history_idx < localPredictorSize); - - globalCtrs[globalHistory].decrement(); - localCtrs[local_history_idx].decrement(); - +// updateHistoriesNotTaken(local_history_idx); +// globalCtrs[globalHistory].decrement(); +// localCtrs[local_history_idx].decrement(); + updateGlobalHistNotTaken(); return false; } } else { - if (local_prediction > threshold) { - updateHistoriesTaken(local_history_idx); - - assert(globalHistory < globalPredictorSize && - local_history_idx < localPredictorSize); - - globalCtrs[globalHistory].increment(); - localCtrs[local_history_idx].increment(); - + if (local_prediction) { +// updateHistoriesTaken(local_history_idx); +// globalCtrs[globalHistory].increment(); +// localCtrs[local_history_idx].increment(); + updateGlobalHistTaken(); return true; } else { - updateHistoriesNotTaken(local_history_idx); - - assert(globalHistory < globalPredictorSize && - local_history_idx < localPredictorSize); - - globalCtrs[globalHistory].decrement(); - localCtrs[local_history_idx].decrement(); - +// updateHistoriesNotTaken(local_history_idx); +// globalCtrs[globalHistory].decrement(); +// localCtrs[local_history_idx].decrement(); + updateGlobalHistNotTaken(); return false; } } } -// Update the branch predictor if it predicted a branch wrong. void -TournamentBP::update(Addr &branch_addr, unsigned correct_gh, bool taken) +TournamentBP::uncondBr(void * &bp_history) { + // Create BPHistory and pass it back to be recorded. + BPHistory *history = new BPHistory; + history->globalHistory = globalHistory; + history->localPredTaken = true; + history->globalPredTaken = true; + bp_history = static_cast(history); + + updateGlobalHistTaken(); +} - uint8_t local_prediction; +void +TournamentBP::update(Addr &branch_addr, bool taken, void *bp_history) +{ unsigned local_history_idx; unsigned local_predictor_idx; - bool local_pred_taken; + unsigned local_predictor_hist; - uint8_t global_prediction; - bool global_pred_taken; - - // Load the correct global history into the register. - globalHistory = correct_gh; - - // Get the local predictor's current prediction, remove the incorrect - // update, and update the local predictor + // Get the local predictor's current prediction local_history_idx = calcLocHistIdx(branch_addr); - local_predictor_idx = localHistoryTable[local_history_idx]; - local_predictor_idx = (local_predictor_idx >> 1) & localHistoryMask; - - local_prediction = localCtrs[local_predictor_idx].read(); - local_pred_taken = local_prediction > threshold; - - //Get the global predictor's current prediction, and update the - //global predictor - global_prediction = globalCtrs[globalHistory].read(); - global_pred_taken = global_prediction > threshold; - - //Update the choice predictor to tell it which one was correct - if (local_pred_taken != global_pred_taken) { - //If the local prediction matches the actual outcome, decerement - //the counter. Otherwise increment the counter. - if (local_pred_taken == taken) { - choiceCtrs[globalHistory].decrement(); - } else { - choiceCtrs[globalHistory].increment(); + local_predictor_hist = localHistoryTable[local_history_idx]; + local_predictor_idx = local_predictor_hist & localHistoryMask; + + // Update the choice predictor to tell it which one was correct if + // there was a prediction. + if (bp_history) { + BPHistory *history = static_cast(bp_history); + if (history->localPredTaken != history->globalPredTaken) { + // If the local prediction matches the actual outcome, + // decerement the counter. Otherwise increment the + // counter. + if (history->localPredTaken == taken) { + choiceCtrs[globalHistory].decrement(); + } else if (history->globalPredTaken == taken){ + choiceCtrs[globalHistory].increment(); + } } + + // We're done with this history, now delete it. + delete history; } - if (taken) { - assert(globalHistory < globalPredictorSize && - local_predictor_idx < localPredictorSize); + assert(globalHistory < globalPredictorSize && + local_predictor_idx < localPredictorSize); + // Update the counters and local history with the proper + // resolution of the branch. Global history is updated + // speculatively and restored upon squash() calls, so it does not + // need to be updated. + if (taken) { localCtrs[local_predictor_idx].increment(); globalCtrs[globalHistory].increment(); - globalHistory = (globalHistory << 1) | 1; - globalHistory = globalHistory & globalHistoryMask; - - localHistoryTable[local_history_idx] |= 1; + updateLocalHistTaken(local_history_idx); } else { - assert(globalHistory < globalPredictorSize && - local_predictor_idx < localPredictorSize); - localCtrs[local_predictor_idx].decrement(); globalCtrs[globalHistory].decrement(); - globalHistory = (globalHistory << 1); - globalHistory = globalHistory & globalHistoryMask; - - localHistoryTable[local_history_idx] &= ~1; + updateLocalHistNotTaken(local_history_idx); } } + +void +TournamentBP::squash(void *bp_history) +{ + BPHistory *history = static_cast(bp_history); + + // Restore global history to state prior to this branch. + globalHistory = history->globalHistory; + + // Delete this BPHistory now that we're done with it. + delete history; +} + +#ifdef DEBUG +int +TournamentBP::BPHistory::newCount = 0; +#endif diff --git a/cpu/o3/tournament_pred.hh b/cpu/o3/tournament_pred.hh index 7b600aa53..6d77999cc 100644 --- a/cpu/o3/tournament_pred.hh +++ b/cpu/o3/tournament_pred.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004-2005 The Regents of The University of Michigan + * Copyright (c) 2004-2006 The Regents of The University of Michigan * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -34,6 +34,15 @@ #include "cpu/o3/sat_counter.hh" #include +/** + * Implements a tournament branch predictor, hopefully identical to the one + * used in the 21264. It has a local predictor, which uses a local history + * table to index into a table of counters, and a global predictor, which + * uses a global history to index into a table of counters. A choice + * predictor chooses between the two. Only the global history register + * is speculatively updated, the rest are updated upon branches committing + * or misspeculating. + */ class TournamentBP { public: @@ -53,30 +62,95 @@ class TournamentBP /** * Looks up the given address in the branch predictor and returns - * a true/false value as to whether it is taken. + * a true/false value as to whether it is taken. Also creates a + * BPHistory object to store any state it will need on squash/update. * @param branch_addr The address of the branch to look up. + * @param bp_history Pointer that will be set to the BPHistory object. * @return Whether or not the branch is taken. */ - bool lookup(Addr &branch_addr); + bool lookup(Addr &branch_addr, void * &bp_history); + + /** + * Records that there was an unconditional branch, and modifies + * the bp history to point to an object that has the previous + * global history stored in it. + * @param bp_history Pointer that will be set to the BPHistory object. + */ + void uncondBr(void * &bp_history); /** * Updates the branch predictor with the actual result of a branch. * @param branch_addr The address of the branch to update. * @param taken Whether or not the branch was taken. + * @param bp_history Pointer to the BPHistory object that was created + * when the branch was predicted. + */ + void update(Addr &branch_addr, bool taken, void *bp_history); + + /** + * Restores the global branch history on a squash. + * @param bp_history Pointer to the BPHistory object that has the + * previous global branch history in it. */ - void update(Addr &branch_addr, unsigned global_history, bool taken); + void squash(void *bp_history); + /** Returns the global history. */ inline unsigned readGlobalHist() { return globalHistory; } private: - + /** + * Returns if the branch should be taken or not, given a counter + * value. + * @param count The counter value. + */ inline bool getPrediction(uint8_t &count); + /** + * Returns the local history index, given a branch address. + * @param branch_addr The branch's PC address. + */ inline unsigned calcLocHistIdx(Addr &branch_addr); - inline void updateHistoriesTaken(unsigned local_history_idx); + /** Updates global history as taken. */ + inline void updateGlobalHistTaken(); - inline void updateHistoriesNotTaken(unsigned local_history_idx); + /** Updates global history as not taken. */ + inline void updateGlobalHistNotTaken(); + + /** + * Updates local histories as taken. + * @param local_history_idx The local history table entry that + * will be updated. + */ + inline void updateLocalHistTaken(unsigned local_history_idx); + + /** + * Updates local histories as not taken. + * @param local_history_idx The local history table entry that + * will be updated. + */ + inline void updateLocalHistNotTaken(unsigned local_history_idx); + + /** + * The branch history information that is created upon predicting + * a branch. It will be passed back upon updating and squashing, + * when the BP can use this information to update/restore its + * state properly. + */ + struct BPHistory { +#ifdef DEBUG + BPHistory() + { newCount++; } + ~BPHistory() + { newCount--; } + + static int newCount; +#endif + unsigned globalHistory; + bool localPredTaken; + bool globalPredTaken; + bool globalUsed; + }; /** Local counters. */ std::vector localCtrs; @@ -101,7 +175,6 @@ class TournamentBP /** Mask to get the proper local history. */ unsigned localHistoryMask; - /** Array of counters that make up the global predictor. */ std::vector globalCtrs; @@ -120,7 +193,6 @@ class TournamentBP /** Mask to get the proper global history. */ unsigned globalHistoryMask; - /** Array of counters that make up the choice predictor. */ std::vector choiceCtrs; diff --git a/cpu/ozone/ozone_impl.hh b/cpu/ozone/ozone_impl.hh index 1f543ec6e..9dc50c1fb 100644 --- a/cpu/ozone/ozone_impl.hh +++ b/cpu/ozone/ozone_impl.hh @@ -54,7 +54,7 @@ struct OzoneImpl { // Would like to put these into their own area. // typedef NullPredictor BranchPred; - typedef TwobitBPredUnit BranchPred; + typedef BPredUnit BranchPred; typedef FrontEnd FrontEnd; // Will need IQ, LSQ eventually typedef LWBackEnd BackEnd; diff --git a/cpu/ozone/simple_impl.hh b/cpu/ozone/simple_impl.hh index 961bf2ea9..26845271a 100644 --- a/cpu/ozone/simple_impl.hh +++ b/cpu/ozone/simple_impl.hh @@ -51,7 +51,7 @@ struct SimpleImpl { // Would like to put these into their own area. // typedef NullPredictor BranchPred; - typedef TwobitBPredUnit BranchPred; + typedef BPredUnit BranchPred; typedef FrontEnd FrontEnd; // Will need IQ, LSQ eventually typedef InorderBackEnd BackEnd; diff --git a/cpu/ozone/simple_params.hh b/cpu/ozone/simple_params.hh index 647da1781..7b5c6f67b 100644 --- a/cpu/ozone/simple_params.hh +++ b/cpu/ozone/simple_params.hh @@ -1,4 +1,30 @@ - +/* + * Copyright (c) 2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ #ifndef __CPU_OZONE_SIMPLE_PARAMS_HH__ #define __CPU_OZONE_SIMPLE_PARAMS_HH__ @@ -29,7 +55,6 @@ class SimpleParams : public BaseCPU::Params AlphaITB *itb; AlphaDTB *dtb; #else std::vector workload; -// Process *process; #endif // FULL_SYSTEM //Page Table @@ -103,6 +128,7 @@ class SimpleParams : public BaseCPU::Params // // Branch predictor (BP & BTB) // + std::string predType; unsigned localPredictorSize; unsigned localCtrBits; unsigned localHistoryTableSize; diff --git a/python/m5/objects/AlphaFullCPU.py b/python/m5/objects/AlphaFullCPU.py index d719bf783..043c3c08f 100644 --- a/python/m5/objects/AlphaFullCPU.py +++ b/python/m5/objects/AlphaFullCPU.py @@ -55,6 +55,7 @@ class DerivAlphaFullCPU(BaseCPU): trapLatency = Param.Tick("Trap latency") fetchTrapLatency = Param.Tick("Fetch trap latency") + predType = Param.String("Branch predictor type ('local', 'tournament')") localPredictorSize = Param.Unsigned("Size of local predictor") localCtrBits = Param.Unsigned("Bits per counter") localHistoryTableSize = Param.Unsigned("Size of local history table") diff --git a/python/m5/objects/OzoneCPU.py b/python/m5/objects/OzoneCPU.py index 3fca61e28..ea8b6b537 100644 --- a/python/m5/objects/OzoneCPU.py +++ b/python/m5/objects/OzoneCPU.py @@ -57,6 +57,7 @@ class DerivOzoneCPU(BaseCPU): commitWidth = Param.Unsigned("Commit width") squashWidth = Param.Unsigned("Squash width") + predType = Param.String("Type of branch predictor ('local', 'tournament')") localPredictorSize = Param.Unsigned("Size of local predictor") localCtrBits = Param.Unsigned("Bits per counter") localHistoryTableSize = Param.Unsigned("Size of local history table") -- 2.30.2