RASSize = Param.Unsigned(16, "RAS size")
instShiftAmt = Param.Unsigned(2, "Number of bits to shift instructions by")
+ useIndirect = Param.Bool(True, "Use indirect branch predictor")
+ indirectHashGHR = Param.Bool(True, "Hash branch predictor GHR")
+ indirectHashTargets = Param.Bool(True, "Hash path history targets")
+ indirectSets = Param.Unsigned(256, "Cache sets for indirect predictor")
+ indirectWays = Param.Unsigned(2, "Ways for indirect predictor")
+ indirectTagSize = Param.Unsigned(16, "Indirect target cache tag bits")
+ indirectPathLength = Param.Unsigned(3,
+ "Previous indirect targets to use for path history")
+
+
class LocalBP(BranchPredictor):
type = 'LocalBP'
SimObject('BranchPredictor.py')
+DebugFlag('Indirect')
Source('bpred_unit.cc')
Source('2bit_local.cc')
Source('btb.cc')
+Source('indirect.cc')
Source('ras.cc')
Source('tournament.cc')
Source ('bi_mode.cc')
delete history;
}
+/**
+ * Report the global history register value stored in a prediction record.
+ * bp_history is reinterpreted as a BiModeBP BPHistory object.
+ */
+unsigned
+BiModeBP::getGHR(void *bp_history) const
+{
+    const BPHistory *record = static_cast<BPHistory *>(bp_history);
+    return record->globalHistoryReg;
+}
+
void
BiModeBP::updateGlobalHistReg(bool taken)
{
void btbUpdate(Addr branch_addr, void * &bp_history);
void update(Addr branch_addr, bool taken, void *bp_history, bool squashed);
void retireSquashed(void *bp_history);
+ unsigned getGHR(void *bp_history) const; ///< GHR saved in bp_history.
private:
void updateGlobalHistReg(bool taken);
params->instShiftAmt,
params->numThreads),
RAS(numThreads),
+ useIndirect(params->useIndirect),
+ iPred(params->indirectHashGHR,
+ params->indirectHashTargets,
+ params->indirectSets,
+ params->indirectWays,
+ params->indirectTagSize,
+ params->indirectPathLength,
+ params->instShiftAmt,
+ params->numThreads),
instShiftAmt(params->instShiftAmt)
{
for (auto& r : RAS)
.name(name() + ".RASInCorrect")
.desc("Number of incorrect RAS predictions.")
;
+
+ indirectLookups
+ .name(name() + ".indirectLookups")
+ .desc("Number of indirect predictor lookups.")
+ ;
+
+ indirectHits
+ .name(name() + ".indirectHits")
+ .desc("Number of indirect target hits.")
+ ;
+
+ indirectMisses
+ .name(name() + ".indirectMisses")
+ .desc("Number of indirect misses.")
+ ;
+
+    // Counts indirect branches whose predicted target turned out wrong.
+    // The leading '.' separates the stat from the predictor's own name,
+    // matching the other indirect* stats registered above.
+    indirectMispredicted
+        .name(name() + ".indirectMispredicted")
+        .desc("Number of mispredicted indirect branches.")
+        ;
+
}
ProbePoints::PMUUPtr
tid, pc, pc, RAS[tid].topIdx());
}
- if (BTB.valid(pc.instAddr(), tid)) {
- ++BTBHits;
-
- // If it's not a return, use the BTB to get the target addr.
- target = BTB.lookup(pc.instAddr(), tid);
-
- DPRINTF(Branch, "[tid:%i]: Instruction %s predicted"
- " target is %s.\n", tid, pc, target);
-
+ if (inst->isDirectCtrl() || !useIndirect) {
+ // Check BTB on direct branches
+ if (BTB.valid(pc.instAddr(), tid)) {
+ ++BTBHits;
+
+ // If it's not a return, use the BTB to get target addr.
+ target = BTB.lookup(pc.instAddr(), tid);
+
+ DPRINTF(Branch, "[tid:%i]: Instruction %s predicted"
+ " target is %s.\n", tid, pc, target);
+
+ } else {
+ DPRINTF(Branch, "[tid:%i]: BTB doesn't have a "
+ "valid entry.\n",tid);
+ pred_taken = false;
+ // The Direction of the branch predictor is altered
+ // because the BTB did not have an entry
+ // The predictor needs to be updated accordingly
+ if (!inst->isCall() && !inst->isReturn()) {
+ btbUpdate(pc.instAddr(), bp_history);
+ DPRINTF(Branch, "[tid:%i]:[sn:%i] btbUpdate"
+ " called for %s\n", tid, seqNum, pc);
+ } else if (inst->isCall() && !inst->isUncondCtrl()) {
+ RAS[tid].pop();
+ predict_record.pushedRAS = false;
+ }
+ TheISA::advancePC(target, inst);
+ }
} else {
- DPRINTF(Branch, "[tid:%i]: BTB doesn't have a "
- "valid entry.\n",tid);
- pred_taken = false;
- // The Direction of the branch predictor is altered because the
- // BTB did not have an entry
- // The predictor needs to be updated accordingly
- if (!inst->isCall() && !inst->isReturn()) {
- btbUpdate(pc.instAddr(), bp_history);
- DPRINTF(Branch, "[tid:%i]:[sn:%i] btbUpdate"
- " called for %s\n", tid, seqNum, pc);
- } else if (inst->isCall() && !inst->isUncondCtrl()) {
- RAS[tid].pop();
- predict_record.pushedRAS = false;
+ predict_record.wasIndirect = true;
+ ++indirectLookups;
+ //Consult indirect predictor on indirect control
+ if (iPred.lookup(pc.instAddr(), getGHR(bp_history), target,
+ tid)) {
+ // Indirect predictor hit
+ ++indirectHits;
+ DPRINTF(Branch, "[tid:%i]: Instruction %s predicted "
+ "indirect target is %s.\n", tid, pc, target);
+ } else {
+ ++indirectMisses;
+ pred_taken = false;
+ DPRINTF(Branch, "[tid:%i]: Instruction %s no indirect "
+ "target.\n", tid, pc);
+ if (!inst->isCall() && !inst->isReturn()) {
+
+ } else if (inst->isCall() && !inst->isUncondCtrl()) {
+ RAS[tid].pop();
+ predict_record.pushedRAS = false;
+ }
+ TheISA::advancePC(target, inst);
}
- TheISA::advancePC(target, inst);
+ iPred.recordIndirect(pc.instAddr(), target.instAddr(), seqNum,
+ tid);
}
}
} else {
DPRINTF(Branch, "[tid:%i]: Committing branches until "
"[sn:%lli].\n", tid, done_sn);
+ iPred.commit(done_sn, tid);
while (!predHist[tid].empty() &&
predHist[tid].back().seqNum <= done_sn) {
// Update the branch predictor with the correct results.
{
History &pred_hist = predHist[tid];
+ iPred.squash(squashed_sn, tid);
while (!pred_hist.empty() &&
pred_hist.front().seqNum > squashed_sn) {
if (pred_hist.front().usedRAS) {
if ((*hist_it).usedRAS) {
++RASIncorrect;
+ DPRINTF(Branch, "[tid:%i]: Incorrect RAS [sn:%i]\n",
+ tid, hist_it->seqNum);
}
+ // Have to get GHR here because the update deletes bpHistory
+ unsigned ghr = getGHR(hist_it->bpHistory);
+
update((*hist_it).pc, actually_taken,
pred_hist.front().bpHistory, true);
hist_it->wasSquashed = true;
RAS[tid].pop();
hist_it->usedRAS = true;
}
+ if (hist_it->wasIndirect) {
+ ++indirectMispredicted;
+ iPred.recordTarget(hist_it->seqNum, ghr, corrTarget, tid);
+ } else {
+ DPRINTF(Branch,"[tid: %i] BTB Update called for [sn:%i]"
+ " PC: %s\n", tid,hist_it->seqNum, hist_it->pc);
- DPRINTF(Branch,"[tid: %i] BTB Update called for [sn:%i]"
- " PC: %s\n", tid,hist_it->seqNum, hist_it->pc);
-
- BTB.update((*hist_it).pc, corrTarget, tid);
-
+ BTB.update((*hist_it).pc, corrTarget, tid);
+ }
} else {
//Actually not Taken
if (hist_it->usedRAS) {
#include "base/statistics.hh"
#include "base/types.hh"
#include "cpu/pred/btb.hh"
+#include "cpu/pred/indirect.hh"
#include "cpu/pred/ras.hh"
#include "cpu/inst_seq.hh"
#include "cpu/static_inst.hh"
void BTBUpdate(Addr instPC, const TheISA::PCState &target)
{ BTB.update(instPC, target, 0); }
+ /**
+  * Return the global history register value associated with bp_history.
+  * This base implementation ignores its argument and always returns 0;
+  * it is virtual so that derived predictors can supply their own value.
+  */
+ virtual unsigned getGHR(void* bp_history) const { return 0; }
+
void dump();
private:
ThreadID _tid)
: seqNum(seq_num), pc(instPC), bpHistory(bp_history), RASTarget(0),
RASIndex(0), tid(_tid), predTaken(pred_taken), usedRAS(0), pushedRAS(0),
- wasCall(0), wasReturn(0), wasSquashed(0)
+ wasCall(0), wasReturn(0), wasSquashed(0), wasIndirect(0)
{}
bool operator==(const PredictorHistory &entry) const {
/** Whether this instruction has already mispredicted/updated bp */
bool wasSquashed;
+
+ /** Whether this instruction was an indirect branch */
+ bool wasIndirect;
};
typedef std::deque<PredictorHistory> History;
/** The per-thread return address stack. */
std::vector<ReturnAddrStack> RAS;
+ /** Option to disable indirect predictor. */
+ const bool useIndirect;
+
+ /** The indirect target predictor. */
+ IndirectPredictor iPred;
+
/** Stat for number of BP lookups. */
Stats::Scalar lookups;
/** Stat for number of conditional branches predicted. */
/** Stat for number of times the RAS is incorrect. */
Stats::Scalar RASIncorrect;
+ /** Stat for the number of indirect target lookups.*/
+ Stats::Scalar indirectLookups;
+ /** Stat for the number of indirect target hits.*/
+ Stats::Scalar indirectHits;
+ /** Stat for the number of indirect target misses.*/
+ Stats::Scalar indirectMisses;
+ /** Stat for the number of indirect target mispredictions.*/
+ Stats::Scalar indirectMispredicted;
+
protected:
/** Number of bits to shift instructions by for predictor addresses. */
const unsigned instShiftAmt;
--- /dev/null
+/*
+ * Copyright (c) 2014 ARM Limited
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Mitch Hayenga
+ */
+
+#include "cpu/pred/indirect.hh"
+
+#include "base/intmath.hh"
+#include "debug/Indirect.hh"
+
+/**
+ * Construct the indirect target predictor: store the hashing options and
+ * geometry, then size the per-thread state and the numSets x numWays
+ * target cache. Panics when the number of sets is not a power of two.
+ */
+IndirectPredictor::IndirectPredictor(bool hash_ghr, bool hash_targets,
+    unsigned num_sets, unsigned num_ways,
+    unsigned tag_bits, unsigned path_len, unsigned inst_shift,
+    unsigned num_threads)
+    : hashGHR(hash_ghr),
+      hashTargets(hash_targets),
+      numSets(num_sets),
+      numWays(num_ways),
+      tagBits(tag_bits),
+      pathLength(path_len),
+      instShift(inst_shift)
+{
+    // The set index is formed by masking a hash with (numSets - 1).
+    if (!isPowerOf2(numSets)) {
+        panic("Indirect predictor requires power of 2 number of sets");
+    }
+
+    // One path-history record per thread.
+    threadInfo.resize(num_threads);
+
+    // Allocate every set, then give each set its ways.
+    targetCache.resize(numSets);
+    for (auto &cache_set : targetCache) {
+        cache_set.resize(numWays);
+    }
+}
+
+/**
+ * Look for a predicted target for the indirect branch at br_addr.
+ *
+ * The set is chosen by getSetIndex() and each way in it is compared
+ * against getTag(br_addr). On the first match the stored target is copied
+ * into target and true is returned; otherwise target is left untouched
+ * and false is returned.
+ */
+bool
+IndirectPredictor::lookup(Addr br_addr, unsigned ghr, TheISA::PCState& target,
+    ThreadID tid)
+{
+    const Addr set_index = getSetIndex(br_addr, ghr, tid);
+    const Addr tag = getTag(br_addr);
+
+    assert(set_index < numSets);
+
+    DPRINTF(Indirect, "Looking up %x (set:%d)\n", br_addr, set_index);
+    for (const auto &entry : targetCache[set_index]) {
+        if (entry.tag != tag) {
+            continue;
+        }
+        DPRINTF(Indirect, "Hit %x (target:%s)\n", br_addr, entry.target);
+        target = entry.target;
+        return true;
+    }
+
+    DPRINTF(Indirect, "Miss %x\n", br_addr);
+    return false;
+}
+
+/**
+ * Append an indirect branch (its address, the target it was given and its
+ * sequence number) to the end of thread tid's path history.
+ */
+void
+IndirectPredictor::recordIndirect(Addr br_addr, Addr tgt_addr,
+    InstSeqNum seq_num, ThreadID tid)
+{
+    DPRINTF(Indirect, "Recording %x seq:%d\n", br_addr, seq_num);
+    auto &path_hist = threadInfo[tid].pathHist;
+    path_hist.push_back(HistoryEntry(br_addr, tgt_addr, seq_num));
+}
+
+/**
+ * Advance thread tid's path history past at most one entry whose sequence
+ * number is <= seq_num. While fewer than pathLength entries sit in front
+ * of the head index the index is bumped; after that the front entry is
+ * dropped instead.
+ */
+void
+IndirectPredictor::commit(InstSeqNum seq_num, ThreadID tid)
+{
+    DPRINTF(Indirect, "Committing seq:%d\n", seq_num);
+    ThreadInfo &thread = threadInfo[tid];
+    auto &path_hist = thread.pathHist;
+    unsigned &head = thread.headHistEntry;
+
+    if (path_hist.empty()) {
+        return;
+    }
+
+    // Nothing to retire unless the head entry exists and is covered by
+    // seq_num.
+    if (head >= path_hist.size() || path_hist[head].seqNum > seq_num) {
+        return;
+    }
+
+    if (head < pathLength) {
+        ++head;
+    } else {
+        path_hist.pop_front();
+    }
+}
+
+/**
+ * Remove from thread tid's path history the first entry whose sequence
+ * number exceeds seq_num, together with every entry recorded after it.
+ */
+void
+IndirectPredictor::squash(InstSeqNum seq_num, ThreadID tid)
+{
+    DPRINTF(Indirect, "Squashing seq:%d\n", seq_num);
+    auto &path_hist = threadInfo[tid].pathHist;
+
+    // Skip over the leading entries that survive the squash.
+    auto first_squashed = path_hist.begin();
+    while (first_squashed != path_hist.end() &&
+           first_squashed->seqNum <= seq_num) {
+        ++first_squashed;
+    }
+
+    if (first_squashed != path_hist.end()) {
+        DPRINTF(Indirect, "Squashing series starting with sn:%d\n",
+                first_squashed->seqNum);
+    }
+    path_hist.erase(first_squashed, path_hist.end());
+}
+
+
+/**
+ * Install or refresh the target-cache entry for the last indirect branch
+ * in thread tid's path history, and store the resolved target in that
+ * history entry.
+ *
+ * @param seq_num Sequence number of the branch; only used for tracing.
+ * @param ghr Global history value hashed into the set index.
+ * @param target Resolved target to remember for the branch.
+ * @param tid Thread whose path history is used.
+ */
+void
+IndirectPredictor::recordTarget(InstSeqNum seq_num, unsigned ghr,
+    const TheISA::PCState& target, ThreadID tid)
+{
+    ThreadInfo &t_info = threadInfo[tid];
+
+    // The branch being resolved must still be recorded; reading the back
+    // of an empty deque is undefined behaviour.
+    assert(!t_info.pathHist.empty());
+
+    // Should have just squashed, so this branch is the last entry left.
+    HistoryEntry hist_entry = t_info.pathHist.back();
+    // Temporarily pop it off the history so that its own entry does not
+    // feed the path hash used to calculate the set.
+    t_info.pathHist.pop_back();
+    Addr set_index = getSetIndex(hist_entry.pcAddr, ghr, tid);
+    Addr tag = getTag(hist_entry.pcAddr);
+    hist_entry.targetAddr = target.instAddr();
+    t_info.pathHist.push_back(hist_entry);
+
+    assert(set_index < numSets);
+
+    auto &iset = targetCache[set_index];
+    for (auto &way : iset) {
+        if (way.tag == tag) {
+            DPRINTF(Indirect, "Updating Target (seq: %d br:%x set:%d target:"
+                    "%s)\n", seq_num, hist_entry.pcAddr, set_index, target);
+            way.target = target;
+            return;
+        }
+    }
+
+    DPRINTF(Indirect, "Allocating Target (seq: %d br:%x set:%d target:%s)\n",
+            seq_num, hist_entry.pcAddr, set_index, target);
+    // Did not find entry, random replacement
+    auto &victim = iset[rand() % numWays];
+    victim.tag = tag;
+    victim.target = target;
+}
+
+
+/**
+ * Compute the target-cache set for a branch.
+ *
+ * The hash starts as br_addr shifted right by instShift. When hashGHR is
+ * set, ghr is XORed in. When hashTargets is set, the targets of up to
+ * pathLength of the most recently recorded indirect branches of thread
+ * tid are XORed in, each shifted a little further than the previous one.
+ *
+ * @return The hash masked to the range [0, numSets).
+ */
+inline Addr
+IndirectPredictor::getSetIndex(Addr br_addr, unsigned ghr, ThreadID tid)
+{
+    const ThreadInfo &t_info = threadInfo[tid];
+
+    Addr hash = br_addr >> instShift;
+    if (hashGHR) {
+        hash ^= ghr;
+    }
+    // A zero pathLength folds in no targets at all; skipping the block
+    // also avoids dividing by zero when computing the per-entry shift.
+    if (hashTargets && pathLength > 0) {
+        const unsigned hash_shift = floorLog2(numSets) / pathLength;
+        const auto &path_hist = t_info.pathHist;
+        // Walk from the newest entry backwards, at most pathLength deep.
+        auto entry = path_hist.rbegin();
+        for (unsigned p = 0; p < pathLength && entry != path_hist.rend();
+             ++p, ++entry) {
+            hash ^= (entry->targetAddr >>
+                     (instShift + p*hash_shift));
+        }
+    }
+    return hash & (numSets-1);
+}
+
+/**
+ * Extract the tag for a branch: the low tagBits bits of br_addr after it
+ * has been shifted right by instShift.
+ */
+inline Addr
+IndirectPredictor::getTag(Addr br_addr)
+{
+    // Build the mask in Addr width. Shifting the int literal 0x1 overflows
+    // once tagBits reaches the width of int, and a shift by the full width
+    // of Addr is undefined as well, so that case keeps every bit.
+    const unsigned addr_bits = sizeof(Addr) * 8;
+    const Addr tag_mask = tagBits >= addr_bits ?
+        ~static_cast<Addr>(0) : (static_cast<Addr>(1) << tagBits) - 1;
+    return (br_addr >> instShift) & tag_mask;
+}
--- /dev/null
+/*
+ * Copyright (c) 2014 ARM Limited
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Mitch Hayenga
+ */
+
+#ifndef __CPU_PRED_INDIRECT_HH__
+#define __CPU_PRED_INDIRECT_HH__
+
+#include <deque>
+
+#include "arch/isa_traits.hh"
+#include "config/the_isa.hh"
+#include "cpu/inst_seq.hh"
+/**
+ * Cache of indirect branch targets organised as numSets sets of numWays
+ * tagged entries. The set is selected by hashing the branch address with,
+ * optionally, a global history value and the targets of recently recorded
+ * indirect branches (kept per thread as a path history).
+ *
+ * NOTE(review): std::vector, Addr and ThreadID are used below without a
+ * direct include in this header; presumably they arrive transitively
+ * through the headers included above -- confirm.
+ */
+class IndirectPredictor
+{
+ public:
+ IndirectPredictor(bool hash_ghr, bool hash_targets,
+ unsigned num_sets, unsigned num_ways,
+ unsigned tag_bits, unsigned path_len,
+ unsigned inst_shift, unsigned num_threads); ///< Panics unless num_sets is a power of 2.
+ bool lookup(Addr br_addr, unsigned ghr, TheISA::PCState& br_target,
+ ThreadID tid); ///< True on a tag match; br_target then holds the stored target.
+ void recordIndirect(Addr br_addr, Addr tgt_addr, InstSeqNum seq_num,
+ ThreadID tid); ///< Appends the branch to tid's path history.
+ void commit(InstSeqNum seq_num, ThreadID tid); ///< Advances the head index or pops the front entry.
+ void squash(InstSeqNum seq_num, ThreadID tid); ///< Erases path entries from the first with seqNum > seq_num.
+ void recordTarget(InstSeqNum seq_num, unsigned ghr,
+ const TheISA::PCState& target, ThreadID tid); ///< Updates or allocates the entry for the last recorded branch.
+
+ private:
+ const bool hashGHR; ///< XOR the global history value into the set index.
+ const bool hashTargets; ///< XOR path-history targets into the set index.
+ const unsigned numSets; ///< Number of sets; checked to be a power of 2.
+ const unsigned numWays; ///< Entries per set.
+ const unsigned tagBits; ///< Number of address bits kept in a tag.
+ const unsigned pathLength; ///< Maximum path-history entries hashed.
+ const unsigned instShift; ///< Right shift applied to addresses before hashing.
+ /** One way of the target cache; starts as tag 0 with no separate valid flag. */
+ struct IPredEntry
+ {
+ IPredEntry() : tag(0), target(0) { }
+ Addr tag;
+ TheISA::PCState target;
+ };
+ /** The target cache, indexed as [set][way]. */
+ std::vector<std::vector<IPredEntry> > targetCache;
+ /** Hash br_addr (plus, optionally, ghr and tid's path history) to a set. */
+ Addr getSetIndex(Addr br_addr, unsigned ghr, ThreadID tid);
+ Addr getTag(Addr br_addr); ///< Low tagBits bits of the shifted branch address.
+ /** A recorded indirect branch: its address, target and sequence number. */
+ struct HistoryEntry
+ {
+ HistoryEntry(Addr br_addr, Addr tgt_addr, InstSeqNum seq_num)
+ : pcAddr(br_addr), targetAddr(tgt_addr), seqNum(seq_num) { }
+ Addr pcAddr;
+ Addr targetAddr;
+ InstSeqNum seqNum;
+ };
+
+ /** Path-history state kept for one thread. */
+ struct ThreadInfo {
+ ThreadInfo() : headHistEntry(0) { }
+
+ std::deque<HistoryEntry> pathHist; ///< Recorded branches in recording order.
+ unsigned headHistEntry; ///< Index into pathHist advanced by commit().
+ };
+ /** One ThreadInfo per thread, indexed by ThreadID. */
+ std::vector<ThreadInfo> threadInfo;
+};
+
+#endif // __CPU_PRED_INDIRECT_HH__
return new TournamentBP(this);
}
+/**
+ * Report the global history value stored in a prediction record.
+ * bp_history is reinterpreted as a TournamentBP BPHistory object.
+ */
+unsigned
+TournamentBP::getGHR(void *bp_history) const
+{
+    const BPHistory *record = static_cast<BPHistory *>(bp_history);
+    return record->globalHistory;
+}
+
#ifdef DEBUG
int
TournamentBP::BPHistory::newCount = 0;
*/
void squash(void *bp_history);
+ unsigned getGHR(void *bp_history) const; ///< GHR saved in bp_history.
+
/** Returns the global history. */
inline unsigned readGlobalHist() { return globalHistory; }