From b6203360ef684a8dc32981221336f5d216ce2668 Mon Sep 17 00:00:00 2001 From: Ali Saidi Date: Fri, 19 Aug 2011 15:08:07 -0500 Subject: [PATCH] LSQ: Set store predictor to periodically clear itself as recommended in the storesets paper. This patch improves performance by as much as 10% on some spec benchmarks. --- src/cpu/o3/O3CPU.py | 2 ++ src/cpu/o3/mem_dep_unit_impl.hh | 9 ++++++--- src/cpu/o3/store_set.cc | 25 ++++++++++++++++++++++--- src/cpu/o3/store_set.hh | 18 ++++++++++++++++-- 4 files changed, 46 insertions(+), 8 deletions(-) diff --git a/src/cpu/o3/O3CPU.py b/src/cpu/o3/O3CPU.py index f379fcd8a..47b18a3ec 100644 --- a/src/cpu/o3/O3CPU.py +++ b/src/cpu/o3/O3CPU.py @@ -121,6 +121,8 @@ class DerivO3CPU(BaseCPU): LSQDepCheckShift = Param.Unsigned(4, "Number of places to shift addr before check") LSQCheckLoads = Param.Bool(True, "Should dependency violations be checked for loads & stores or just stores") + store_set_clear_period = Param.Unsigned(250000, + "Number of load/store insts before the dep predictor should be invalidated") LFSTSize = Param.Unsigned(1024, "Last fetched store table size") SSITSize = Param.Unsigned(1024, "Store set ID table size") diff --git a/src/cpu/o3/mem_dep_unit_impl.hh b/src/cpu/o3/mem_dep_unit_impl.hh index 0208a622e..d30dcbd3d 100644 --- a/src/cpu/o3/mem_dep_unit_impl.hh +++ b/src/cpu/o3/mem_dep_unit_impl.hh @@ -45,8 +45,10 @@ MemDepUnit::MemDepUnit() template MemDepUnit::MemDepUnit(DerivO3CPUParams *params) : _name(params->name + ".memdepunit"), - depPred(params->SSITSize, params->LFSTSize), loadBarrier(false), - loadBarrierSN(0), storeBarrier(false), storeBarrierSN(0), iqPtr(NULL) + depPred(params->store_set_clear_period, params->SSITSize, + params->LFSTSize), + loadBarrier(false), loadBarrierSN(0), storeBarrier(false), + storeBarrierSN(0), iqPtr(NULL) { DPRINTF(MemDepUnit, "Creating MemDepUnit object.\n"); } @@ -85,7 +87,8 @@ MemDepUnit::init(DerivO3CPUParams *params, ThreadID tid) _name = csprintf("%s.memDep%d", params->name, 
tid); id = tid; - depPred.init(params->SSITSize, params->LFSTSize); + depPred.init(params->store_set_clear_period, params->SSITSize, + params->LFSTSize); } template diff --git a/src/cpu/o3/store_set.cc b/src/cpu/o3/store_set.cc index fc87c417e..acd4a8d0a 100644 --- a/src/cpu/o3/store_set.cc +++ b/src/cpu/o3/store_set.cc @@ -34,8 +34,8 @@ #include "cpu/o3/store_set.hh" #include "debug/StoreSet.hh" -StoreSet::StoreSet(int _SSIT_size, int _LFST_size) - : SSITSize(_SSIT_size), LFSTSize(_LFST_size) +StoreSet::StoreSet(uint64_t clear_period, int _SSIT_size, int _LFST_size) + : clearPeriod(clear_period), SSITSize(_SSIT_size), LFSTSize(_LFST_size) { DPRINTF(StoreSet, "StoreSet: Creating store set object.\n"); DPRINTF(StoreSet, "StoreSet: SSIT size: %i, LFST size: %i.\n", @@ -68,6 +68,8 @@ StoreSet::StoreSet(int _SSIT_size, int _LFST_size) indexMask = SSITSize - 1; offsetBits = 2; + + memOpsPred = 0; } StoreSet::~StoreSet() @@ -75,10 +77,11 @@ StoreSet::~StoreSet() } void -StoreSet::init(int _SSIT_size, int _LFST_size) +StoreSet::init(uint64_t clear_period, int _SSIT_size, int _LFST_size) { SSITSize = _SSIT_size; LFSTSize = _LFST_size; + clearPeriod = clear_period; DPRINTF(StoreSet, "StoreSet: Creating store set object.\n"); DPRINTF(StoreSet, "StoreSet: SSIT size: %i, LFST size: %i.\n", @@ -103,6 +106,8 @@ StoreSet::init(int _SSIT_size, int _LFST_size) indexMask = SSITSize - 1; offsetBits = 2; + + memOpsPred = 0; } @@ -179,9 +184,22 @@ StoreSet::violation(Addr store_PC, Addr load_PC) } } +void +StoreSet::checkClear() +{ + memOpsPred++; + if (memOpsPred > clearPeriod) { + DPRINTF(StoreSet, "Wiping predictor state because %d ld/st executed\n", + clearPeriod); + memOpsPred = 0; + clear(); + } +} + void StoreSet::insertLoad(Addr load_PC, InstSeqNum load_seq_num) { + checkClear(); // Does nothing. 
return; } @@ -193,6 +211,7 @@ StoreSet::insertStore(Addr store_PC, InstSeqNum store_seq_num, ThreadID tid) int store_SSID; + checkClear(); assert(index < SSITSize); if (!validSSIT[index]) { diff --git a/src/cpu/o3/store_set.hh b/src/cpu/o3/store_set.hh index ce4591f68..973b83b42 100644 --- a/src/cpu/o3/store_set.hh +++ b/src/cpu/o3/store_set.hh @@ -63,18 +63,24 @@ class StoreSet StoreSet() { }; /** Creates store set predictor with given table sizes. */ - StoreSet(int SSIT_size, int LFST_size); + StoreSet(uint64_t clear_period, int SSIT_size, int LFST_size); /** Default destructor. */ ~StoreSet(); /** Initializes the store set predictor with the given table sizes. */ - void init(int SSIT_size, int LFST_size); + void init(uint64_t clear_period, int SSIT_size, int LFST_size); /** Records a memory ordering violation between the younger load * and the older store. */ void violation(Addr store_PC, Addr load_PC); + /** Clears the store set predictor every so often so that the + * entries don't all fill up and cause stores to be constantly + * predicted as conflicting. + */ + void checkClear(); + /** Inserts a load into the store set predictor. This does nothing but * is included in case other predictors require a similar function. */ @@ -130,6 +136,11 @@ class StoreSet typedef std::map::iterator SeqNumMapIt; + /** Number of loads/stores to process before wiping predictor so all + * entries don't get saturated + */ + uint64_t clearPeriod; + /** Store Set ID Table size, in entries. */ int SSITSize; @@ -141,6 +152,9 @@ class StoreSet // HACK: Hardcoded for now. int offsetBits; + + /** Number of memory operations predicted since last clear of predictor */ + int memOpsPred; }; #endif // __CPU_O3_STORE_SET_HH__ -- 2.30.2