From e2a5063e5f18f902833c84894b0ff103e3371493 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Tiago=20M=C3=BCck?= Date: Thu, 18 Jul 2019 12:20:10 -0500 Subject: [PATCH] cpu-o3: MemDepUnit tracks load-acquire/store-release MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit MemDepUnit tracks loads/stores that are also barriers, which is the case of load-acquire / store-release instructions. The tracking logic is also extended to consider multiple outstanding barriers. Change-Id: I95b0c710d7c7e4a138492177e3eaaf5143e9a0ba Signed-off-by: Tiago Mück Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/27132 Reviewed-by: Daniel Carvalho Reviewed-by: Anthony Gutierrez Maintainer: Jason Lowe-Power Tested-by: kokoro --- src/cpu/o3/mem_dep_unit.hh | 27 ++++-- src/cpu/o3/mem_dep_unit_impl.hh | 161 +++++++++++++++++--------------- 2 files changed, 103 insertions(+), 85 deletions(-) diff --git a/src/cpu/o3/mem_dep_unit.hh b/src/cpu/o3/mem_dep_unit.hh index c4a3310cd..3d24b1f13 100644 --- a/src/cpu/o3/mem_dep_unit.hh +++ b/src/cpu/o3/mem_dep_unit.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012, 2014 ARM Limited + * Copyright (c) 2012, 2014, 2020 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -45,6 +45,7 @@ #include #include #include +#include #include "base/statistics.hh" #include "cpu/inst_seq.hh" @@ -177,7 +178,7 @@ class MemDepUnit public: /** Constructs a memory dependence entry. */ MemDepEntry(const DynInstPtr &new_inst) - : inst(new_inst), regsReady(false), memDepReady(false), + : inst(new_inst), regsReady(false), memDeps(0), completed(false), squashed(false) { #ifdef DEBUG @@ -216,8 +217,8 @@ class MemDepUnit /** If the registers are ready or not. */ bool regsReady; - /** If all memory dependencies have been satisfied. */ - bool memDepReady; + /** Number of memory dependencies that need to be satisfied. */ + int memDeps; /** If the instruction is completed. */ bool completed; /** If the instruction is squashed. */ @@ -257,14 +258,20 @@ class MemDepUnit */ MemDepPred depPred; + /** Sequence numbers of outstanding load barriers. */ + std::unordered_set loadBarrierSNs; + + /** Sequence numbers of outstanding store barriers. */ + std::unordered_set storeBarrierSNs; + /** Is there an outstanding load barrier that loads must wait on. */ - bool loadBarrier; - /** The sequence number of the load barrier. */ - InstSeqNum loadBarrierSN; + bool hasLoadBarrier() const { return !loadBarrierSNs.empty(); } + /** Is there an outstanding store barrier that loads must wait on. */ - bool storeBarrier; - /** The sequence number of the store barrier. */ - InstSeqNum storeBarrierSN; + bool hasStoreBarrier() const { return !storeBarrierSNs.empty(); } + + /** Inserts the SN of a barrier inst. to the list of tracked barriers */ + void insertBarrierSN(const DynInstPtr &barr_inst); /** Pointer to the IQ. */ InstructionQueue *iqPtr; diff --git a/src/cpu/o3/mem_dep_unit_impl.hh b/src/cpu/o3/mem_dep_unit_impl.hh index c71296590..9a50341b4 100644 --- a/src/cpu/o3/mem_dep_unit_impl.hh +++ b/src/cpu/o3/mem_dep_unit_impl.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012, 2014 ARM Limited + * Copyright (c) 2012, 2014, 2020 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -42,6 +42,7 @@ #define __CPU_O3_MEM_DEP_UNIT_IMPL_HH__ #include +#include #include "cpu/o3/inst_queue.hh" #include "cpu/o3/mem_dep_unit.hh" @@ -50,8 +51,7 @@ template MemDepUnit::MemDepUnit() - : loadBarrier(false), loadBarrierSN(0), storeBarrier(false), - storeBarrierSN(0), iqPtr(NULL) + : iqPtr(NULL) { } @@ -60,8 +60,7 @@ MemDepUnit::MemDepUnit(DerivO3CPUParams *params) : _name(params->name + ".memdepunit"), depPred(params->store_set_clear_period, params->SSITSize, params->LFSTSize), - loadBarrier(false), loadBarrierSN(0), storeBarrier(false), - storeBarrierSN(0), iqPtr(NULL) + iqPtr(NULL) { DPRINTF(MemDepUnit, "Creating MemDepUnit object.\n"); } @@ -155,8 +154,8 @@ void MemDepUnit::takeOverFrom() { // Be sure to reset all state. - loadBarrier = storeBarrier = false; - loadBarrierSN = storeBarrierSN = 0; + loadBarrierSNs.clear(); + storeBarrierSNs.clear(); depPred.clear(); } @@ -167,6 +166,29 @@ MemDepUnit::setIQ(InstructionQueue *iq_ptr) iqPtr = iq_ptr; } +template +void +MemDepUnit::insertBarrierSN(const DynInstPtr &barr_inst) +{ + InstSeqNum barr_sn = barr_inst->seqNum; + // Memory barriers block loads and stores, write barriers only stores. + if (barr_inst->isMemBarrier()) { + loadBarrierSNs.insert(barr_sn); + storeBarrierSNs.insert(barr_sn); + DPRINTF(MemDepUnit, "Inserted a memory barrier %s SN:%lli\n", + barr_inst->pcState(), barr_sn); + } else if (barr_inst->isWriteBarrier()) { + storeBarrierSNs.insert(barr_sn); + DPRINTF(MemDepUnit, "Inserted a write barrier %s SN:%lli\n", + barr_inst->pcState(), barr_sn); + } + if (loadBarrierSNs.size() || storeBarrierSNs.size()) { + DPRINTF(MemDepUnit, "Outstanding load barriers = %d; " + "store barriers = %d\n", + loadBarrierSNs.size(), storeBarrierSNs.size()); + } +} + template void MemDepUnit::insert(const DynInstPtr &inst) @@ -188,39 +210,46 @@ MemDepUnit::insert(const DynInstPtr &inst) // Check any barriers and the dependence predictor for any // producing memrefs/stores. - InstSeqNum producing_store; - if ((inst->isLoad() || inst->isAtomic()) && loadBarrier) { - DPRINTF(MemDepUnit, "Load barrier [sn:%lli] in flight\n", - loadBarrierSN); - producing_store = loadBarrierSN; - } else if ((inst->isStore() || inst->isAtomic()) && storeBarrier) { - DPRINTF(MemDepUnit, "Store barrier [sn:%lli] in flight\n", - storeBarrierSN); - producing_store = storeBarrierSN; + std::vector producing_stores; + if ((inst->isLoad() || inst->isAtomic()) && hasLoadBarrier()) { + DPRINTF(MemDepUnit, "%d load barriers in flight\n", + loadBarrierSNs.size()); + producing_stores.insert(std::end(producing_stores), + std::begin(loadBarrierSNs), + std::end(loadBarrierSNs)); + } else if ((inst->isStore() || inst->isAtomic()) && hasStoreBarrier()) { + DPRINTF(MemDepUnit, "%d store barriers in flight\n", + storeBarrierSNs.size()); + producing_stores.insert(std::end(producing_stores), + std::begin(storeBarrierSNs), + std::end(storeBarrierSNs)); } else { - producing_store = depPred.checkInst(inst->instAddr()); + InstSeqNum dep = depPred.checkInst(inst->instAddr()); + if (dep != 0) + producing_stores.push_back(dep); } - MemDepEntryPtr store_entry = NULL; + std::vector store_entries; // If there is a producing store, try to find the entry. - if (producing_store != 0) { - DPRINTF(MemDepUnit, "Searching for producer\n"); + for (auto producing_store : producing_stores) { + DPRINTF(MemDepUnit, "Searching for producer [sn:%lli]\n", + producing_store); MemDepHashIt hash_it = memDepHash.find(producing_store); if (hash_it != memDepHash.end()) { - store_entry = (*hash_it).second; - DPRINTF(MemDepUnit, "Proucer found\n"); + store_entries.push_back((*hash_it).second); + DPRINTF(MemDepUnit, "Producer found\n"); } } // If no store entry, then instruction can issue as soon as the registers // are ready. - if (!store_entry) { + if (store_entries.empty()) { DPRINTF(MemDepUnit, "No dependency for inst PC " "%s [sn:%lli].\n", inst->pcState(), inst->seqNum); - inst_entry->memDepReady = true; + assert(inst_entry->memDeps == 0); if (inst->readyToIssue()) { inst_entry->regsReady = true; @@ -229,8 +258,9 @@ MemDepUnit::insert(const DynInstPtr &inst) } } else { // Otherwise make the instruction dependent on the store/barrier. - DPRINTF(MemDepUnit, "Adding to dependency list; " - "inst PC %s is dependent on [sn:%lli].\n", + DPRINTF(MemDepUnit, "Adding to dependency list\n"); + for (auto producing_store : producing_stores) + DPRINTF(MemDepUnit, "\tinst PC %s is dependent on [sn:%lli].\n", inst->pcState(), producing_store); if (inst->readyToIssue()) { @@ -241,7 +271,10 @@ MemDepUnit::insert(const DynInstPtr &inst) inst->clearCanIssue(); // Add this instruction to the list of dependents. - store_entry->dependInsts.push_back(inst_entry); + for (auto store_entry : store_entries) + store_entry->dependInsts.push_back(inst_entry); + + inst_entry->memDeps = store_entries.size(); if (inst->isLoad()) { ++conflictingLoads; @@ -250,6 +283,9 @@ MemDepUnit::insert(const DynInstPtr &inst) } } + // for load-acquire store-release that could also be a barrier + insertBarrierSN(inst); + if (inst->isStore() || inst->isAtomic()) { DPRINTF(MemDepUnit, "Inserting store/atomic PC %s [sn:%lli].\n", inst->pcState(), inst->seqNum); @@ -268,21 +304,7 @@ template void MemDepUnit::insertNonSpec(const DynInstPtr &inst) { - ThreadID tid = inst->threadNumber; - - MemDepEntryPtr inst_entry = std::make_shared(inst); - - // Insert the MemDepEntry into the hash. - memDepHash.insert( - std::pair(inst->seqNum, inst_entry)); -#ifdef DEBUG - MemDepEntry::memdep_insert++; -#endif - - // Add the instruction to the list. - instList[tid].push_back(inst); - - inst_entry->listIt = --(instList[tid].end()); + insertBarrier(inst); // Might want to turn this part into an inline function or something. // It's shared between both insert functions. @@ -304,28 +326,13 @@ template void MemDepUnit::insertBarrier(const DynInstPtr &barr_inst) { - InstSeqNum barr_sn = barr_inst->seqNum; - // Memory barriers block loads and stores, write barriers only stores. - if (barr_inst->isMemBarrier()) { - loadBarrier = true; - loadBarrierSN = barr_sn; - storeBarrier = true; - storeBarrierSN = barr_sn; - DPRINTF(MemDepUnit, "Inserted a memory barrier %s SN:%lli\n", - barr_inst->pcState(),barr_sn); - } else if (barr_inst->isWriteBarrier()) { - storeBarrier = true; - storeBarrierSN = barr_sn; - DPRINTF(MemDepUnit, "Inserted a write barrier\n"); - } - ThreadID tid = barr_inst->threadNumber; MemDepEntryPtr inst_entry = std::make_shared(barr_inst); // Add the MemDepEntry to the hash. memDepHash.insert( - std::pair(barr_sn, inst_entry)); + std::pair(barr_inst->seqNum, inst_entry)); #ifdef DEBUG MemDepEntry::memdep_insert++; #endif @@ -334,6 +341,8 @@ MemDepUnit::insertBarrier(const DynInstPtr &barr_inst) instList[tid].push_back(barr_inst); inst_entry->listIt = --(instList[tid].end()); + + insertBarrierSN(barr_inst); } template @@ -348,7 +357,7 @@ MemDepUnit::regsReady(const DynInstPtr &inst) inst_entry->regsReady = true; - if (inst_entry->memDepReady) { + if (inst_entry->memDeps == 0) { DPRINTF(MemDepUnit, "Instruction has its memory " "dependencies resolved, adding it to the ready list.\n"); @@ -430,18 +439,19 @@ MemDepUnit::completeBarrier(const DynInstPtr &inst) { wakeDependents(inst); completed(inst); - InstSeqNum barr_sn = inst->seqNum; - DPRINTF(MemDepUnit, "barrier completed: %s SN:%lli\n", inst->pcState(), - inst->seqNum); if (inst->isMemBarrier()) { - if (loadBarrierSN == barr_sn) - loadBarrier = false; - if (storeBarrierSN == barr_sn) - storeBarrier = false; + assert(hasLoadBarrier()); + assert(hasStoreBarrier()); + loadBarrierSNs.erase(barr_sn); + storeBarrierSNs.erase(barr_sn); + DPRINTF(MemDepUnit, "Memory barrier completed: %s SN:%lli\n", + inst->pcState(), inst->seqNum); } else if (inst->isWriteBarrier()) { - if (storeBarrierSN == barr_sn) - storeBarrier = false; + assert(hasStoreBarrier()); + storeBarrierSNs.erase(barr_sn); + DPRINTF(MemDepUnit, "Write barrier completed: %s SN:%lli\n", + inst->pcState(), inst->seqNum); } } @@ -469,10 +479,13 @@ MemDepUnit::wakeDependents(const DynInstPtr &inst) "[sn:%lli].\n", woken_inst->inst->seqNum); - if (woken_inst->regsReady && !woken_inst->squashed) { + assert(woken_inst->memDeps > 0); + woken_inst->memDeps -= 1; + + if ((woken_inst->memDeps == 0) && + woken_inst->regsReady && + !woken_inst->squashed) { moveToReady(woken_inst); - } else { - woken_inst->memDepReady = true; } } @@ -507,11 +520,9 @@ MemDepUnit::squash(const InstSeqNum &squashed_num, DPRINTF(MemDepUnit, "Squashing inst [sn:%lli]\n", (*squash_it)->seqNum); - if ((*squash_it)->seqNum == loadBarrierSN) - loadBarrier = false; + loadBarrierSNs.erase((*squash_it)->seqNum); - if ((*squash_it)->seqNum == storeBarrierSN) - storeBarrier = false; + storeBarrierSNs.erase((*squash_it)->seqNum); hash_it = memDepHash.find((*squash_it)->seqNum); -- 2.30.2