From faf0af7a3546543276bb20ac22fb2d1ff450f36c Mon Sep 17 00:00:00 2001 From: Gabe Black Date: Sun, 30 Aug 2020 02:28:33 -0700 Subject: [PATCH] arch,cpu: Rearrange StaticInst flags for memory barriers. There were three different StaticInst flags for memory barriers, IsMemBarrier, IsReadBarrier, and IsWriteBarrier. IsReadBarrier was never used, and IsMemBarrier was for both loads and stores, so a composite of IsReadBarrier and IsWriteBarrier. This change gets rid of IsMemBarrier and replaces by setting IsReadBarrier and IsWriteBarrier at the same time. An isMemBarrier accessor is left, but is now implemented by checking if both of the other flags are set, and renamed to isFullMemBarrier to make it clear that it's checking both for both types of barrier, not one or the other. Change-Id: I702633a047f4777be4b180b42d62438ca69f52ea Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/33743 Reviewed-by: Gabe Black Maintainer: Gabe Black Tested-by: kokoro --- src/arch/arm/insts/mem64.cc | 1 - src/arch/arm/insts/tme64.cc | 1 - src/arch/arm/isa/insts/amo64.isa | 2 +- src/arch/arm/isa/insts/ldr.isa | 8 +-- src/arch/arm/isa/insts/ldr64.isa | 4 +- src/arch/arm/isa/insts/misc.isa | 5 +- src/arch/arm/isa/insts/misc64.isa | 5 +- src/arch/arm/isa/insts/str.isa | 6 +- src/arch/arm/isa/insts/str64.isa | 3 +- src/arch/arm/isa/templates/semihost.isa | 10 +-- src/arch/mips/isa/decoder.isa | 2 +- src/arch/power/isa/decoder.isa | 4 +- src/arch/riscv/isa/decoder.isa | 2 +- src/arch/riscv/isa/formats/amo.isa | 12 ++-- src/arch/sparc/isa/decoder.isa | 3 +- src/arch/x86/isa/decoder/two_byte_opcodes.isa | 13 ++-- src/arch/x86/isa/microops/specop.isa | 3 +- src/cpu/StaticInstFlags.py | 6 -- src/cpu/base_dyn_inst.hh | 3 +- src/cpu/minor/execute.cc | 6 +- src/cpu/minor/lsq.cc | 4 +- src/cpu/o3/commit_impl.hh | 4 +- src/cpu/o3/iew_impl.hh | 2 +- src/cpu/o3/inst_queue_impl.hh | 6 +- src/cpu/o3/mem_dep_unit_impl.hh | 72 +++++++++++-------- src/cpu/static_inst.hh | 7 +- 26 files changed, 104 insertions(+), 90 deletions(-) diff --git a/src/arch/arm/insts/mem64.cc b/src/arch/arm/insts/mem64.cc index 0ddda956e..a12c33058 100644 --- a/src/arch/arm/insts/mem64.cc +++ b/src/arch/arm/insts/mem64.cc @@ -79,7 +79,6 @@ Memory64::setExcAcRel(bool exclusive, bool acrel) else memAccessFlags |= ArmISA::TLB::AllowUnaligned; if (acrel) { - flags[IsMemBarrier] = true; flags[IsWriteBarrier] = true; flags[IsReadBarrier] = true; } diff --git a/src/arch/arm/insts/tme64.cc b/src/arch/arm/insts/tme64.cc index 30aff208e..f7096d0b2 100644 --- a/src/arch/arm/insts/tme64.cc +++ b/src/arch/arm/insts/tme64.cc @@ -83,7 +83,6 @@ MicroTfence64::MicroTfence64(ExtMachInst machInst) _numVecElemDestRegs = 0; _numIntDestRegs = 0; _numCCDestRegs = 0; - flags[IsMemBarrier] = true; flags[IsMicroop] = true; flags[IsReadBarrier] = true; flags[IsWriteBarrier] = true; diff --git a/src/arch/arm/isa/insts/amo64.isa b/src/arch/arm/isa/insts/amo64.isa index 1fe9b7a1e..51e1f38e5 100644 --- a/src/arch/arm/isa/insts/amo64.isa +++ b/src/arch/arm/isa/insts/amo64.isa @@ -91,7 +91,7 @@ let {{ self.instFlags.append("IsMicroop") if self.flavor in ("release", "acquire_release", "acquire"): - self.instFlags.append("IsMemBarrier") + self.instFlags.extend(["IsReadBarrier", "IsWriteBarrier"]) if self.flavor in ("release", "acquire_release"): self.instFlags.append("IsWriteBarrier") if self.flavor in ("acquire_release", "acquire"): diff --git a/src/arch/arm/isa/insts/ldr.isa b/src/arch/arm/isa/insts/ldr.isa index d7e27a481..3be0e3e90 100644 --- a/src/arch/arm/isa/insts/ldr.isa +++ b/src/arch/arm/isa/insts/ldr.isa @@ -179,9 +179,7 @@ let {{ self.memFlags.append("Request::LLSC") if self.flavor in ("acquire", "acex"): - self.instFlags.extend(["IsMemBarrier", - "IsWriteBarrier", - "IsReadBarrier"]) + self.instFlags.extend(["IsWriteBarrier", "IsReadBarrier"]) self.memFlags.append("Request::ACQUIRE") # Disambiguate the class name for different flavors of loads @@ -260,9 +258,7 @@ let {{ self.Name = "%s_%s" % (self.name.upper(), self.Name) if self.flavor in ("acquire", "acex"): - self.instFlags.extend(["IsMemBarrier", - "IsWriteBarrier", - "IsReadBarrier"]) + self.instFlags.extend(["IsWriteBarrier", "IsReadBarrier"]) self.memFlags.append("Request::ACQUIRE") def emit(self): diff --git a/src/arch/arm/isa/insts/ldr64.isa b/src/arch/arm/isa/insts/ldr64.isa index 51f53897b..1ce34cdd0 100644 --- a/src/arch/arm/isa/insts/ldr64.isa +++ b/src/arch/arm/isa/insts/ldr64.isa @@ -91,9 +91,7 @@ let {{ self.memFlags.append("ArmISA::TLB::AllowUnaligned") if self.flavor in ("acquire", "acex", "acexp"): - self.instFlags.extend(["IsMemBarrier", - "IsWriteBarrier", - "IsReadBarrier"]) + self.instFlags.extend(["IsWriteBarrier", "IsReadBarrier"]) self.memFlags.append("Request::ACQUIRE") if self.flavor in ("acex", "exclusive", "exp", "acexp"): diff --git a/src/arch/arm/isa/insts/misc.isa b/src/arch/arm/isa/insts/misc.isa index 6a9b04804..b2543b3da 100644 --- a/src/arch/arm/isa/insts/misc.isa +++ b/src/arch/arm/isa/insts/misc.isa @@ -1238,7 +1238,8 @@ let {{ dsbIop = InstObjParams("dsb", "Dsb", "ImmOp", {"code": dsbCode, "predicate_test": predicateTest}, - ['IsMemBarrier', 'IsSerializeAfter']) + ['IsReadBarrier', 'IsWriteBarrier', + 'IsSerializeAfter']) header_output += ImmOpDeclare.subst(dsbIop) decoder_output += ImmOpConstructor.subst(dsbIop) exec_output += PredOpExecute.subst(dsbIop) @@ -1254,7 +1255,7 @@ let {{ dmbIop = InstObjParams("dmb", "Dmb", "ImmOp", {"code": dmbCode, "predicate_test": predicateTest}, - ['IsMemBarrier']) + ['IsReadBarrier', 'IsWriteBarrier']) header_output += ImmOpDeclare.subst(dmbIop) decoder_output += ImmOpConstructor.subst(dmbIop) exec_output += PredOpExecute.subst(dmbIop) diff --git a/src/arch/arm/isa/insts/misc64.isa b/src/arch/arm/isa/insts/misc64.isa index 7911ec90d..e8dc41cfc 100644 --- a/src/arch/arm/isa/insts/misc64.isa +++ b/src/arch/arm/isa/insts/misc64.isa @@ -173,13 +173,14 @@ let {{ exec_output += BasicExecute.subst(isbIop) dsbIop = InstObjParams("dsb", "Dsb64", "ArmStaticInst", "", - ['IsMemBarrier', 'IsSerializeAfter']) + ['IsReadBarrier', 'IsWriteBarrier', + 'IsSerializeAfter']) header_output += BasicDeclare.subst(dsbIop) decoder_output += BasicConstructor64.subst(dsbIop) exec_output += BasicExecute.subst(dsbIop) dmbIop = InstObjParams("dmb", "Dmb64", "ArmStaticInst", "", - ['IsMemBarrier']) + ['IsReadBarrier', 'IsWriteBarrier']) header_output += BasicDeclare.subst(dmbIop) decoder_output += BasicConstructor64.subst(dmbIop) exec_output += BasicExecute.subst(dmbIop) diff --git a/src/arch/arm/isa/insts/str.isa b/src/arch/arm/isa/insts/str.isa index e99f6adc4..48bf153dd 100644 --- a/src/arch/arm/isa/insts/str.isa +++ b/src/arch/arm/isa/insts/str.isa @@ -187,8 +187,7 @@ let {{ self.memFlags.append("ArmISA::TLB::AllowUnaligned") if self.flavor in ("release", "relex"): - self.instFlags.extend(["IsMemBarrier", - "IsWriteBarrier", + self.instFlags.extend(["IsWriteBarrier", "IsReadBarrier"]) self.memFlags.append("Request::RELEASE") @@ -269,8 +268,7 @@ let {{ self.memFlags.append("ArmISA::TLB::AlignWord") if self.flavor in ("release", "relex"): - self.instFlags.extend(["IsMemBarrier", - "IsWriteBarrier", + self.instFlags.extend(["IsWriteBarrier", "IsReadBarrier"]) self.memFlags.append("Request::RELEASE") diff --git a/src/arch/arm/isa/insts/str64.isa b/src/arch/arm/isa/insts/str64.isa index ac845336a..ed9906487 100644 --- a/src/arch/arm/isa/insts/str64.isa +++ b/src/arch/arm/isa/insts/str64.isa @@ -79,8 +79,7 @@ let {{ self.instFlags.append("IsMicroop") if self.flavor in ("release", "relex", "relexp"): - self.instFlags.extend(["IsMemBarrier", - "IsWriteBarrier", + self.instFlags.extend(["IsWriteBarrier", "IsReadBarrier"]) self.memFlags.append("Request::RELEASE") diff --git a/src/arch/arm/isa/templates/semihost.isa b/src/arch/arm/isa/templates/semihost.isa index 0ad84c887..c60db1752 100644 --- a/src/arch/arm/isa/templates/semihost.isa +++ b/src/arch/arm/isa/templates/semihost.isa @@ -38,8 +38,8 @@ // A new class of Semihosting constructor templates has been added. // Their main purpose is to check if the Exception Generation // Instructions (HLT, SVC) are actually a semihosting command. -// If that is the case, the IsMemBarrier flag is raised, so that -// in the O3 model we perform a coherent memory access during +// If that is the case, the IsReadBarrier and IsWriteBarrier flags are raised, +// so that in the O3 model we perform a coherent memory access during // the semihosting operation. // Please note: since we don't have a thread context pointer in the // constructor we cannot check if semihosting is enabled in the @@ -64,7 +64,8 @@ def template SemihostConstructor {{ auto semihost_imm = machInst.thumb? %(thumb_semihost)s : %(arm_semihost)s; if (_imm == semihost_imm) { - flags[IsMemBarrier] = true; + flags[IsReadBarrier] = true; + flags[IsWriteBarrier] = true; } } }}; @@ -78,7 +79,8 @@ def template SemihostConstructor64 {{ // In AArch64 there is only one instruction for issuing // semhosting commands: HLT #0xF000 if (_imm == 0xF000) { - flags[IsMemBarrier] = true; + flags[IsReadBarrier] = true; + flags[IsWriteBarrier] = true; } } }}; diff --git a/src/arch/mips/isa/decoder.isa b/src/arch/mips/isa/decoder.isa index 73e2b5dc8..3b2b0156e 100644 --- a/src/arch/mips/isa/decoder.isa +++ b/src/arch/mips/isa/decoder.isa @@ -166,7 +166,7 @@ decode OPCODE_HI default Unknown::unknown() { fault = std::make_shared(); }}); } - 0x7: sync({{ ; }}, IsMemBarrier); + 0x7: sync({{ ; }}, IsReadBarrier, IsWriteBarrier); 0x5: break({{fault = std::make_shared();}}); } diff --git a/src/arch/power/isa/decoder.isa b/src/arch/power/isa/decoder.isa index b7b9afffc..475ddcc00 100644 --- a/src/arch/power/isa/decoder.isa +++ b/src/arch/power/isa/decoder.isa @@ -343,8 +343,8 @@ decode OPCODE default Unknown::unknown() { format MiscOp { 278: dcbt({{ }}); 246: dcbtst({{ }}); - 598: sync({{ }}, [ IsMemBarrier ]); - 854: eieio({{ }}, [ IsMemBarrier ]); + 598: sync({{ }}, [ IsReadBarrier, IsWriteBarrier ]); + 854: eieio({{ }}, [ IsReadBarrier, IsWriteBarrier ]); } } diff --git a/src/arch/riscv/isa/decoder.isa b/src/arch/riscv/isa/decoder.isa index 7b19464a6..b39005ff3 100644 --- a/src/arch/riscv/isa/decoder.isa +++ b/src/arch/riscv/isa/decoder.isa @@ -421,7 +421,7 @@ decode QUADRANT default Unknown::unknown() { 0x03: decode FUNCT3 { format FenceOp { 0x0: fence({{ - }}, uint64_t, IsMemBarrier, No_OpClass); + }}, uint64_t, IsReadBarrier, IsWriteBarrier, No_OpClass); 0x1: fence_i({{ }}, uint64_t, IsNonSpeculative, IsSerializeAfter, No_OpClass); } diff --git a/src/arch/riscv/isa/formats/amo.isa b/src/arch/riscv/isa/formats/amo.isa index 8c7a6a510..7b151bd9c 100644 --- a/src/arch/riscv/isa/formats/amo.isa +++ b/src/arch/riscv/isa/formats/amo.isa @@ -100,7 +100,8 @@ def template LRSCMacroConstructor {{ if (RL) { rel_fence = new MemFenceMicro(machInst, No_OpClass); rel_fence->setFlag(IsFirstMicroop); - rel_fence->setFlag(IsMemBarrier); + rel_fence->setFlag(IsReadBarrier); + rel_fence->setFlag(IsWriteBarrier); rel_fence->setFlag(IsDelayedCommit); } @@ -121,7 +122,8 @@ def template LRSCMacroConstructor {{ if (AQ) { acq_fence = new MemFenceMicro(machInst, No_OpClass); acq_fence->setFlag(IsLastMicroop); - acq_fence->setFlag(IsMemBarrier); + acq_fence->setFlag(IsReadBarrier); + acq_fence->setFlag(IsWriteBarrier); } if (RL && AQ) { @@ -159,7 +161,8 @@ def template AtomicMemOpMacroConstructor {{ if (RL) { rel_fence = new MemFenceMicro(machInst, No_OpClass); rel_fence->setFlag(IsFirstMicroop); - rel_fence->setFlag(IsMemBarrier); + rel_fence->setFlag(IsReadBarrier); + rel_fence->setFlag(IsWriteBarrier); rel_fence->setFlag(IsDelayedCommit); } @@ -180,7 +183,8 @@ def template AtomicMemOpMacroConstructor {{ if (AQ) { acq_fence = new MemFenceMicro(machInst, No_OpClass); acq_fence->setFlag(IsLastMicroop); - acq_fence->setFlag(IsMemBarrier); + acq_fence->setFlag(IsReadBarrier); + acq_fence->setFlag(IsWriteBarrier); } if (RL && AQ) { diff --git a/src/arch/sparc/isa/decoder.isa b/src/arch/sparc/isa/decoder.isa index 75a4d7578..c89a14132 100644 --- a/src/arch/sparc/isa/decoder.isa +++ b/src/arch/sparc/isa/decoder.isa @@ -335,7 +335,8 @@ decode OP default Unknown::unknown() // 7-14 should cause an illegal instruction exception 0x0F: decode I { 0x0: Nop::stbar(IsWriteBarrier, MemWriteOp); - 0x1: Nop::membar(IsMemBarrier, MemReadOp); + 0x1: Nop::membar(IsReadBarrier, IsWriteBarrier, + MemReadOp); } 0x10: Priv::rdpcr({{Rd = Pcr;}}); 0x11: Priv::rdpic({{Rd = Pic;}}, {{Pcr<0:>}}); diff --git a/src/arch/x86/isa/decoder/two_byte_opcodes.isa b/src/arch/x86/isa/decoder/two_byte_opcodes.isa index 5d4514405..e8b1e3d76 100644 --- a/src/arch/x86/isa/decoder/two_byte_opcodes.isa +++ b/src/arch/x86/isa/decoder/two_byte_opcodes.isa @@ -786,13 +786,12 @@ //0x6: group15(); 0x6: decode MODRM_MOD { 0x3: decode MODRM_REG { - 0x5: BasicOperate::LFENCE( - {{/*Nothing*/}}, IsReadBarrier, - IsSerializeAfter); - 0x6: BasicOperate::MFENCE( - {{/*Nothing*/}}, IsMemBarrier); - 0x7: BasicOperate::SFENCE( - {{/*Nothing*/}}, IsWriteBarrier); + 0x5: BasicOperate::LFENCE({{/*Nothing*/}}, + IsReadBarrier, IsSerializeAfter); + 0x6: BasicOperate::MFENCE({{/*Nothing*/}}, + IsReadBarrier, IsWriteBarrier); + 0x7: BasicOperate::SFENCE({{/*Nothing*/}}, + IsWriteBarrier); default: Inst::UD2(); } default: decode MODRM_REG { diff --git a/src/arch/x86/isa/microops/specop.isa b/src/arch/x86/isa/microops/specop.isa index a7dda10cf..1d50569f0 100644 --- a/src/arch/x86/isa/microops/specop.isa +++ b/src/arch/x86/isa/microops/specop.isa @@ -233,7 +233,8 @@ let {{ def __init__(self): self.className = "Mfence" self.mnemonic = "mfence" - self.instFlags = "| (1ULL << StaticInst::IsMemBarrier)" + self.instFlags = "| (1ULL << StaticInst::IsReadBarrier)" + \ + "| (1ULL << StaticInst::IsWriteBarrier)" def getAllocator(self, microFlags): allocString = ''' diff --git a/src/cpu/StaticInstFlags.py b/src/cpu/StaticInstFlags.py index cbdfec394..e11ee6813 100644 --- a/src/cpu/StaticInstFlags.py +++ b/src/cpu/StaticInstFlags.py @@ -40,11 +40,6 @@ from m5.params import * # - If IsControl is set, then exactly one of IsDirectControl or IsIndirect # Control will be set, and exactly one of IsCondControl or IsUncondControl # will be set. -# - IsSerializing, IsMemBarrier, and IsWriteBarrier are implemented as flags -# since in the current model there's no other way for instructions to inject -# behavior into the pipeline outside of fetch. Once we go to an exec-in-exec -# CPU model we should be able to get rid of these flags and implement this -# behavior via the execute() methods. class StaticInstFlags(Enum): wrapper_name = 'StaticInstFlags' @@ -79,7 +74,6 @@ class StaticInstFlags(Enum): # older instructions have committed. 'IsSerializeBefore', 'IsSerializeAfter', - 'IsMemBarrier', # Is a memory barrier 'IsWriteBarrier', # Is a write barrier 'IsReadBarrier', # Is a read barrier diff --git a/src/cpu/base_dyn_inst.hh b/src/cpu/base_dyn_inst.hh index 64ed060ac..fd216b173 100644 --- a/src/cpu/base_dyn_inst.hh +++ b/src/cpu/base_dyn_inst.hh @@ -553,7 +553,8 @@ class BaseDynInst : public ExecContext, public RefCounted return staticInst->isSerializeAfter() || status[SerializeAfter]; } bool isSquashAfter() const { return staticInst->isSquashAfter(); } - bool isMemBarrier() const { return staticInst->isMemBarrier(); } + bool isFullMemBarrier() const { return staticInst->isFullMemBarrier(); } + bool isReadBarrier() const { return staticInst->isReadBarrier(); } bool isWriteBarrier() const { return staticInst->isWriteBarrier(); } bool isNonSpeculative() const { return staticInst->isNonSpeculative(); } bool isQuiesce() const { return staticInst->isQuiesce(); } diff --git a/src/cpu/minor/execute.cc b/src/cpu/minor/execute.cc index f8db5231a..083322499 100644 --- a/src/cpu/minor/execute.cc +++ b/src/cpu/minor/execute.cc @@ -781,7 +781,7 @@ Execute::issue(ThreadID thread_id) /* Mark up barriers in the LSQ */ if (!discarded && inst->isInst() && - inst->staticInst->isMemBarrier()) + inst->staticInst->isFullMemBarrier()) { DPRINTF(MinorMem, "Issuing memory barrier inst: %s\n", *inst); lsq.issuedMemBarrierInst(inst); @@ -951,7 +951,7 @@ Execute::commitInst(MinorDynInstPtr inst, bool early_memory_issue, completed_inst = completed_mem_inst; } completed_mem_issue = completed_inst; - } else if (inst->isInst() && inst->staticInst->isMemBarrier() && + } else if (inst->isInst() && inst->staticInst->isFullMemBarrier() && !lsq.canPushIntoStoreBuffer()) { DPRINTF(MinorExecute, "Can't commit data barrier inst: %s yet as" @@ -1368,7 +1368,7 @@ Execute::commit(ThreadID thread_id, bool only_commit_microops, bool discard, ex_info.inFlightInsts->pop(); /* Complete barriers in the LSQ/move to store buffer */ - if (inst->isInst() && inst->staticInst->isMemBarrier()) { + if (inst->isInst() && inst->staticInst->isFullMemBarrier()) { DPRINTF(MinorMem, "Completing memory barrier" " inst: %s committed: %d\n", *inst, committed_inst); lsq.completeMemBarrierInst(inst, committed_inst); diff --git a/src/cpu/minor/lsq.cc b/src/cpu/minor/lsq.cc index 106b51b6e..d8c1c7a3b 100644 --- a/src/cpu/minor/lsq.cc +++ b/src/cpu/minor/lsq.cc @@ -154,7 +154,7 @@ LSQ::LSQRequest::containsAddrRangeOf(LSQRequestPtr other_request) bool LSQ::LSQRequest::isBarrier() { - return inst->isInst() && inst->staticInst->isMemBarrier(); + return inst->isInst() && inst->staticInst->isFullMemBarrier(); } bool @@ -1711,7 +1711,7 @@ makePacketForRequest(const RequestPtr &request, bool isLoad, void LSQ::issuedMemBarrierInst(MinorDynInstPtr inst) { - assert(inst->isInst() && inst->staticInst->isMemBarrier()); + assert(inst->isInst() && inst->staticInst->isFullMemBarrier()); assert(inst->id.execSeqNum > lastMemBarrier[inst->id.threadId]); /* Remember the barrier. We only have a notion of one diff --git a/src/cpu/o3/commit_impl.hh b/src/cpu/o3/commit_impl.hh index 0d5cbe5db..f6a2e2e6e 100644 --- a/src/cpu/o3/commit_impl.hh +++ b/src/cpu/o3/commit_impl.hh @@ -1196,7 +1196,7 @@ DefaultCommit::commitHead(const DynInstPtr &head_inst, unsigned inst_num) // Make sure we are only trying to commit un-executed instructions we // think are possible. assert(head_inst->isNonSpeculative() || head_inst->isStoreConditional() - || head_inst->isMemBarrier() || head_inst->isWriteBarrier() + || head_inst->isReadBarrier() || head_inst->isWriteBarrier() || head_inst->isAtomic() || (head_inst->isLoad() && head_inst->strictlyOrdered())); @@ -1462,7 +1462,7 @@ DefaultCommit::updateComInstStats(const DynInstPtr &inst) } } - if (inst->isMemBarrier()) { + if (inst->isFullMemBarrier()) { stats.membars[tid]++; } diff --git a/src/cpu/o3/iew_impl.hh b/src/cpu/o3/iew_impl.hh index 497c5321b..b39001dbc 100644 --- a/src/cpu/o3/iew_impl.hh +++ b/src/cpu/o3/iew_impl.hh @@ -1119,7 +1119,7 @@ DefaultIEW::dispatchInsts(ThreadID tid) } toRename->iewInfo[tid].dispatchedToSQ++; - } else if (inst->isMemBarrier() || inst->isWriteBarrier()) { + } else if (inst->isReadBarrier() || inst->isWriteBarrier()) { // Same as non-speculative stores. inst->setCanCommit(); instQueue.insertBarrier(inst); diff --git a/src/cpu/o3/inst_queue_impl.hh b/src/cpu/o3/inst_queue_impl.hh index ff5b3be4f..19ed49a5d 100644 --- a/src/cpu/o3/inst_queue_impl.hh +++ b/src/cpu/o3/inst_queue_impl.hh @@ -1014,7 +1014,7 @@ InstructionQueue::wakeDependents(const DynInstPtr &completed_inst) ++freeEntries; completed_inst->memOpDone(true); count[tid]--; - } else if (completed_inst->isMemBarrier() || + } else if (completed_inst->isReadBarrier() || completed_inst->isWriteBarrier()) { // Completes a non mem ref barrier memDepUnit[tid].completeInst(completed_inst); @@ -1245,7 +1245,7 @@ InstructionQueue::doSquash(ThreadID tid) DPRINTF(IQ, "[tid:%i] Instruction [sn:%llu] PC %s squashed.\n", tid, squashed_inst->seqNum, squashed_inst->pcState()); - bool is_acq_rel = squashed_inst->isMemBarrier() && + bool is_acq_rel = squashed_inst->isFullMemBarrier() && (squashed_inst->isLoad() || (squashed_inst->isStore() && !squashed_inst->isStoreConditional())); @@ -1255,7 +1255,7 @@ InstructionQueue::doSquash(ThreadID tid) (!squashed_inst->isNonSpeculative() && !squashed_inst->isStoreConditional() && !squashed_inst->isAtomic() && - !squashed_inst->isMemBarrier() && + !squashed_inst->isReadBarrier() && !squashed_inst->isWriteBarrier())) { for (int src_reg_idx = 0; diff --git a/src/cpu/o3/mem_dep_unit_impl.hh b/src/cpu/o3/mem_dep_unit_impl.hh index 4be98c51d..7b83efdf4 100644 --- a/src/cpu/o3/mem_dep_unit_impl.hh +++ b/src/cpu/o3/mem_dep_unit_impl.hh @@ -44,6 +44,7 @@ #include #include +#include "base/debug.hh" #include "cpu/o3/inst_queue.hh" #include "cpu/o3/mem_dep_unit.hh" #include "debug/MemDepUnit.hh" @@ -171,24 +172,31 @@ void MemDepUnit::insertBarrierSN(const DynInstPtr &barr_inst) { InstSeqNum barr_sn = barr_inst->seqNum; - // Memory barriers block loads and stores, write barriers only stores. - // Required also for hardware transactional memory commands which - // can have strict ordering semantics - if (barr_inst->isMemBarrier() || barr_inst->isHtmCmd()) { + + if (barr_inst->isReadBarrier() || barr_inst->isHtmCmd()) loadBarrierSNs.insert(barr_sn); + if (barr_inst->isWriteBarrier() || barr_inst->isHtmCmd()) storeBarrierSNs.insert(barr_sn); - DPRINTF(MemDepUnit, "Inserted a memory barrier %s SN:%lli\n", - barr_inst->pcState(), barr_sn); - } else if (barr_inst->isWriteBarrier()) { - storeBarrierSNs.insert(barr_sn); - DPRINTF(MemDepUnit, "Inserted a write barrier %s SN:%lli\n", - barr_inst->pcState(), barr_sn); - } - if (loadBarrierSNs.size() || storeBarrierSNs.size()) { - DPRINTF(MemDepUnit, "Outstanding load barriers = %d; " - "store barriers = %d\n", - loadBarrierSNs.size(), storeBarrierSNs.size()); + if (DTRACE(MemDepUnit)) { + const char *barrier_type = nullptr; + if (barr_inst->isReadBarrier() && barr_inst->isWriteBarrier()) + barrier_type = "memory"; + else if (barr_inst->isReadBarrier()) + barrier_type = "read"; + else if (barr_inst->isWriteBarrier()) + barrier_type = "write"; + + if (barrier_type) { + DPRINTF(MemDepUnit, "Inserted a %s barrier %s SN:%lli\n", + barrier_type, barr_inst->pcState(), barr_sn); + } + + if (loadBarrierSNs.size() || storeBarrierSNs.size()) { + DPRINTF(MemDepUnit, "Outstanding load barriers = %d; " + "store barriers = %d\n", + loadBarrierSNs.size(), storeBarrierSNs.size()); + } } } @@ -444,18 +452,27 @@ MemDepUnit::completeInst(const DynInstPtr &inst) completed(inst); InstSeqNum barr_sn = inst->seqNum; - if (inst->isMemBarrier() || inst->isHtmCmd()) { - assert(hasLoadBarrier()); + if (inst->isWriteBarrier() || inst->isHtmCmd()) { assert(hasStoreBarrier()); - loadBarrierSNs.erase(barr_sn); storeBarrierSNs.erase(barr_sn); - DPRINTF(MemDepUnit, "Memory barrier completed: %s SN:%lli\n", - inst->pcState(), inst->seqNum); - } else if (inst->isWriteBarrier()) { - assert(hasStoreBarrier()); - storeBarrierSNs.erase(barr_sn); - DPRINTF(MemDepUnit, "Write barrier completed: %s SN:%lli\n", - inst->pcState(), inst->seqNum); + } + if (inst->isReadBarrier() || inst->isHtmCmd()) { + assert(hasLoadBarrier()); + loadBarrierSNs.erase(barr_sn); + } + if (DTRACE(MemDepUnit)) { + const char *barrier_type = nullptr; + if (inst->isWriteBarrier() && inst->isReadBarrier()) + barrier_type = "Memory"; + else if (inst->isWriteBarrier()) + barrier_type = "Write"; + else if (inst->isReadBarrier()) + barrier_type = "Read"; + + if (barrier_type) { + DPRINTF(MemDepUnit, "%s barrier completed: %s SN:%lli\n", + barrier_type, inst->pcState(), inst->seqNum); + } } } @@ -463,9 +480,8 @@ template void MemDepUnit::wakeDependents(const DynInstPtr &inst) { - // Only stores, atomics, barriers and - // hardware transactional memory commands have dependents. - if (!inst->isStore() && !inst->isAtomic() && !inst->isMemBarrier() && + // Only stores, atomics and barriers have dependents. + if (!inst->isStore() && !inst->isAtomic() && !inst->isReadBarrier() && !inst->isWriteBarrier() && !inst->isHtmCmd()) { return; } diff --git a/src/cpu/static_inst.hh b/src/cpu/static_inst.hh index f77193a44..258749c4c 100644 --- a/src/cpu/static_inst.hh +++ b/src/cpu/static_inst.hh @@ -185,7 +185,12 @@ class StaticInst : public RefCounted, public StaticInstFlags bool isSerializeBefore() const { return flags[IsSerializeBefore]; } bool isSerializeAfter() const { return flags[IsSerializeAfter]; } bool isSquashAfter() const { return flags[IsSquashAfter]; } - bool isMemBarrier() const { return flags[IsMemBarrier]; } + bool + isFullMemBarrier() const + { + return flags[IsReadBarrier] && flags[IsWriteBarrier]; + } + bool isReadBarrier() const { return flags[IsReadBarrier]; } bool isWriteBarrier() const { return flags[IsWriteBarrier]; } bool isNonSpeculative() const { return flags[IsNonSpeculative]; } bool isQuiesce() const { return flags[IsQuiesce]; } -- 2.30.2