From 4f4846c5327e4780bd8e714bada80bd15dc1fc51 Mon Sep 17 00:00:00 2001
From: Tuan Ta
Date: Mon, 12 Feb 2018 23:13:34 -0500
Subject: [PATCH] riscv: fix AMO, LR and SC instructions

(1) Atomic Memory Operations (AMOs)

This patch changes how RISC-V AMO instructions are implemented. For each
AMO, instead of issuing a locking load followed by an unlocking store
request to the downstream memory system, this patch issues a single memory
request carrying a corresponding AtomicOpFunctor to the memory system. Once
the memory system receives the request, the atomic operation is executed in
a single step.

This patch also changes how AMO instructions handle the acquire and release
flags (e.g., amoadd.aq and amoadd.rl). If an AMO carries an acquire flag, a
memory-fence micro-op is inserted after the AMO completes. If an AMO
carries a release flag, another memory fence is inserted before the AMO
executes. If both flags are set, the AMO is broken into a sequence of three
micro-ops: mem fence -> atomic RMW -> mem fence. This change makes the AMO
implementation comply with the release consistency model.

(2) Load-Reserved (LR) and Store-Conditional (SC)

Addresses reserved by LR instructions are tracked in a stack data
structure. An LR instruction pushes its target address onto the stack, and
an SC instruction pops the top address from the stack. As specified by the
RISC-V ISA, an SC fails if its target address does not match the address of
the most recent LR.

Previously, there was a single stack shared by all hardware thread
contexts. A stack shared between thread contexts can lead to an infinite
sequence of failed SCs if LRs from other threads keep pushing new addresses
onto it. This patch gives each context its own private stack to address the
problem.

This patch also adds extra memory-fence micro-ops to LR/SC to guarantee a
correct execution order of memory instructions with respect to the release
consistency model.
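
To illustrate the functor-based AMO path, here is a minimal standalone
sketch (the GenericAtomicOp name, the memory word and the harness around it
are illustrative only, written in the spirit of the AtomicGenericOp class
added by this patch): the functor captures the register operand and the RMW
step, the memory system applies it in one step, and the old value is what
ends up in Rd.

    // Standalone illustration of a functor-based AMO: the old memory value
    // is what the instruction writes back to Rd, and the update happens in
    // a single step at the point where the functor runs.
    #include <cstdint>
    #include <functional>
    #include <iostream>

    template <typename T>
    struct GenericAtomicOp
    {
        T a;                               // value from the source register
        std::function<void(T *, T)> op;    // the read-modify-write step

        void execute(T *mem) const { op(mem, a); }
    };

    int main()
    {
        uint32_t mem_word = 10;            // illustrative memory location

        // amoadd.w-style functor, analogous to the lambdas in decoder.isa
        GenericAtomicOp<uint32_t> amo_add{5,
            [](uint32_t *b, uint32_t a) { *b += a; }};

        uint32_t old_value = mem_word;     // returned to the core (Rd)
        amo_add.execute(&mem_word);        // applied in one step in memory

        std::cout << "Rd=" << old_value << " mem=" << mem_word << "\n";
        return 0;
    }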
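
The aq/rl handling amounts to selecting a micro-op sequence around the RMW.
The sketch below is a simplified stand-in for the constructor-template
logic (strings instead of real micro-op objects, and without the
First/Last/DelayedCommit flag bookkeeping):

    // Simplified view of the micro-op sequence chosen for an AMO (or LR/SC)
    // based on its acquire/release bits.
    #include <iostream>
    #include <string>
    #include <vector>

    std::vector<std::string> expandAmo(bool aq, bool rl)
    {
        std::vector<std::string> microops;
        if (rl)
            microops.push_back("rel_fence");  // release fence before the RMW
        microops.push_back("atomic_rmw");     // single-step AMO request
        if (aq)
            microops.push_back("acq_fence");  // acquire fence after the RMW
        return microops;
    }

    int main()
    {
        for (const auto &uop : expandAmo(true, true))   // e.g. amoadd.w.aqrl
            std::cout << uop << "\n";
        return 0;
    }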
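
For the LR/SC change, reservation bookkeeping is keyed by context id. Below
is a minimal sketch in which plain integers stand in for gem5's context-id
and address types; storeConditional is an illustrative helper, not a gem5
function:

    // Per-context reservation stacks, analogous to locked_addrs in
    // locked_mem.hh: an LR pushes onto its own context's stack, so LRs
    // from other contexts can no longer invalidate this context's SC.
    #include <cstdint>
    #include <iostream>
    #include <stack>
    #include <unordered_map>

    std::unordered_map<int, std::stack<uint64_t>> locked_addrs;

    bool storeConditional(int ctx, uint64_t addr)
    {
        auto &stack = locked_addrs[ctx];
        // SC succeeds only if the most recent LR of *this* context matches.
        if (stack.empty() || stack.top() != addr)
            return false;
        stack.pop();
        return true;
    }

    int main()
    {
        locked_addrs[0].push(0x1000);   // lr on context 0
        locked_addrs[1].push(0x2000);   // lr on context 1 (no effect on 0)
        std::cout << storeConditional(0, 0x1000) << "\n";  // 1: succeeds
        std::cout << storeConditional(0, 0x1000) << "\n";  // 0: reservation gone
        return 0;
    }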
Change-Id: I1e95900367c89dd866ba872a5203f63359ac51ae Reviewed-on: https://gem5-review.googlesource.com/c/8189 Reviewed-by: Alec Roelke Maintainer: Alec Roelke --- src/arch/riscv/insts/amo.cc | 43 +++- src/arch/riscv/insts/amo.hh | 65 +++++- src/arch/riscv/isa/decoder.isa | 202 ++++++++++------ src/arch/riscv/isa/formats/amo.isa | 355 +++++++++++++++++++++-------- src/arch/riscv/locked_mem.cc | 4 +- src/arch/riscv/locked_mem.hh | 27 ++- 6 files changed, 505 insertions(+), 191 deletions(-) diff --git a/src/arch/riscv/insts/amo.cc b/src/arch/riscv/insts/amo.cc index 7f5740f14..d12064720 100644 --- a/src/arch/riscv/insts/amo.cc +++ b/src/arch/riscv/insts/amo.cc @@ -43,6 +43,22 @@ using namespace std; namespace RiscvISA { +// memfence micro instruction +string MemFenceMicro::generateDisassembly(Addr pc, + const SymbolTable *symtab) const +{ + stringstream ss; + ss << csprintf("0x%08x", machInst) << ' ' << mnemonic; + return ss.str(); +} + +Fault MemFenceMicro::execute(ExecContext *xc, + Trace::InstRecord *traceData) const +{ + return NoFault; +} + +// load-reserved string LoadReserved::generateDisassembly(Addr pc, const SymbolTable *symtab) const { @@ -52,6 +68,16 @@ string LoadReserved::generateDisassembly(Addr pc, return ss.str(); } +string LoadReservedMicro::generateDisassembly(Addr pc, + const SymbolTable *symtab) const +{ + stringstream ss; + ss << mnemonic << ' ' << registerName(_destRegIdx[0]) << ", (" + << registerName(_srcRegIdx[0]) << ')'; + return ss.str(); +} + +// store-conditional string StoreCond::generateDisassembly(Addr pc, const SymbolTable *symtab) const { @@ -62,6 +88,17 @@ string StoreCond::generateDisassembly(Addr pc, return ss.str(); } +string StoreCondMicro::generateDisassembly(Addr pc, + const SymbolTable *symtab) const +{ + stringstream ss; + ss << mnemonic << ' ' << registerName(_destRegIdx[0]) << ", " + << registerName(_srcRegIdx[1]) << ", (" + << registerName(_srcRegIdx[0]) << ')'; + return ss.str(); +} + +// AMOs string AtomicMemOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const { @@ -76,8 +113,10 @@ string AtomicMemOpMicro::generateDisassembly(Addr pc, const SymbolTable *symtab) const { stringstream ss; - ss << csprintf("0x%08x", machInst) << ' ' << mnemonic; + ss << mnemonic << ' ' << registerName(_destRegIdx[0]) << ", " + << registerName(_srcRegIdx[1]) << ", (" + << registerName(_srcRegIdx[0]) << ')'; return ss.str(); } -} \ No newline at end of file +} diff --git a/src/arch/riscv/insts/amo.hh b/src/arch/riscv/insts/amo.hh index 7c07bc243..748fe14a3 100644 --- a/src/arch/riscv/insts/amo.hh +++ b/src/arch/riscv/insts/amo.hh @@ -41,24 +41,62 @@ namespace RiscvISA { -class LoadReserved : public MemInst +// memfence micro instruction +class MemFenceMicro : public RiscvMicroInst { + public: + MemFenceMicro(ExtMachInst _machInst, OpClass __opClass) + : RiscvMicroInst("fence", _machInst, __opClass) + { } protected: - using MemInst::MemInst; + using RiscvMicroInst::RiscvMicroInst; + Fault execute(ExecContext *, Trace::InstRecord *) const override; std::string generateDisassembly( Addr pc, const SymbolTable *symtab) const override; }; -class StoreCond : public MemInst +// load-reserved +class LoadReserved : public RiscvMacroInst { protected: - using MemInst::MemInst; + using RiscvMacroInst::RiscvMacroInst; std::string generateDisassembly( Addr pc, const SymbolTable *symtab) const override; }; +class LoadReservedMicro : public RiscvMicroInst +{ + protected: + Request::Flags memAccessFlags; + using RiscvMicroInst::RiscvMicroInst; + + std::string 
generateDisassembly( + Addr pc, const SymbolTable *symtab) const override; +}; + +// store-cond +class StoreCond : public RiscvMacroInst +{ + protected: + using RiscvMacroInst::RiscvMacroInst; + + std::string generateDisassembly( + Addr pc, const SymbolTable *symtab) const override; +}; + +class StoreCondMicro : public RiscvMicroInst +{ + protected: + Request::Flags memAccessFlags; + using RiscvMicroInst::RiscvMicroInst; + + std::string generateDisassembly( + Addr pc, const SymbolTable *symtab) const override; +}; + +// AMOs class AtomicMemOp : public RiscvMacroInst { protected: @@ -78,6 +116,23 @@ class AtomicMemOpMicro : public RiscvMicroInst Addr pc, const SymbolTable *symtab) const override; }; +/** + * A generic atomic op class + */ + +template +class AtomicGenericOp : public TypedAtomicOpFunctor +{ + public: + AtomicGenericOp(T _a, std::function _op) + : a(_a), op(_op) { } + AtomicOpFunctor* clone() { return new AtomicGenericOp(*this); } + void execute(T *b) { op(b, a); } + private: + T a; + std::function op; +}; + } -#endif // __ARCH_RISCV_INSTS_AMO_HH__ \ No newline at end of file +#endif // __ARCH_RISCV_INSTS_AMO_HH__ diff --git a/src/arch/riscv/isa/decoder.isa b/src/arch/riscv/isa/decoder.isa index 3a04a02de..8de4829a6 100644 --- a/src/arch/riscv/isa/decoder.isa +++ b/src/arch/riscv/isa/decoder.isa @@ -512,44 +512,69 @@ decode QUADRANT default Unknown::unknown() { }}, {{ Rd = result; }}, inst_flags=IsStoreConditional, mem_flags=LLSC); - format AtomicMemOp { - 0x0: amoadd_w({{Rt_sd = Mem_sw;}}, {{ - Mem_sw = Rs2_sw + Rt_sd; - Rd_sd = Rt_sd; - }}, {{EA = Rs1;}}); - 0x1: amoswap_w({{Rt_sd = Mem_sw;}}, {{ - Mem_sw = Rs2_uw; - Rd_sd = Rt_sd; - }}, {{EA = Rs1;}}); - 0x4: amoxor_w({{Rt_sd = Mem_sw;}}, {{ - Mem_sw = Rs2_uw^Rt_sd; - Rd_sd = Rt_sd; - }}, {{EA = Rs1;}}); - 0x8: amoor_w({{Rt_sd = Mem_sw;}}, {{ - Mem_sw = Rs2_uw | Rt_sd; - Rd_sd = Rt_sd; - }}, {{EA = Rs1;}}); - 0xc: amoand_w({{Rt_sd = Mem_sw;}}, {{ - Mem_sw = Rs2_uw&Rt_sd; - Rd_sd = Rt_sd; - }}, {{EA = Rs1;}}); - 0x10: amomin_w({{Rt_sd = Mem_sw;}}, {{ - Mem_sw = min(Rs2_sw, Rt_sd); - Rd_sd = Rt_sd; - }}, {{EA = Rs1;}}); - 0x14: amomax_w({{Rt_sd = Mem_sw;}}, {{ - Mem_sw = max(Rs2_sw, Rt_sd); - Rd_sd = Rt_sd; - }}, {{EA = Rs1;}}); - 0x18: amominu_w({{Rt_sd = Mem_sw;}}, {{ - Mem_sw = min(Rs2_uw, Rt_sd); - Rd_sd = Rt_sd; - }}, {{EA = Rs1;}}); - 0x1c: amomaxu_w({{Rt_sd = Mem_sw;}}, {{ - Mem_sw = max(Rs2_uw, Rt_sd); - Rd_sd = Rt_sd; - }}, {{EA = Rs1;}}); - } + 0x0: AtomicMemOp::amoadd_w({{ + Rd_sd = Mem_sw; + }}, {{ + TypedAtomicOpFunctor *amo_op = + new AtomicGenericOp(Rs2_sw, + [](int32_t* b, int32_t a){ *b += a; }); + }}, mem_flags=ATOMIC_RETURN_OP); + 0x1: AtomicMemOp::amoswap_w({{ + Rd_sd = Mem_sw; + }}, {{ + TypedAtomicOpFunctor *amo_op = + new AtomicGenericOp(Rs2_uw, + [](uint32_t* b, uint32_t a){ *b = a; }); + }}, mem_flags=ATOMIC_RETURN_OP); + 0x4: AtomicMemOp::amoxor_w({{ + Rd_sd = Mem_sw; + }}, {{ + TypedAtomicOpFunctor *amo_op = + new AtomicGenericOp(Rs2_uw, + [](uint32_t* b, uint32_t a){ *b ^= a; }); + }}, mem_flags=ATOMIC_RETURN_OP); + 0x8: AtomicMemOp::amoor_w({{ + Rd_sd = Mem_sw; + }}, {{ + TypedAtomicOpFunctor *amo_op = + new AtomicGenericOp(Rs2_uw, + [](uint32_t* b, uint32_t a){ *b |= a; }); + }}, mem_flags=ATOMIC_RETURN_OP); + 0xc: AtomicMemOp::amoand_w({{ + Rd_sd = Mem_sw; + }}, {{ + TypedAtomicOpFunctor *amo_op = + new AtomicGenericOp(Rs2_uw, + [](uint32_t* b, uint32_t a){ *b &= a; }); + }}, mem_flags=ATOMIC_RETURN_OP); + 0x10: AtomicMemOp::amomin_w({{ + Rd_sd = Mem_sw; + }}, {{ + TypedAtomicOpFunctor *amo_op = 
+ new AtomicGenericOp(Rs2_sw, + [](int32_t* b, int32_t a){ if (a < *b) *b = a; }); + }}, mem_flags=ATOMIC_RETURN_OP); + 0x14: AtomicMemOp::amomax_w({{ + Rd_sd = Mem_sw; + }}, {{ + TypedAtomicOpFunctor *amo_op = + new AtomicGenericOp(Rs2_sw, + [](int32_t* b, int32_t a){ if (a > *b) *b = a; }); + }}, mem_flags=ATOMIC_RETURN_OP); + 0x18: AtomicMemOp::amominu_w({{ + Rd_sd = Mem_sw; + }}, {{ + TypedAtomicOpFunctor *amo_op = + new AtomicGenericOp(Rs2_uw, + [](uint32_t* b, uint32_t a){ if (a < *b) *b = a; }); + }}, mem_flags=ATOMIC_RETURN_OP); + 0x1c: AtomicMemOp::amomaxu_w({{ + Rd_sd = Mem_sw; + }}, {{ + TypedAtomicOpFunctor *amo_op = + new AtomicGenericOp(Rs2_uw, + [](uint32_t* b, uint32_t a){ if (a > *b) *b = a; }); + }}, mem_flags=ATOMIC_RETURN_OP); } 0x3: decode AMOFUNCT { 0x2: LoadReserved::lr_d({{ @@ -560,44 +585,69 @@ decode QUADRANT default Unknown::unknown() { }}, {{ Rd = result; }}, mem_flags=LLSC, inst_flags=IsStoreConditional); - format AtomicMemOp { - 0x0: amoadd_d({{Rt_sd = Mem_sd;}}, {{ - Mem_sd = Rs2_sd + Rt_sd; - Rd_sd = Rt_sd; - }}, {{EA = Rs1;}}); - 0x1: amoswap_d({{Rt = Mem;}}, {{ - Mem = Rs2; - Rd = Rt; - }}, {{EA = Rs1;}}); - 0x4: amoxor_d({{Rt = Mem;}}, {{ - Mem = Rs2^Rt; - Rd = Rt; - }}, {{EA = Rs1;}}); - 0x8: amoor_d({{Rt = Mem;}}, {{ - Mem = Rs2 | Rt; - Rd = Rt; - }}, {{EA = Rs1;}}); - 0xc: amoand_d({{Rt = Mem;}}, {{ - Mem = Rs2&Rt; - Rd = Rt; - }}, {{EA = Rs1;}}); - 0x10: amomin_d({{Rt_sd = Mem_sd;}}, {{ - Mem_sd = min(Rs2_sd, Rt_sd); - Rd_sd = Rt_sd; - }}, {{EA = Rs1;}}); - 0x14: amomax_d({{Rt_sd = Mem_sd;}}, {{ - Mem_sd = max(Rs2_sd, Rt_sd); - Rd_sd = Rt_sd; - }}, {{EA = Rs1;}}); - 0x18: amominu_d({{Rt = Mem;}}, {{ - Mem = min(Rs2, Rt); - Rd = Rt; - }}, {{EA = Rs1;}}); - 0x1c: amomaxu_d({{Rt = Mem;}}, {{ - Mem = max(Rs2, Rt); - Rd = Rt; - }}, {{EA = Rs1;}}); - } + 0x0: AtomicMemOp::amoadd_d({{ + Rd_sd = Mem_sd; + }}, {{ + TypedAtomicOpFunctor *amo_op = + new AtomicGenericOp(Rs2_sd, + [](int64_t* b, int64_t a){ *b += a; }); + }}, mem_flags=ATOMIC_RETURN_OP); + 0x1: AtomicMemOp::amoswap_d({{ + Rd_sd = Mem_sd; + }}, {{ + TypedAtomicOpFunctor *amo_op = + new AtomicGenericOp(Rs2_ud, + [](uint64_t* b, uint64_t a){ *b = a; }); + }}, mem_flags=ATOMIC_RETURN_OP); + 0x4: AtomicMemOp::amoxor_d({{ + Rd_sd = Mem_sd; + }}, {{ + TypedAtomicOpFunctor *amo_op = + new AtomicGenericOp(Rs2_ud, + [](uint64_t* b, uint64_t a){ *b ^= a; }); + }}, mem_flags=ATOMIC_RETURN_OP); + 0x8: AtomicMemOp::amoor_d({{ + Rd_sd = Mem_sd; + }}, {{ + TypedAtomicOpFunctor *amo_op = + new AtomicGenericOp(Rs2_ud, + [](uint64_t* b, uint64_t a){ *b |= a; }); + }}, mem_flags=ATOMIC_RETURN_OP); + 0xc: AtomicMemOp::amoand_d({{ + Rd_sd = Mem_sd; + }}, {{ + TypedAtomicOpFunctor *amo_op = + new AtomicGenericOp(Rs2_ud, + [](uint64_t* b, uint64_t a){ *b &= a; }); + }}, mem_flags=ATOMIC_RETURN_OP); + 0x10: AtomicMemOp::amomin_d({{ + Rd_sd = Mem_sd; + }}, {{ + TypedAtomicOpFunctor *amo_op = + new AtomicGenericOp(Rs2_sd, + [](int64_t* b, int64_t a){ if (a < *b) *b = a; }); + }}, mem_flags=ATOMIC_RETURN_OP); + 0x14: AtomicMemOp::amomax_d({{ + Rd_sd = Mem_sd; + }}, {{ + TypedAtomicOpFunctor *amo_op = + new AtomicGenericOp(Rs2_sd, + [](int64_t* b, int64_t a){ if (a > *b) *b = a; }); + }}, mem_flags=ATOMIC_RETURN_OP); + 0x18: AtomicMemOp::amominu_d({{ + Rd_sd = Mem_sd; + }}, {{ + TypedAtomicOpFunctor *amo_op = + new AtomicGenericOp(Rs2_ud, + [](uint64_t* b, uint64_t a){ if (a < *b) *b = a; }); + }}, mem_flags=ATOMIC_RETURN_OP); + 0x1c: AtomicMemOp::amomaxu_d({{ + Rd_sd = Mem_sd; + }}, {{ + TypedAtomicOpFunctor *amo_op = + new 
AtomicGenericOp(Rs2_ud, + [](uint64_t* b, uint64_t a){ if (a > *b) *b = a; }); + }}, mem_flags=ATOMIC_RETURN_OP); } } 0x0c: decode FUNCT3 { diff --git a/src/arch/riscv/isa/formats/amo.isa b/src/arch/riscv/isa/formats/amo.isa index 1dca57191..cc7346aa8 100644 --- a/src/arch/riscv/isa/formats/amo.isa +++ b/src/arch/riscv/isa/formats/amo.isa @@ -29,10 +29,7 @@ // // Authors: Alec Roelke -//////////////////////////////////////////////////////////////////// -// -// Atomic memory operation instructions -// +// Declaration templates def template AtomicMemOpDeclare {{ /** * Static instruction class for an AtomicMemOp operation @@ -45,11 +42,14 @@ def template AtomicMemOpDeclare {{ protected: - class %(class_name)sLoad : public %(base_class)sMicro + /* + * The main RMW part of an AMO + */ + class %(class_name)sRMW : public %(base_class)sMicro { public: // Constructor - %(class_name)sLoad(ExtMachInst machInst, %(class_name)s *_p); + %(class_name)sRMW(ExtMachInst machInst, %(class_name)s *_p); Fault execute(ExecContext *, Trace::InstRecord *) const override; Fault initiateAcc(ExecContext *, @@ -57,12 +57,26 @@ def template AtomicMemOpDeclare {{ Fault completeAcc(PacketPtr, ExecContext *, Trace::InstRecord *) const override; }; + }; +}}; + +def template LRSCDeclare {{ + /** + * Static instruction class for an AtomicMemOp operation + */ + class %(class_name)s : public %(base_class)s + { + public: + // Constructor + %(class_name)s(ExtMachInst machInst); + + protected: - class %(class_name)sStore : public %(base_class)sMicro + class %(class_name)sMicro : public %(base_class)sMicro { public: // Constructor - %(class_name)sStore(ExtMachInst machInst, %(class_name)s *_p); + %(class_name)sMicro(ExtMachInst machInst, %(class_name)s *_p); Fault execute(ExecContext *, Trace::InstRecord *) const override; Fault initiateAcc(ExecContext *, @@ -73,15 +87,63 @@ def template AtomicMemOpDeclare {{ }; }}; -def template LRSCConstructor {{ +// Constructor templates +def template LRSCMacroConstructor {{ %(class_name)s::%(class_name)s(ExtMachInst machInst): %(base_class)s("%(mnemonic)s", machInst, %(op_class)s) { %(constructor)s; - if (AQ) - memAccessFlags = memAccessFlags | Request::ACQUIRE; - if (RL) - memAccessFlags = memAccessFlags | Request::RELEASE; + + StaticInstPtr rel_fence; + StaticInstPtr lrsc; + StaticInstPtr acq_fence; + + // set up release fence + if (RL) { + rel_fence = new MemFenceMicro(machInst, No_OpClass); + rel_fence->setFlag(IsFirstMicroop); + rel_fence->setFlag(IsMemBarrier); + rel_fence->setFlag(IsDelayedCommit); + } + + // set up atomic rmw op + lrsc = new %(class_name)sMicro(machInst, this); + + if (!RL) { + lrsc->setFlag(IsFirstMicroop); + } + + if (!AQ) { + lrsc->setFlag(IsLastMicroop); + } else { + lrsc->setFlag(IsDelayedCommit); + } + + // set up acquire fence + if (AQ) { + acq_fence = new MemFenceMicro(machInst, No_OpClass); + acq_fence->setFlag(IsLastMicroop); + acq_fence->setFlag(IsMemBarrier); + } + + if (RL && AQ) { + microops = {rel_fence, lrsc, acq_fence}; + } else if (RL) { + microops = {rel_fence, lrsc}; + } else if (AQ) { + microops = {lrsc, acq_fence}; + } else { + microops = {lrsc}; + } + } +}}; + +def template LRSCMicroConstructor {{ + %(class_name)s::%(class_name)sMicro::%(class_name)sMicro( + ExtMachInst machInst, %(class_name)s *_p) + : %(base_class)sMicro("%(mnemonic)s", machInst, %(op_class)s) + { + %(constructor)s; } }}; @@ -90,39 +152,95 @@ def template AtomicMemOpMacroConstructor {{ : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s) { %(constructor)s; - microops = 
{new %(class_name)sLoad(machInst, this), - new %(class_name)sStore(machInst, this)}; + + StaticInstPtr rel_fence; + StaticInstPtr rmw_op; + StaticInstPtr acq_fence; + + // set up release fence + if (RL) { + rel_fence = new MemFenceMicro(machInst, No_OpClass); + rel_fence->setFlag(IsFirstMicroop); + rel_fence->setFlag(IsMemBarrier); + rel_fence->setFlag(IsDelayedCommit); + } + + // set up atomic rmw op + rmw_op = new %(class_name)sRMW(machInst, this); + + if (!RL) { + rmw_op->setFlag(IsFirstMicroop); + } + + if (!AQ) { + rmw_op->setFlag(IsLastMicroop); + } else { + rmw_op->setFlag(IsDelayedCommit); + } + + // set up acquire fence + if (AQ) { + acq_fence = new MemFenceMicro(machInst, No_OpClass); + acq_fence->setFlag(IsLastMicroop); + acq_fence->setFlag(IsMemBarrier); + } + + if (RL && AQ) { + microops = {rel_fence, rmw_op, acq_fence}; + } else if (RL) { + microops = {rel_fence, rmw_op}; + } else if (AQ) { + microops = {rmw_op, acq_fence}; + } else { + microops = {rmw_op}; + } } }}; -def template AtomicMemOpLoadConstructor {{ - %(class_name)s::%(class_name)sLoad::%(class_name)sLoad( +def template AtomicMemOpRMWConstructor {{ + %(class_name)s::%(class_name)sRMW::%(class_name)sRMW( ExtMachInst machInst, %(class_name)s *_p) : %(base_class)s("%(mnemonic)s[l]", machInst, %(op_class)s) { %(constructor)s; - flags[IsFirstMicroop] = true; - flags[IsDelayedCommit] = true; - if (AQ) - memAccessFlags = Request::ACQUIRE; + + // overwrite default flags + flags[IsMemRef] = true; + flags[IsLoad] = false; + flags[IsStore] = false; + flags[IsAtomic] = true; } }}; -def template AtomicMemOpStoreConstructor {{ - %(class_name)s::%(class_name)sStore::%(class_name)sStore( - ExtMachInst machInst, %(class_name)s *_p) - : %(base_class)s("%(mnemonic)s[s]", machInst, %(op_class)s) +// execute() templates + +def template LoadReservedExecute {{ + Fault + %(class_name)s::%(class_name)sMicro::execute( + ExecContext *xc, Trace::InstRecord *traceData) const { - %(constructor)s; - flags[IsLastMicroop] = true; - flags[IsNonSpeculative] = true; - if (RL) - memAccessFlags = Request::RELEASE; + Addr EA; + Fault fault = NoFault; + + %(op_decl)s; + %(op_rd)s; + %(ea_code)s; + + if (fault == NoFault) { + fault = readMemAtomic(xc, traceData, EA, Mem, memAccessFlags); + %(memacc_code)s; + } + + if (fault == NoFault) { + %(op_wb)s; + } + + return fault; } }}; def template StoreCondExecute {{ - Fault %(class_name)s::execute(ExecContext *xc, + Fault %(class_name)s::%(class_name)sMicro::execute(ExecContext *xc, Trace::InstRecord *traceData) const { Addr EA; @@ -157,8 +275,8 @@ def template StoreCondExecute {{ } }}; -def template AtomicMemOpLoadExecute {{ - Fault %(class_name)s::%(class_name)sLoad::execute(ExecContext *xc, +def template AtomicMemOpRMWExecute {{ + Fault %(class_name)s::%(class_name)sRMW::execute(ExecContext *xc, Trace::InstRecord *traceData) const { Addr EA; @@ -167,13 +285,18 @@ def template AtomicMemOpLoadExecute {{ %(op_decl)s; %(op_rd)s; %(ea_code)s; + %(amoop_code)s; + + assert(amo_op); if (fault == NoFault) { - fault = readMemAtomic(xc, traceData, EA, Mem, memAccessFlags); + fault = amoMemAtomic(xc, traceData, Mem, EA, memAccessFlags, + amo_op); + %(memacc_code)s; } if (fault == NoFault) { - %(code)s; + %(postacc_code)s; } if (fault == NoFault) { @@ -184,8 +307,31 @@ def template AtomicMemOpLoadExecute {{ } }}; -def template AtomicMemOpStoreExecute {{ - Fault %(class_name)s::%(class_name)sStore::execute(ExecContext *xc, +// initiateAcc() templates + +def template LoadReservedInitiateAcc {{ + Fault + 
%(class_name)s::%(class_name)sMicro::initiateAcc(ExecContext *xc, + Trace::InstRecord *traceData) const + { + Addr EA; + Fault fault = NoFault; + + %(op_src_decl)s; + %(op_rd)s; + %(ea_code)s; + + if (fault == NoFault) { + fault = initiateMemRead(xc, traceData, EA, Mem, memAccessFlags); + } + + return fault; + } +}}; + +def template StoreCondInitiateAcc {{ + Fault + %(class_name)s::%(class_name)sMicro::initiateAcc(ExecContext *xc, Trace::InstRecord *traceData) const { Addr EA; @@ -196,12 +342,12 @@ def template AtomicMemOpStoreExecute {{ %(ea_code)s; if (fault == NoFault) { - %(code)s; + %(memacc_code)s; } if (fault == NoFault) { - fault = writeMemAtomic(xc, traceData, Mem, EA, memAccessFlags, - nullptr); + fault = writeMemTiming(xc, traceData, Mem, EA, + memAccessFlags, nullptr); } if (fault == NoFault) { @@ -212,8 +358,9 @@ def template AtomicMemOpStoreExecute {{ } }}; -def template AtomicMemOpLoadInitiateAcc {{ - Fault %(class_name)s::%(class_name)sLoad::initiateAcc(ExecContext *xc, +def template AtomicMemOpRMWInitiateAcc {{ + Fault + %(class_name)s::%(class_name)sRMW::initiateAcc(ExecContext *xc, Trace::InstRecord *traceData) const { Addr EA; @@ -222,33 +369,35 @@ def template AtomicMemOpLoadInitiateAcc {{ %(op_src_decl)s; %(op_rd)s; %(ea_code)s; + %(amoop_code)s; + + assert(amo_op); if (fault == NoFault) { - fault = initiateMemRead(xc, traceData, EA, Mem, memAccessFlags); + fault = initiateMemAMO(xc, traceData, EA, Mem, memAccessFlags, + amo_op); } return fault; } }}; -def template AtomicMemOpStoreInitiateAcc {{ - Fault %(class_name)s::%(class_name)sStore::initiateAcc( +// completeAcc() templates + +def template LoadReservedCompleteAcc {{ + Fault + %(class_name)s::%(class_name)sMicro::completeAcc(PacketPtr pkt, ExecContext *xc, Trace::InstRecord *traceData) const { - Addr EA; Fault fault = NoFault; %(op_decl)s; %(op_rd)s; - %(ea_code)s; - if (fault == NoFault) { - %(code)s; - } + getMem(pkt, Mem, traceData); if (fault == NoFault) { - fault = writeMemTiming(xc, traceData, Mem, EA, memAccessFlags, - nullptr); + %(memacc_code)s; } if (fault == NoFault) { @@ -260,8 +409,8 @@ def template AtomicMemOpStoreInitiateAcc {{ }}; def template StoreCondCompleteAcc {{ - Fault %(class_name)s::completeAcc(Packet *pkt, ExecContext *xc, - Trace::InstRecord *traceData) const + Fault %(class_name)s::%(class_name)sMicro::completeAcc(Packet *pkt, + ExecContext *xc, Trace::InstRecord *traceData) const { Fault fault = NoFault; @@ -283,8 +432,8 @@ def template StoreCondCompleteAcc {{ } }}; -def template AtomicMemOpLoadCompleteAcc {{ - Fault %(class_name)s::%(class_name)sLoad::completeAcc(PacketPtr pkt, +def template AtomicMemOpRMWCompleteAcc {{ + Fault %(class_name)s::%(class_name)sRMW::completeAcc(Packet *pkt, ExecContext *xc, Trace::InstRecord *traceData) const { Fault fault = NoFault; @@ -295,7 +444,7 @@ def template AtomicMemOpLoadCompleteAcc {{ getMem(pkt, Mem, traceData); if (fault == NoFault) { - %(code)s; + %(memacc_code)s; } if (fault == NoFault) { @@ -306,16 +455,20 @@ def template AtomicMemOpLoadCompleteAcc {{ } }}; -def template AtomicMemOpStoreCompleteAcc {{ - Fault %(class_name)s::%(class_name)sStore::completeAcc(PacketPtr pkt, - ExecContext *xc, Trace::InstRecord *traceData) const - { - return NoFault; - } -}}; +// LR/SC/AMO decode formats def format LoadReserved(memacc_code, postacc_code={{ }}, ea_code={{EA = Rs1;}}, mem_flags=[], inst_flags=[]) {{ + macro_ea_code = '' + macro_inst_flags = [] + macro_iop = InstObjParams(name, Name, 'LoadReserved', macro_ea_code, + macro_inst_flags) + 
header_output = LRSCDeclare.subst(macro_iop) + decoder_output = LRSCMacroConstructor.subst(macro_iop) + decode_block = BasicDecode.subst(macro_iop) + + exec_output = '' + mem_flags = makeList(mem_flags) inst_flags = makeList(inst_flags) iop = InstObjParams(name, Name, 'LoadReserved', @@ -324,16 +477,25 @@ def format LoadReserved(memacc_code, postacc_code={{ }}, ea_code={{EA = Rs1;}}, iop.constructor += '\n\tmemAccessFlags = memAccessFlags | ' + \ '|'.join(['Request::%s' % flag for flag in mem_flags]) + ';' - header_output = LoadStoreDeclare.subst(iop) - decoder_output = LRSCConstructor.subst(iop) - decode_block = BasicDecode.subst(iop) - exec_output = LoadExecute.subst(iop) \ - + LoadInitiateAcc.subst(iop) \ - + LoadCompleteAcc.subst(iop) + decoder_output += LRSCMicroConstructor.subst(iop) + decode_block += BasicDecode.subst(iop) + exec_output += LoadReservedExecute.subst(iop) \ + + LoadReservedInitiateAcc.subst(iop) \ + + LoadReservedCompleteAcc.subst(iop) }}; def format StoreCond(memacc_code, postacc_code={{ }}, ea_code={{EA = Rs1;}}, mem_flags=[], inst_flags=[]) {{ + macro_ea_code = '' + macro_inst_flags = [] + macro_iop = InstObjParams(name, Name, 'StoreCond', macro_ea_code, + macro_inst_flags) + header_output = LRSCDeclare.subst(macro_iop) + decoder_output = LRSCMacroConstructor.subst(macro_iop) + decode_block = BasicDecode.subst(macro_iop) + + exec_output = '' + mem_flags = makeList(mem_flags) inst_flags = makeList(inst_flags) iop = InstObjParams(name, Name, 'StoreCond', @@ -342,37 +504,40 @@ def format StoreCond(memacc_code, postacc_code={{ }}, ea_code={{EA = Rs1;}}, iop.constructor += '\n\tmemAccessFlags = memAccessFlags | ' + \ '|'.join(['Request::%s' % flag for flag in mem_flags]) + ';' - header_output = LoadStoreDeclare.subst(iop) - decoder_output = LRSCConstructor.subst(iop) - decode_block = BasicDecode.subst(iop) - exec_output = StoreCondExecute.subst(iop) \ - + StoreInitiateAcc.subst(iop) \ + decoder_output += LRSCMicroConstructor.subst(iop) + decode_block += BasicDecode.subst(iop) + exec_output += StoreCondExecute.subst(iop) \ + + StoreCondInitiateAcc.subst(iop) \ + StoreCondCompleteAcc.subst(iop) }}; -def format AtomicMemOp(load_code, store_code, ea_code, load_flags=[], - store_flags=[], inst_flags=[]) {{ - macro_iop = InstObjParams(name, Name, 'AtomicMemOp', ea_code, inst_flags) +def format AtomicMemOp(memacc_code, amoop_code, postacc_code={{ }}, + ea_code={{EA = Rs1;}}, mem_flags=[], inst_flags=[]) {{ + macro_ea_code = '' + macro_inst_flags = [] + macro_iop = InstObjParams(name, Name, 'AtomicMemOp', macro_ea_code, + macro_inst_flags) header_output = AtomicMemOpDeclare.subst(macro_iop) decoder_output = AtomicMemOpMacroConstructor.subst(macro_iop) decode_block = BasicDecode.subst(macro_iop) + exec_output = '' - load_inst_flags = makeList(inst_flags) + ["IsMemRef", "IsLoad"] - load_iop = InstObjParams(name, Name, 'AtomicMemOpMicro', - {'ea_code': ea_code, 'code': load_code, 'op_name': 'Load'}, - load_inst_flags) - decoder_output += AtomicMemOpLoadConstructor.subst(load_iop) - exec_output += AtomicMemOpLoadExecute.subst(load_iop) \ - + AtomicMemOpLoadInitiateAcc.subst(load_iop) \ - + AtomicMemOpLoadCompleteAcc.subst(load_iop) - - store_inst_flags = makeList(inst_flags) + ["IsMemRef", "IsStore"] - store_iop = InstObjParams(name, Name, 'AtomicMemOpMicro', - {'ea_code': ea_code, 'code': store_code, 'op_name': 'Store'}, - store_inst_flags) - decoder_output += AtomicMemOpStoreConstructor.subst(store_iop) - exec_output += AtomicMemOpStoreExecute.subst(store_iop) \ - + 
AtomicMemOpStoreInitiateAcc.subst(store_iop) \ - + AtomicMemOpStoreCompleteAcc.subst(store_iop) + rmw_mem_flags = makeList(mem_flags) + rmw_inst_flags = makeList(inst_flags) + rmw_iop = InstObjParams(name, Name, 'AtomicMemOpMicro', + {'ea_code': ea_code, + 'memacc_code': memacc_code, + 'postacc_code': postacc_code, + 'amoop_code': amoop_code}, + rmw_inst_flags) + + rmw_iop.constructor += '\n\tmemAccessFlags = memAccessFlags | ' + \ + '|'.join(['Request::%s' % flag for flag in rmw_mem_flags]) + ';' + + decoder_output += AtomicMemOpRMWConstructor.subst(rmw_iop) + decode_block += BasicDecode.subst(rmw_iop) + exec_output += AtomicMemOpRMWExecute.subst(rmw_iop) \ + + AtomicMemOpRMWInitiateAcc.subst(rmw_iop) \ + + AtomicMemOpRMWCompleteAcc.subst(rmw_iop) }}; diff --git a/src/arch/riscv/locked_mem.cc b/src/arch/riscv/locked_mem.cc index 3c8dbe948..957cffba3 100644 --- a/src/arch/riscv/locked_mem.cc +++ b/src/arch/riscv/locked_mem.cc @@ -6,7 +6,5 @@ namespace RiscvISA { - -std::stack locked_addrs; - + std::unordered_map> locked_addrs; } diff --git a/src/arch/riscv/locked_mem.hh b/src/arch/riscv/locked_mem.hh index b1cde34c6..08d27f15c 100644 --- a/src/arch/riscv/locked_mem.hh +++ b/src/arch/riscv/locked_mem.hh @@ -49,6 +49,7 @@ #define __ARCH_RISCV_LOCKED_MEM_HH__ #include +#include #include "arch/registers.hh" #include "base/logging.hh" @@ -67,24 +68,28 @@ const int WARN_FAILURE = 10000; // RISC-V allows multiple locks per hart, but each SC has to unlock the most // recent one, so we use a stack here. -extern std::stack locked_addrs; +extern std::unordered_map> locked_addrs; template inline void handleLockedSnoop(XC *xc, PacketPtr pkt, Addr cacheBlockMask) { - if (locked_addrs.empty()) + std::stack& locked_addr_stack = locked_addrs[xc->contextId()]; + + if (locked_addr_stack.empty()) return; Addr snoop_addr = pkt->getAddr() & cacheBlockMask; DPRINTF(LLSC, "Locked snoop on address %x.\n", snoop_addr); - if ((locked_addrs.top() & cacheBlockMask) == snoop_addr) - locked_addrs.pop(); + if ((locked_addr_stack.top() & cacheBlockMask) == snoop_addr) + locked_addr_stack.pop(); } template inline void handleLockedRead(XC *xc, const RequestPtr &req) { - locked_addrs.push(req->getPaddr() & ~0xF); + std::stack& locked_addr_stack = locked_addrs[xc->contextId()]; + + locked_addr_stack.push(req->getPaddr() & ~0xF); DPRINTF(LLSC, "[cid:%d]: Reserved address %x.\n", req->contextId(), req->getPaddr() & ~0xF); } @@ -96,21 +101,23 @@ handleLockedSnoopHit(XC *xc) template inline bool handleLockedWrite(XC *xc, const RequestPtr &req, Addr cacheBlockMask) { + std::stack& locked_addr_stack = locked_addrs[xc->contextId()]; + // Normally RISC-V uses zero to indicate success and nonzero to indicate // failure (right now only 1 is reserved), but in gem5 zero indicates // failure and one indicates success, so here we conform to that (it should // be switched in the instruction's implementation) DPRINTF(LLSC, "[cid:%d]: locked_addrs empty? %s.\n", req->contextId(), - locked_addrs.empty() ? "yes" : "no"); - if (!locked_addrs.empty()) { + locked_addr_stack.empty() ? 
"yes" : "no"); + if (!locked_addr_stack.empty()) { DPRINTF(LLSC, "[cid:%d]: addr = %x.\n", req->contextId(), req->getPaddr() & ~0xF); DPRINTF(LLSC, "[cid:%d]: last locked addr = %x.\n", req->contextId(), - locked_addrs.top()); + locked_addr_stack.top()); } - if (locked_addrs.empty() - || locked_addrs.top() != ((req->getPaddr() & ~0xF))) { + if (locked_addr_stack.empty() + || locked_addr_stack.top() != ((req->getPaddr() & ~0xF))) { req->setExtraData(0); int stCondFailures = xc->readStCondFailures(); xc->setStCondFailures(++stCondFailures); -- 2.30.2