riscv: fix AMO, LR and SC instructions
authorTuan Ta <qtt2@cornell.edu>
Tue, 13 Feb 2018 04:13:34 +0000 (23:13 -0500)
committerTuan Ta <qtt2@cornell.edu>
Fri, 8 Feb 2019 15:27:04 +0000 (15:27 +0000)
(1) Atomic Memory Operation (AMO)

This patch changes how RISC-V AMO instructions are implemented. For each
AMO, instead of issuing a locking load and an unlocking store request to
the downstream memory system, this patch issues a single memory request
that carries a corresponding AtomicOpFunctor to the memory system. Once
the memory system receives the request, the atomic operation is executed
in a single step.
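
For illustration, a condensed standalone sketch of the functor-based
single-step update (AtomicOp and amoMemAtomicSketch are hypothetical
stand-ins for TypedAtomicOpFunctor/AtomicGenericOp and the memory
system's atomic access path; the real gem5 interfaces differ):

    #include <functional>

    // Functor carried inside the memory request; mirrors the
    // AtomicGenericOp class added by this patch.
    template <typename T>
    struct AtomicOp {
        T a;                              // register operand (e.g., Rs2)
        std::function<void(T *, T)> op;   // add, swap, min, max, ...
        void execute(T *mem) { op(mem, a); }
    };

    // The memory system applies the functor to the target location in
    // one step and returns the old value, which the core writes to Rd.
    template <typename T>
    T amoMemAtomicSketch(T &mem_loc, AtomicOp<T> &amo)
    {
        T old = mem_loc;
        amo.execute(&mem_loc);
        return old;
    }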

This patch also changes how AMO instructions handle acquire and release
flags (e.g., amoadd.aq and amoadd.rl). If an AMO carries an acquire
flag, a memory-fence micro-op is inserted after the AMO completes. If an
AMO carries a release flag, another memory-fence micro-op is inserted
before the AMO executes. If both flags are set, the AMO is broken down
into a sequence of 3 micro-ops: mem fence -> atomic RMW -> mem fence.
This change makes the AMO implementation comply with the release
consistency model.
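
The micro-op selection, condensed from the macro-op constructor
templates in this patch (rel_fence and acq_fence are MemFenceMicro
instances, rmw_op is the single atomic RMW micro-op):

    if (RL && AQ)
        microops = {rel_fence, rmw_op, acq_fence};
    else if (RL)
        microops = {rel_fence, rmw_op};
    else if (AQ)
        microops = {rmw_op, acq_fence};
    else
        microops = {rmw_op};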

(2) Load-Reserved (LR) and Store-Conditional (SC)

Addresses locked by LR instructions are tracked in a stack data
structure. An LR instruction pushes its target address onto the stack,
and an SC instruction pops the top address from the stack. As specified
by the RISC-V ISA, an SC fails if its target address does not match the
most recent LR.
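
A condensed standalone version of the SC check (Addr is aliased to a
64-bit integer here only to keep the snippet self-contained; gem5
defines its own Addr type, and the real check lives in
handleLockedWrite() below):

    #include <cstdint>
    #include <stack>

    using Addr = std::uint64_t;   // stand-in for gem5's Addr

    // An SC succeeds only if the most recent reservation on this
    // context's stack covers the SC's (16-byte-aligned) address.
    bool scMatchesReservation(const std::stack<Addr> &stack, Addr paddr)
    {
        return !stack.empty() && stack.top() == (paddr & ~Addr(0xF));
    }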

Previously, there was a single stack shared by all hardware thread
contexts. A stack shared between thread contexts can lead to an infinite
sequence of failed SCs if LRs from other threads keep pushing new
addresses onto the stack.

This patch gives each context its own private stack to address the
problem.
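
A condensed sketch of the per-context lookup (stackFor is a hypothetical
helper; in the patch the lookup is done inline in locked_mem.hh):

    #include <cstdint>
    #include <stack>
    #include <unordered_map>

    using Addr = std::uint64_t;   // stand-in for gem5's Addr

    // One reservation stack per hardware context, keyed by context ID.
    std::unordered_map<int, std::stack<Addr>> locked_addrs;

    std::stack<Addr> &stackFor(int context_id)
    {
        // operator[] creates an empty stack on a context's first use
        return locked_addrs[context_id];
    }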

This patch also adds extra memory-fence micro-ops to LR/SC instructions
to guarantee a correct execution order of memory instructions with
respect to the release consistency model.

Change-Id: I1e95900367c89dd866ba872a5203f63359ac51ae
Reviewed-on: https://gem5-review.googlesource.com/c/8189
Reviewed-by: Alec Roelke <ar4jc@virginia.edu>
Maintainer: Alec Roelke <ar4jc@virginia.edu>

src/arch/riscv/insts/amo.cc
src/arch/riscv/insts/amo.hh
src/arch/riscv/isa/decoder.isa
src/arch/riscv/isa/formats/amo.isa
src/arch/riscv/locked_mem.cc
src/arch/riscv/locked_mem.hh

index 7f5740f14b3aaaee8caadf7668d4e4465e1d5335..d12064720b49b81919d02eb8df92dbd8c054fd84 100644 (file)
@@ -43,6 +43,22 @@ using namespace std;
 namespace RiscvISA
 {
 
+// memfence micro instruction
+string MemFenceMicro::generateDisassembly(Addr pc,
+    const SymbolTable *symtab) const
+{
+    stringstream ss;
+    ss << csprintf("0x%08x", machInst) << ' ' << mnemonic;
+    return ss.str();
+}
+
+Fault MemFenceMicro::execute(ExecContext *xc,
+    Trace::InstRecord *traceData) const
+{
+    return NoFault;
+}
+
+// load-reserved
 string LoadReserved::generateDisassembly(Addr pc,
     const SymbolTable *symtab) const
 {
@@ -52,6 +68,16 @@ string LoadReserved::generateDisassembly(Addr pc,
     return ss.str();
 }
 
+string LoadReservedMicro::generateDisassembly(Addr pc,
+    const SymbolTable *symtab) const
+{
+    stringstream ss;
+    ss << mnemonic << ' ' << registerName(_destRegIdx[0]) << ", ("
+            << registerName(_srcRegIdx[0]) << ')';
+    return ss.str();
+}
+
+// store-conditional
 string StoreCond::generateDisassembly(Addr pc,
     const SymbolTable *symtab) const
 {
@@ -62,6 +88,17 @@ string StoreCond::generateDisassembly(Addr pc,
     return ss.str();
 }
 
+string StoreCondMicro::generateDisassembly(Addr pc,
+    const SymbolTable *symtab) const
+{
+    stringstream ss;
+    ss << mnemonic << ' ' << registerName(_destRegIdx[0]) << ", "
+            << registerName(_srcRegIdx[1]) << ", ("
+            << registerName(_srcRegIdx[0]) << ')';
+    return ss.str();
+}
+
+// AMOs
 string AtomicMemOp::generateDisassembly(Addr pc,
     const SymbolTable *symtab) const
 {
@@ -76,8 +113,10 @@ string AtomicMemOpMicro::generateDisassembly(Addr pc,
     const SymbolTable *symtab) const
 {
     stringstream ss;
-    ss << csprintf("0x%08x", machInst) << ' ' << mnemonic;
+    ss << mnemonic << ' ' << registerName(_destRegIdx[0]) << ", "
+            << registerName(_srcRegIdx[1]) << ", ("
+            << registerName(_srcRegIdx[0]) << ')';
     return ss.str();
 }
 
-}
\ No newline at end of file
+}
index 7c07bc24307f300858a97d7b7fb6968bbd3653cf..748fe14a3350013865b0750836bb352c981627d7 100644 (file)
 namespace RiscvISA
 {
 
-class LoadReserved : public MemInst
+// memfence micro instruction
+class MemFenceMicro : public RiscvMicroInst
 {
+  public:
+    MemFenceMicro(ExtMachInst _machInst, OpClass __opClass)
+        : RiscvMicroInst("fence", _machInst, __opClass)
+    { }
   protected:
-    using MemInst::MemInst;
+    using RiscvMicroInst::RiscvMicroInst;
 
+    Fault execute(ExecContext *, Trace::InstRecord *) const override;
     std::string generateDisassembly(
         Addr pc, const SymbolTable *symtab) const override;
 };
 
-class StoreCond : public MemInst
+// load-reserved
+class LoadReserved : public RiscvMacroInst
 {
   protected:
-    using MemInst::MemInst;
+    using RiscvMacroInst::RiscvMacroInst;
 
     std::string generateDisassembly(
         Addr pc, const SymbolTable *symtab) const override;
 };
 
+class LoadReservedMicro : public RiscvMicroInst
+{
+  protected:
+    Request::Flags memAccessFlags;
+    using RiscvMicroInst::RiscvMicroInst;
+
+    std::string generateDisassembly(
+        Addr pc, const SymbolTable *symtab) const override;
+};
+
+// store-cond
+class StoreCond : public RiscvMacroInst
+{
+  protected:
+    using RiscvMacroInst::RiscvMacroInst;
+
+    std::string generateDisassembly(
+        Addr pc, const SymbolTable *symtab) const override;
+};
+
+class StoreCondMicro : public RiscvMicroInst
+{
+  protected:
+    Request::Flags memAccessFlags;
+    using RiscvMicroInst::RiscvMicroInst;
+
+    std::string generateDisassembly(
+        Addr pc, const SymbolTable *symtab) const override;
+};
+
+// AMOs
 class AtomicMemOp : public RiscvMacroInst
 {
   protected:
@@ -78,6 +116,23 @@ class AtomicMemOpMicro : public RiscvMicroInst
         Addr pc, const SymbolTable *symtab) const override;
 };
 
+/**
+ * A generic atomic op class
+ */
+
+template<typename T>
+class AtomicGenericOp : public TypedAtomicOpFunctor<T>
+{
+  public:
+    AtomicGenericOp(T _a, std::function<void(T*,T)> _op)
+      : a(_a), op(_op) { }
+    AtomicOpFunctor* clone() { return new AtomicGenericOp<T>(*this); }
+    void execute(T *b) { op(b, a); }
+  private:
+    T a;
+    std::function<void(T*,T)> op;
+};
+
 }
 
-#endif // __ARCH_RISCV_INSTS_AMO_HH__
\ No newline at end of file
+#endif // __ARCH_RISCV_INSTS_AMO_HH__
index 3a04a02de7689b85f53302d1f33c6abb49c681e2..8de4829a6f6ef6cf63b1ce3e2248be88461374d6 100644 (file)
@@ -512,44 +512,69 @@ decode QUADRANT default Unknown::unknown() {
                 }}, {{
                     Rd = result;
                 }}, inst_flags=IsStoreConditional, mem_flags=LLSC);
-                format AtomicMemOp {
-                    0x0: amoadd_w({{Rt_sd = Mem_sw;}}, {{
-                        Mem_sw = Rs2_sw + Rt_sd;
-                        Rd_sd = Rt_sd;
-                    }}, {{EA = Rs1;}});
-                    0x1: amoswap_w({{Rt_sd = Mem_sw;}}, {{
-                        Mem_sw = Rs2_uw;
-                        Rd_sd = Rt_sd;
-                    }}, {{EA = Rs1;}});
-                    0x4: amoxor_w({{Rt_sd = Mem_sw;}}, {{
-                        Mem_sw = Rs2_uw^Rt_sd;
-                        Rd_sd = Rt_sd;
-                    }}, {{EA = Rs1;}});
-                    0x8: amoor_w({{Rt_sd = Mem_sw;}}, {{
-                        Mem_sw = Rs2_uw | Rt_sd;
-                        Rd_sd = Rt_sd;
-                    }}, {{EA = Rs1;}});
-                    0xc: amoand_w({{Rt_sd = Mem_sw;}}, {{
-                        Mem_sw = Rs2_uw&Rt_sd;
-                        Rd_sd = Rt_sd;
-                    }}, {{EA = Rs1;}});
-                    0x10: amomin_w({{Rt_sd = Mem_sw;}}, {{
-                        Mem_sw = min<int32_t>(Rs2_sw, Rt_sd);
-                        Rd_sd = Rt_sd;
-                    }}, {{EA = Rs1;}});
-                    0x14: amomax_w({{Rt_sd = Mem_sw;}}, {{
-                        Mem_sw = max<int32_t>(Rs2_sw, Rt_sd);
-                        Rd_sd = Rt_sd;
-                    }}, {{EA = Rs1;}});
-                    0x18: amominu_w({{Rt_sd = Mem_sw;}}, {{
-                        Mem_sw = min<uint32_t>(Rs2_uw, Rt_sd);
-                        Rd_sd = Rt_sd;
-                    }}, {{EA = Rs1;}});
-                    0x1c: amomaxu_w({{Rt_sd = Mem_sw;}}, {{
-                        Mem_sw = max<uint32_t>(Rs2_uw, Rt_sd);
-                        Rd_sd = Rt_sd;
-                    }}, {{EA = Rs1;}});
-                }
+                0x0: AtomicMemOp::amoadd_w({{
+                    Rd_sd = Mem_sw;
+                }}, {{
+                    TypedAtomicOpFunctor<int32_t> *amo_op =
+                          new AtomicGenericOp<int32_t>(Rs2_sw,
+                                  [](int32_t* b, int32_t a){ *b += a; });
+                }}, mem_flags=ATOMIC_RETURN_OP);
+                0x1: AtomicMemOp::amoswap_w({{
+                    Rd_sd = Mem_sw;
+                }}, {{
+                    TypedAtomicOpFunctor<uint32_t> *amo_op =
+                          new AtomicGenericOp<uint32_t>(Rs2_uw,
+                                  [](uint32_t* b, uint32_t a){ *b = a; });
+                }}, mem_flags=ATOMIC_RETURN_OP);
+                0x4: AtomicMemOp::amoxor_w({{
+                    Rd_sd = Mem_sw;
+                }}, {{
+                    TypedAtomicOpFunctor<uint32_t> *amo_op =
+                          new AtomicGenericOp<uint32_t>(Rs2_uw,
+                                  [](uint32_t* b, uint32_t a){ *b ^= a; });
+                }}, mem_flags=ATOMIC_RETURN_OP);
+                0x8: AtomicMemOp::amoor_w({{
+                    Rd_sd = Mem_sw;
+                }}, {{
+                    TypedAtomicOpFunctor<uint32_t> *amo_op =
+                          new AtomicGenericOp<uint32_t>(Rs2_uw,
+                                  [](uint32_t* b, uint32_t a){ *b |= a; });
+                }}, mem_flags=ATOMIC_RETURN_OP);
+                0xc: AtomicMemOp::amoand_w({{
+                    Rd_sd = Mem_sw;
+                }}, {{
+                    TypedAtomicOpFunctor<uint32_t> *amo_op =
+                          new AtomicGenericOp<uint32_t>(Rs2_uw,
+                                  [](uint32_t* b, uint32_t a){ *b &= a; });
+                }}, mem_flags=ATOMIC_RETURN_OP);
+                0x10: AtomicMemOp::amomin_w({{
+                    Rd_sd = Mem_sw;
+                }}, {{
+                    TypedAtomicOpFunctor<int32_t> *amo_op =
+                      new AtomicGenericOp<int32_t>(Rs2_sw,
+                        [](int32_t* b, int32_t a){ if (a < *b) *b = a; });
+                }}, mem_flags=ATOMIC_RETURN_OP);
+                0x14: AtomicMemOp::amomax_w({{
+                    Rd_sd = Mem_sw;
+                }}, {{
+                    TypedAtomicOpFunctor<int32_t> *amo_op =
+                      new AtomicGenericOp<int32_t>(Rs2_sw,
+                        [](int32_t* b, int32_t a){ if (a > *b) *b = a; });
+                }}, mem_flags=ATOMIC_RETURN_OP);
+                0x18: AtomicMemOp::amominu_w({{
+                    Rd_sd = Mem_sw;
+                }}, {{
+                    TypedAtomicOpFunctor<uint32_t> *amo_op =
+                      new AtomicGenericOp<uint32_t>(Rs2_uw,
+                        [](uint32_t* b, uint32_t a){ if (a < *b) *b = a; });
+                }}, mem_flags=ATOMIC_RETURN_OP);
+                0x1c: AtomicMemOp::amomaxu_w({{
+                    Rd_sd = Mem_sw;
+                }}, {{
+                    TypedAtomicOpFunctor<uint32_t> *amo_op =
+                      new AtomicGenericOp<uint32_t>(Rs2_uw,
+                        [](uint32_t* b, uint32_t a){ if (a > *b) *b = a; });
+                }}, mem_flags=ATOMIC_RETURN_OP);
             }
             0x3: decode AMOFUNCT {
                 0x2: LoadReserved::lr_d({{
@@ -560,44 +585,69 @@ decode QUADRANT default Unknown::unknown() {
                 }}, {{
                     Rd = result;
                 }}, mem_flags=LLSC, inst_flags=IsStoreConditional);
-                format AtomicMemOp {
-                    0x0: amoadd_d({{Rt_sd = Mem_sd;}}, {{
-                        Mem_sd = Rs2_sd + Rt_sd;
-                        Rd_sd = Rt_sd;
-                    }}, {{EA = Rs1;}});
-                    0x1: amoswap_d({{Rt = Mem;}}, {{
-                        Mem = Rs2;
-                        Rd = Rt;
-                    }}, {{EA = Rs1;}});
-                    0x4: amoxor_d({{Rt = Mem;}}, {{
-                        Mem = Rs2^Rt;
-                        Rd = Rt;
-                    }}, {{EA = Rs1;}});
-                    0x8: amoor_d({{Rt = Mem;}}, {{
-                        Mem = Rs2 | Rt;
-                        Rd = Rt;
-                    }}, {{EA = Rs1;}});
-                    0xc: amoand_d({{Rt = Mem;}}, {{
-                        Mem = Rs2&Rt;
-                        Rd = Rt;
-                    }}, {{EA = Rs1;}});
-                    0x10: amomin_d({{Rt_sd = Mem_sd;}}, {{
-                        Mem_sd = min(Rs2_sd, Rt_sd);
-                        Rd_sd = Rt_sd;
-                    }}, {{EA = Rs1;}});
-                    0x14: amomax_d({{Rt_sd = Mem_sd;}}, {{
-                        Mem_sd = max(Rs2_sd, Rt_sd);
-                        Rd_sd = Rt_sd;
-                    }}, {{EA = Rs1;}});
-                    0x18: amominu_d({{Rt = Mem;}}, {{
-                        Mem = min(Rs2, Rt);
-                        Rd = Rt;
-                    }}, {{EA = Rs1;}});
-                    0x1c: amomaxu_d({{Rt = Mem;}}, {{
-                        Mem = max(Rs2, Rt);
-                        Rd = Rt;
-                    }}, {{EA = Rs1;}});
-                }
+                0x0: AtomicMemOp::amoadd_d({{
+                    Rd_sd = Mem_sd;
+                }}, {{
+                    TypedAtomicOpFunctor<int64_t> *amo_op =
+                          new AtomicGenericOp<int64_t>(Rs2_sd,
+                                  [](int64_t* b, int64_t a){ *b += a; });
+                }}, mem_flags=ATOMIC_RETURN_OP);
+                0x1: AtomicMemOp::amoswap_d({{
+                    Rd_sd = Mem_sd;
+                }}, {{
+                    TypedAtomicOpFunctor<uint64_t> *amo_op =
+                          new AtomicGenericOp<uint64_t>(Rs2_ud,
+                                  [](uint64_t* b, uint64_t a){ *b = a; });
+                }}, mem_flags=ATOMIC_RETURN_OP);
+                0x4: AtomicMemOp::amoxor_d({{
+                    Rd_sd = Mem_sd;
+                }}, {{
+                    TypedAtomicOpFunctor<uint64_t> *amo_op =
+                          new AtomicGenericOp<uint64_t>(Rs2_ud,
+                                 [](uint64_t* b, uint64_t a){ *b ^= a; });
+                }}, mem_flags=ATOMIC_RETURN_OP);
+                0x8: AtomicMemOp::amoor_d({{
+                    Rd_sd = Mem_sd;
+                }}, {{
+                    TypedAtomicOpFunctor<uint64_t> *amo_op =
+                          new AtomicGenericOp<uint64_t>(Rs2_ud,
+                                 [](uint64_t* b, uint64_t a){ *b |= a; });
+                }}, mem_flags=ATOMIC_RETURN_OP);
+                0xc: AtomicMemOp::amoand_d({{
+                    Rd_sd = Mem_sd;
+                }}, {{
+                    TypedAtomicOpFunctor<uint64_t> *amo_op =
+                          new AtomicGenericOp<uint64_t>(Rs2_ud,
+                                 [](uint64_t* b, uint64_t a){ *b &= a; });
+                }}, mem_flags=ATOMIC_RETURN_OP);
+                0x10: AtomicMemOp::amomin_d({{
+                    Rd_sd = Mem_sd;
+                }}, {{
+                    TypedAtomicOpFunctor<int64_t> *amo_op =
+                      new AtomicGenericOp<int64_t>(Rs2_sd,
+                        [](int64_t* b, int64_t a){ if (a < *b) *b = a; });
+                }}, mem_flags=ATOMIC_RETURN_OP);
+                0x14: AtomicMemOp::amomax_d({{
+                    Rd_sd = Mem_sd;
+                }}, {{
+                    TypedAtomicOpFunctor<int64_t> *amo_op =
+                      new AtomicGenericOp<int64_t>(Rs2_sd,
+                        [](int64_t* b, int64_t a){ if (a > *b) *b = a; });
+                }}, mem_flags=ATOMIC_RETURN_OP);
+                0x18: AtomicMemOp::amominu_d({{
+                    Rd_sd = Mem_sd;
+                }}, {{
+                    TypedAtomicOpFunctor<uint64_t> *amo_op =
+                      new AtomicGenericOp<uint64_t>(Rs2_ud,
+                        [](uint64_t* b, uint64_t a){ if (a < *b) *b = a; });
+                }}, mem_flags=ATOMIC_RETURN_OP);
+                0x1c: AtomicMemOp::amomaxu_d({{
+                    Rd_sd = Mem_sd;
+                }}, {{
+                    TypedAtomicOpFunctor<uint64_t> *amo_op =
+                      new AtomicGenericOp<uint64_t>(Rs2_ud,
+                        [](uint64_t* b, uint64_t a){ if (a > *b) *b = a; });
+                }}, mem_flags=ATOMIC_RETURN_OP);
             }
         }
         0x0c: decode FUNCT3 {
index 1dca5719101359aaa4bfa41fb6ccf199d2a5ea6f..cc7346aa821f229fabd364907ab28f0f1a838b7e 100644 (file)
 //
 // Authors: Alec Roelke
 
-////////////////////////////////////////////////////////////////////
-//
-// Atomic memory operation instructions
-//
+// Declaration templates
 def template AtomicMemOpDeclare {{
     /**
      * Static instruction class for an AtomicMemOp operation
@@ -45,11 +42,14 @@ def template AtomicMemOpDeclare {{
 
     protected:
 
-        class %(class_name)sLoad : public %(base_class)sMicro
+        /*
+         * The main RMW part of an AMO
+         */
+        class %(class_name)sRMW : public %(base_class)sMicro
         {
           public:
             // Constructor
-            %(class_name)sLoad(ExtMachInst machInst, %(class_name)s *_p);
+            %(class_name)sRMW(ExtMachInst machInst, %(class_name)s *_p);
 
             Fault execute(ExecContext *, Trace::InstRecord *) const override;
             Fault initiateAcc(ExecContext *,
@@ -57,12 +57,26 @@ def template AtomicMemOpDeclare {{
             Fault completeAcc(PacketPtr, ExecContext *,
                               Trace::InstRecord *) const override;
         };
+    };
+}};
+
+def template LRSCDeclare {{
+    /**
+     * Static instruction class for an AtomicMemOp operation
+     */
+    class %(class_name)s : public %(base_class)s
+    {
+      public:
+        // Constructor
+        %(class_name)s(ExtMachInst machInst);
+
+    protected:
 
-        class %(class_name)sStore : public %(base_class)sMicro
+        class %(class_name)sMicro : public %(base_class)sMicro
         {
           public:
             // Constructor
-            %(class_name)sStore(ExtMachInst machInst, %(class_name)s *_p);
+            %(class_name)sMicro(ExtMachInst machInst, %(class_name)s *_p);
 
             Fault execute(ExecContext *, Trace::InstRecord *) const override;
             Fault initiateAcc(ExecContext *,
@@ -73,15 +87,63 @@ def template AtomicMemOpDeclare {{
     };
 }};
 
-def template LRSCConstructor {{
+// Constructor templates
+def template LRSCMacroConstructor {{
     %(class_name)s::%(class_name)s(ExtMachInst machInst):
         %(base_class)s("%(mnemonic)s", machInst, %(op_class)s)
     {
         %(constructor)s;
-        if (AQ)
-            memAccessFlags = memAccessFlags | Request::ACQUIRE;
-        if (RL)
-            memAccessFlags = memAccessFlags | Request::RELEASE;
+
+        StaticInstPtr rel_fence;
+        StaticInstPtr lrsc;
+        StaticInstPtr acq_fence;
+
+        // set up release fence
+        if (RL) {
+            rel_fence = new MemFenceMicro(machInst, No_OpClass);
+            rel_fence->setFlag(IsFirstMicroop);
+            rel_fence->setFlag(IsMemBarrier);
+            rel_fence->setFlag(IsDelayedCommit);
+        }
+
+        // set up atomic rmw op
+        lrsc = new %(class_name)sMicro(machInst, this);
+
+        if (!RL) {
+            lrsc->setFlag(IsFirstMicroop);
+        }
+
+        if (!AQ) {
+            lrsc->setFlag(IsLastMicroop);
+        } else {
+            lrsc->setFlag(IsDelayedCommit);
+        }
+
+        // set up acquire fence
+        if (AQ) {
+            acq_fence = new MemFenceMicro(machInst, No_OpClass);
+            acq_fence->setFlag(IsLastMicroop);
+            acq_fence->setFlag(IsMemBarrier);
+        }
+
+        if (RL && AQ) {
+            microops = {rel_fence, lrsc, acq_fence};
+        } else if (RL) {
+            microops = {rel_fence, lrsc};
+        } else if (AQ) {
+            microops = {lrsc, acq_fence};
+        } else {
+            microops = {lrsc};
+        }
+    }
+}};
+
+def template LRSCMicroConstructor {{
+    %(class_name)s::%(class_name)sMicro::%(class_name)sMicro(
+        ExtMachInst machInst, %(class_name)s *_p)
+            : %(base_class)sMicro("%(mnemonic)s", machInst, %(op_class)s)
+    {
+        %(constructor)s;
     }
 }};
 
@@ -90,39 +152,95 @@ def template AtomicMemOpMacroConstructor {{
             : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s)
     {
         %(constructor)s;
-        microops = {new %(class_name)sLoad(machInst, this),
-            new %(class_name)sStore(machInst, this)};
+
+        StaticInstPtr rel_fence;
+        StaticInstPtr rmw_op;
+        StaticInstPtr acq_fence;
+
+        // set up release fence
+        if (RL) {
+            rel_fence = new MemFenceMicro(machInst, No_OpClass);
+            rel_fence->setFlag(IsFirstMicroop);
+            rel_fence->setFlag(IsMemBarrier);
+            rel_fence->setFlag(IsDelayedCommit);
+        }
+
+        // set up atomic rmw op
+        rmw_op = new %(class_name)sRMW(machInst, this);
+
+        if (!RL) {
+            rmw_op->setFlag(IsFirstMicroop);
+        }
+
+        if (!AQ) {
+            rmw_op->setFlag(IsLastMicroop);
+        } else {
+            rmw_op->setFlag(IsDelayedCommit);
+        }
+
+        // set up acquire fence
+        if (AQ) {
+            acq_fence = new MemFenceMicro(machInst, No_OpClass);
+            acq_fence->setFlag(IsLastMicroop);
+            acq_fence->setFlag(IsMemBarrier);
+        }
+
+        if (RL && AQ) {
+            microops = {rel_fence, rmw_op, acq_fence};
+        } else if (RL) {
+            microops = {rel_fence, rmw_op};
+        } else if (AQ) {
+            microops = {rmw_op, acq_fence};
+        } else {
+            microops = {rmw_op};
+        }
     }
 }};
 
-def template AtomicMemOpLoadConstructor {{
-    %(class_name)s::%(class_name)sLoad::%(class_name)sLoad(
+def template AtomicMemOpRMWConstructor {{
+    %(class_name)s::%(class_name)sRMW::%(class_name)sRMW(
         ExtMachInst machInst, %(class_name)s *_p)
             : %(base_class)s("%(mnemonic)s[l]", machInst, %(op_class)s)
     {
         %(constructor)s;
-        flags[IsFirstMicroop] = true;
-        flags[IsDelayedCommit] = true;
-        if (AQ)
-            memAccessFlags = Request::ACQUIRE;
+
+        // overwrite default flags
+        flags[IsMemRef] = true;
+        flags[IsLoad] = false;
+        flags[IsStore] = false;
+        flags[IsAtomic] = true;
     }
 }};
 
-def template AtomicMemOpStoreConstructor {{
-    %(class_name)s::%(class_name)sStore::%(class_name)sStore(
-        ExtMachInst machInst, %(class_name)s *_p)
-            : %(base_class)s("%(mnemonic)s[s]", machInst, %(op_class)s)
+// execute() templates
+
+def template LoadReservedExecute {{
+    Fault
+    %(class_name)s::%(class_name)sMicro::execute(
+        ExecContext *xc, Trace::InstRecord *traceData) const
     {
-        %(constructor)s;
-        flags[IsLastMicroop] = true;
-        flags[IsNonSpeculative] = true;
-        if (RL)
-            memAccessFlags = Request::RELEASE;
+        Addr EA;
+        Fault fault = NoFault;
+
+        %(op_decl)s;
+        %(op_rd)s;
+        %(ea_code)s;
+
+        if (fault == NoFault) {
+            fault = readMemAtomic(xc, traceData, EA, Mem, memAccessFlags);
+            %(memacc_code)s;
+        }
+
+        if (fault == NoFault) {
+            %(op_wb)s;
+        }
+
+        return fault;
     }
 }};
 
 def template StoreCondExecute {{
-    Fault %(class_name)s::execute(ExecContext *xc,
+    Fault %(class_name)s::%(class_name)sMicro::execute(ExecContext *xc,
         Trace::InstRecord *traceData) const
     {
         Addr EA;
@@ -157,8 +275,8 @@ def template StoreCondExecute {{
     }
 }};
 
-def template AtomicMemOpLoadExecute {{
-    Fault %(class_name)s::%(class_name)sLoad::execute(ExecContext *xc,
+def template AtomicMemOpRMWExecute {{
+    Fault %(class_name)s::%(class_name)sRMW::execute(ExecContext *xc,
         Trace::InstRecord *traceData) const
     {
         Addr EA;
@@ -167,13 +285,18 @@ def template AtomicMemOpLoadExecute {{
         %(op_decl)s;
         %(op_rd)s;
         %(ea_code)s;
+        %(amoop_code)s;
+
+        assert(amo_op);
 
         if (fault == NoFault) {
-            fault = readMemAtomic(xc, traceData, EA, Mem, memAccessFlags);
+            fault = amoMemAtomic(xc, traceData, Mem, EA, memAccessFlags,
+                                 amo_op);
+            %(memacc_code)s;
         }
 
         if (fault == NoFault) {
-            %(code)s;
+            %(postacc_code)s;
         }
 
         if (fault == NoFault) {
@@ -184,8 +307,31 @@ def template AtomicMemOpLoadExecute {{
     }
 }};
 
-def template AtomicMemOpStoreExecute {{
-    Fault %(class_name)s::%(class_name)sStore::execute(ExecContext *xc,
+// initiateAcc() templates
+
+def template LoadReservedInitiateAcc {{
+    Fault
+    %(class_name)s::%(class_name)sMicro::initiateAcc(ExecContext *xc,
+        Trace::InstRecord *traceData) const
+    {
+        Addr EA;
+        Fault fault = NoFault;
+
+        %(op_src_decl)s;
+        %(op_rd)s;
+        %(ea_code)s;
+
+        if (fault == NoFault) {
+            fault = initiateMemRead(xc, traceData, EA, Mem, memAccessFlags);
+        }
+
+        return fault;
+    }
+}};
+
+def template StoreCondInitiateAcc {{
+    Fault
+    %(class_name)s::%(class_name)sMicro::initiateAcc(ExecContext *xc,
         Trace::InstRecord *traceData) const
     {
         Addr EA;
@@ -196,12 +342,12 @@ def template AtomicMemOpStoreExecute {{
         %(ea_code)s;
 
         if (fault == NoFault) {
-            %(code)s;
+            %(memacc_code)s;
         }
 
         if (fault == NoFault) {
-            fault = writeMemAtomic(xc, traceData, Mem, EA, memAccessFlags,
-                nullptr);
+            fault = writeMemTiming(xc, traceData, Mem, EA,
+                memAccessFlags, nullptr);
         }
 
         if (fault == NoFault) {
@@ -212,8 +358,9 @@ def template AtomicMemOpStoreExecute {{
     }
 }};
 
-def template AtomicMemOpLoadInitiateAcc {{
-    Fault %(class_name)s::%(class_name)sLoad::initiateAcc(ExecContext *xc,
+def template AtomicMemOpRMWInitiateAcc {{
+    Fault
+    %(class_name)s::%(class_name)sRMW::initiateAcc(ExecContext *xc,
         Trace::InstRecord *traceData) const
     {
         Addr EA;
@@ -222,33 +369,35 @@ def template AtomicMemOpLoadInitiateAcc {{
         %(op_src_decl)s;
         %(op_rd)s;
         %(ea_code)s;
+        %(amoop_code)s;
+
+        assert(amo_op);
 
         if (fault == NoFault) {
-            fault = initiateMemRead(xc, traceData, EA, Mem, memAccessFlags);
+            fault = initiateMemAMO(xc, traceData, EA, Mem, memAccessFlags,
+                                   amo_op);
         }
 
         return fault;
     }
 }};
 
-def template AtomicMemOpStoreInitiateAcc {{
-    Fault %(class_name)s::%(class_name)sStore::initiateAcc(
+// completeAcc() templates
+
+def template LoadReservedCompleteAcc {{
+    Fault
+    %(class_name)s::%(class_name)sMicro::completeAcc(PacketPtr pkt,
         ExecContext *xc, Trace::InstRecord *traceData) const
     {
-        Addr EA;
         Fault fault = NoFault;
 
         %(op_decl)s;
         %(op_rd)s;
-        %(ea_code)s;
 
-        if (fault == NoFault) {
-            %(code)s;
-        }
+        getMem(pkt, Mem, traceData);
 
         if (fault == NoFault) {
-            fault = writeMemTiming(xc, traceData, Mem, EA, memAccessFlags,
-                nullptr);
+            %(memacc_code)s;
         }
 
         if (fault == NoFault) {
@@ -260,8 +409,8 @@ def template AtomicMemOpStoreInitiateAcc {{
 }};
 
 def template StoreCondCompleteAcc {{
-    Fault %(class_name)s::completeAcc(Packet *pkt, ExecContext *xc,
-        Trace::InstRecord *traceData) const
+    Fault %(class_name)s::%(class_name)sMicro::completeAcc(Packet *pkt,
+          ExecContext *xc, Trace::InstRecord *traceData) const
     {
         Fault fault = NoFault;
 
@@ -283,8 +432,8 @@ def template StoreCondCompleteAcc {{
     }
 }};
 
-def template AtomicMemOpLoadCompleteAcc {{
-    Fault %(class_name)s::%(class_name)sLoad::completeAcc(PacketPtr pkt,
+def template AtomicMemOpRMWCompleteAcc {{
+    Fault %(class_name)s::%(class_name)sRMW::completeAcc(Packet *pkt,
         ExecContext *xc, Trace::InstRecord *traceData) const
     {
         Fault fault = NoFault;
@@ -295,7 +444,7 @@ def template AtomicMemOpLoadCompleteAcc {{
         getMem(pkt, Mem, traceData);
 
         if (fault == NoFault) {
-            %(code)s;
+            %(memacc_code)s;
         }
 
         if (fault == NoFault) {
@@ -306,16 +455,20 @@ def template AtomicMemOpLoadCompleteAcc {{
     }
 }};
 
-def template AtomicMemOpStoreCompleteAcc {{
-    Fault %(class_name)s::%(class_name)sStore::completeAcc(PacketPtr pkt,
-        ExecContext *xc, Trace::InstRecord *traceData) const
-    {
-        return NoFault;
-    }
-}};
+// LR/SC/AMO decode formats
 
 def format LoadReserved(memacc_code, postacc_code={{ }}, ea_code={{EA = Rs1;}},
         mem_flags=[], inst_flags=[]) {{
+    macro_ea_code = ''
+    macro_inst_flags = []
+    macro_iop = InstObjParams(name, Name, 'LoadReserved', macro_ea_code,
+                              macro_inst_flags)
+    header_output = LRSCDeclare.subst(macro_iop)
+    decoder_output = LRSCMacroConstructor.subst(macro_iop)
+    decode_block = BasicDecode.subst(macro_iop)
+
+    exec_output = ''
+
     mem_flags = makeList(mem_flags)
     inst_flags = makeList(inst_flags)
     iop = InstObjParams(name, Name, 'LoadReserved',
@@ -324,16 +477,25 @@ def format LoadReserved(memacc_code, postacc_code={{ }}, ea_code={{EA = Rs1;}},
     iop.constructor += '\n\tmemAccessFlags = memAccessFlags | ' + \
         '|'.join(['Request::%s' % flag for flag in mem_flags]) + ';'
 
-    header_output = LoadStoreDeclare.subst(iop)
-    decoder_output = LRSCConstructor.subst(iop)
-    decode_block = BasicDecode.subst(iop)
-    exec_output = LoadExecute.subst(iop) \
-        + LoadInitiateAcc.subst(iop) \
-        + LoadCompleteAcc.subst(iop)
+    decoder_output += LRSCMicroConstructor.subst(iop)
+    decode_block += BasicDecode.subst(iop)
+    exec_output += LoadReservedExecute.subst(iop) \
+        + LoadReservedInitiateAcc.subst(iop) \
+        + LoadReservedCompleteAcc.subst(iop)
 }};
 
 def format StoreCond(memacc_code, postacc_code={{ }}, ea_code={{EA = Rs1;}},
         mem_flags=[], inst_flags=[]) {{
+    macro_ea_code = ''
+    macro_inst_flags = []
+    macro_iop = InstObjParams(name, Name, 'StoreCond', macro_ea_code,
+                              macro_inst_flags)
+    header_output = LRSCDeclare.subst(macro_iop)
+    decoder_output = LRSCMacroConstructor.subst(macro_iop)
+    decode_block = BasicDecode.subst(macro_iop)
+
+    exec_output = ''
+
     mem_flags = makeList(mem_flags)
     inst_flags = makeList(inst_flags)
     iop = InstObjParams(name, Name, 'StoreCond',
@@ -342,37 +504,40 @@ def format StoreCond(memacc_code, postacc_code={{ }}, ea_code={{EA = Rs1;}},
     iop.constructor += '\n\tmemAccessFlags = memAccessFlags | ' + \
         '|'.join(['Request::%s' % flag for flag in mem_flags]) + ';'
 
-    header_output = LoadStoreDeclare.subst(iop)
-    decoder_output = LRSCConstructor.subst(iop)
-    decode_block = BasicDecode.subst(iop)
-    exec_output = StoreCondExecute.subst(iop) \
-        + StoreInitiateAcc.subst(iop) \
+    decoder_output += LRSCMicroConstructor.subst(iop)
+    decode_block += BasicDecode.subst(iop)
+    exec_output += StoreCondExecute.subst(iop) \
+        + StoreCondInitiateAcc.subst(iop) \
         + StoreCondCompleteAcc.subst(iop)
 }};
 
-def format AtomicMemOp(load_code, store_code, ea_code, load_flags=[],
-        store_flags=[], inst_flags=[]) {{
-    macro_iop = InstObjParams(name, Name, 'AtomicMemOp', ea_code, inst_flags)
+def format AtomicMemOp(memacc_code, amoop_code, postacc_code={{ }},
+        ea_code={{EA = Rs1;}}, mem_flags=[], inst_flags=[]) {{
+    macro_ea_code = ''
+    macro_inst_flags = []
+    macro_iop = InstObjParams(name, Name, 'AtomicMemOp', macro_ea_code,
+                              macro_inst_flags)
     header_output = AtomicMemOpDeclare.subst(macro_iop)
     decoder_output = AtomicMemOpMacroConstructor.subst(macro_iop)
     decode_block = BasicDecode.subst(macro_iop)
+
     exec_output = ''
 
-    load_inst_flags = makeList(inst_flags) + ["IsMemRef", "IsLoad"]
-    load_iop = InstObjParams(name, Name, 'AtomicMemOpMicro',
-        {'ea_code': ea_code, 'code': load_code, 'op_name': 'Load'},
-        load_inst_flags)
-    decoder_output += AtomicMemOpLoadConstructor.subst(load_iop)
-    exec_output += AtomicMemOpLoadExecute.subst(load_iop) \
-        + AtomicMemOpLoadInitiateAcc.subst(load_iop) \
-        + AtomicMemOpLoadCompleteAcc.subst(load_iop)
-
-    store_inst_flags = makeList(inst_flags) + ["IsMemRef", "IsStore"]
-    store_iop = InstObjParams(name, Name, 'AtomicMemOpMicro',
-        {'ea_code': ea_code, 'code': store_code, 'op_name': 'Store'},
-        store_inst_flags)
-    decoder_output += AtomicMemOpStoreConstructor.subst(store_iop)
-    exec_output += AtomicMemOpStoreExecute.subst(store_iop) \
-        + AtomicMemOpStoreInitiateAcc.subst(store_iop) \
-        + AtomicMemOpStoreCompleteAcc.subst(store_iop)
+    rmw_mem_flags = makeList(mem_flags)
+    rmw_inst_flags = makeList(inst_flags)
+    rmw_iop = InstObjParams(name, Name, 'AtomicMemOpMicro',
+                            {'ea_code': ea_code,
+                             'memacc_code': memacc_code,
+                             'postacc_code': postacc_code,
+                             'amoop_code': amoop_code},
+                            rmw_inst_flags)
+
+    rmw_iop.constructor += '\n\tmemAccessFlags = memAccessFlags | ' + \
+          '|'.join(['Request::%s' % flag for flag in rmw_mem_flags]) + ';'
+
+    decoder_output += AtomicMemOpRMWConstructor.subst(rmw_iop)
+    decode_block += BasicDecode.subst(rmw_iop)
+    exec_output += AtomicMemOpRMWExecute.subst(rmw_iop) \
+                 + AtomicMemOpRMWInitiateAcc.subst(rmw_iop) \
+                 + AtomicMemOpRMWCompleteAcc.subst(rmw_iop)
 }};
index 3c8dbe948dd55ec6aca967b0906c0364b40095cf..957cffba317e9b0e10a9eb5733afef554e6dc16f 100644 (file)
@@ -6,7 +6,5 @@
 
 namespace RiscvISA
 {
-
-std::stack<Addr> locked_addrs;
-
+    std::unordered_map<int, std::stack<Addr>> locked_addrs;
 }
index b1cde34c69f605e8bf47fc1864681db65841d7d0..08d27f15cd3291931c3f4ab075c77855862595d3 100644 (file)
@@ -49,6 +49,7 @@
 #define __ARCH_RISCV_LOCKED_MEM_HH__
 
 #include <stack>
+#include <unordered_map>
 
 #include "arch/registers.hh"
 #include "base/logging.hh"
@@ -67,24 +68,28 @@ const int WARN_FAILURE = 10000;
 
 // RISC-V allows multiple locks per hart, but each SC has to unlock the most
 // recent one, so we use a stack here.
-extern std::stack<Addr> locked_addrs;
+extern std::unordered_map<int, std::stack<Addr>> locked_addrs;
 
 template <class XC> inline void
 handleLockedSnoop(XC *xc, PacketPtr pkt, Addr cacheBlockMask)
 {
-    if (locked_addrs.empty())
+    std::stack<Addr>& locked_addr_stack = locked_addrs[xc->contextId()];
+
+    if (locked_addr_stack.empty())
         return;
     Addr snoop_addr = pkt->getAddr() & cacheBlockMask;
     DPRINTF(LLSC, "Locked snoop on address %x.\n", snoop_addr);
-    if ((locked_addrs.top() & cacheBlockMask) == snoop_addr)
-        locked_addrs.pop();
+    if ((locked_addr_stack.top() & cacheBlockMask) == snoop_addr)
+        locked_addr_stack.pop();
 }
 
 
 template <class XC> inline void
 handleLockedRead(XC *xc, const RequestPtr &req)
 {
-    locked_addrs.push(req->getPaddr() & ~0xF);
+    std::stack<Addr>& locked_addr_stack = locked_addrs[xc->contextId()];
+
+    locked_addr_stack.push(req->getPaddr() & ~0xF);
     DPRINTF(LLSC, "[cid:%d]: Reserved address %x.\n",
             req->contextId(), req->getPaddr() & ~0xF);
 }
@@ -96,21 +101,23 @@ handleLockedSnoopHit(XC *xc)
 template <class XC> inline bool
 handleLockedWrite(XC *xc, const RequestPtr &req, Addr cacheBlockMask)
 {
+    std::stack<Addr>& locked_addr_stack = locked_addrs[xc->contextId()];
+
     // Normally RISC-V uses zero to indicate success and nonzero to indicate
     // failure (right now only 1 is reserved), but in gem5 zero indicates
     // failure and one indicates success, so here we conform to that (it should
     // be switched in the instruction's implementation)
 
     DPRINTF(LLSC, "[cid:%d]: locked_addrs empty? %s.\n", req->contextId(),
-            locked_addrs.empty() ? "yes" : "no");
-    if (!locked_addrs.empty()) {
+            locked_addr_stack.empty() ? "yes" : "no");
+    if (!locked_addr_stack.empty()) {
         DPRINTF(LLSC, "[cid:%d]: addr = %x.\n", req->contextId(),
                 req->getPaddr() & ~0xF);
         DPRINTF(LLSC, "[cid:%d]: last locked addr = %x.\n", req->contextId(),
-                locked_addrs.top());
+                locked_addr_stack.top());
     }
-    if (locked_addrs.empty()
-            || locked_addrs.top() != ((req->getPaddr() & ~0xF))) {
+    if (locked_addr_stack.empty()
+            || locked_addr_stack.top() != ((req->getPaddr() & ~0xF))) {
         req->setExtraData(0);
         int stCondFailures = xc->readStCondFailures();
         xc->setStCondFailures(++stCondFailures);