X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fmem%2Fprotocol%2FMOESI_hammer-cache.sm;h=9cbd277d4fb82f22a0d7f2c79c08b1612aaab943;hb=e33b3aa6692b172f6db5957774a9e0289e81fa5b;hp=94fd25f56817fb20a97611b27aa98d8e9387536f;hpb=847ba941ea345f01b2f5176432b6541902a41d2b;p=gem5.git diff --git a/src/mem/protocol/MOESI_hammer-cache.sm b/src/mem/protocol/MOESI_hammer-cache.sm index 94fd25f56..9cbd277d4 100644 --- a/src/mem/protocol/MOESI_hammer-cache.sm +++ b/src/mem/protocol/MOESI_hammer-cache.sm @@ -1,5 +1,5 @@ /* - * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood + * Copyright (c) 1999-2013 Mark D. Hill and David A. Wood * Copyright (c) 2009 Advanced Micro Devices, Inc. * All rights reserved. * @@ -26,33 +26,41 @@ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * - * AMD's contributions to the MOESI hammer protocol do not constitute an + * AMD's contributions to the MOESI hammer protocol do not constitute an * endorsement of its similarity to any AMD products. * * Authors: Milo Martin * Brad Beckmann */ -machine(L1Cache, "AMD Hammer-like protocol") -: Sequencer * sequencer, - CacheMemory * L1IcacheMemory, - CacheMemory * L1DcacheMemory, - CacheMemory * L2cacheMemory, - int cache_response_latency = 10, - int issue_latency = 2, - int l2_cache_hit_latency = 10, - bool no_mig_atomic = true +machine(MachineType:L1Cache, "AMD Hammer-like protocol") + : Sequencer * sequencer; + CacheMemory * L1Icache; + CacheMemory * L1Dcache; + CacheMemory * L2cache; + Cycles cache_response_latency := 10; + Cycles issue_latency := 2; + Cycles l2_cache_hit_latency := 10; + bool no_mig_atomic := "True"; + bool send_evictions; + + // NETWORK BUFFERS + MessageBuffer * requestFromCache, network="To", virtual_network="2", + vnet_type="request"; + MessageBuffer * responseFromCache, network="To", virtual_network="4", + vnet_type="response"; + MessageBuffer * unblockFromCache, network="To", virtual_network="5", + vnet_type="unblock"; + + MessageBuffer * forwardToCache, network="From", virtual_network="3", + vnet_type="forward"; + MessageBuffer * responseToCache, network="From", virtual_network="4", + vnet_type="response"; + + MessageBuffer * mandatoryQueue; + + MessageBuffer * triggerQueue; { - - // NETWORK BUFFERS - MessageBuffer requestFromCache, network="To", virtual_network="2", ordered="false"; - MessageBuffer responseFromCache, network="To", virtual_network="4", ordered="false"; - MessageBuffer unblockFromCache, network="To", virtual_network="5", ordered="false"; - - MessageBuffer forwardToCache, network="From", virtual_network="3", ordered="false"; - MessageBuffer responseToCache, network="From", virtual_network="4", ordered="false"; - - // STATES state_declaration(State, desc="Cache states", default="L1Cache_State_I") { // Base states @@ -62,6 +70,13 @@ machine(L1Cache, "AMD Hammer-like protocol") M, AccessPermission:Read_Only, desc="Modified (dirty)"; MM, AccessPermission:Read_Write, desc="Modified (dirty and locally modified)"; + // Base states, locked and ready to service the mandatory queue + IR, AccessPermission:Invalid, desc="Idle"; + SR, AccessPermission:Read_Only, desc="Shared"; + OR, AccessPermission:Read_Only, desc="Owned"; + MR, AccessPermission:Read_Only, desc="Modified (dirty)"; + MMR, AccessPermission:Read_Write, desc="Modified (dirty and locally modified)"; + // Transient States IM, AccessPermission:Busy, "IM", desc="Issued GetX"; SM, AccessPermission:Read_Only, "SM", desc="Issued GetX, we 
still have a valid copy of the line"; @@ -74,11 +89,19 @@ machine(L1Cache, "AMD Hammer-like protocol") OI, AccessPermission:Busy, "OI", desc="Issued PutO, waiting for ack"; MI, AccessPermission:Busy, "MI", desc="Issued PutX, waiting for ack"; II, AccessPermission:Busy, "II", desc="Issued PutX/O, saw Other_GETS or Other_GETX, waiting for ack"; - IT, AccessPermission:Busy, "IT", desc="Invalid block transferring to L1"; ST, AccessPermission:Busy, "ST", desc="S block transferring to L1"; OT, AccessPermission:Busy, "OT", desc="O block transferring to L1"; MT, AccessPermission:Busy, "MT", desc="M block transferring to L1"; - MMT, AccessPermission:Busy, "MMT", desc="MM block transferring to L1"; + MMT, AccessPermission:Busy, "MMT", desc="MM block transferring to L0"; + + //Transition States Related to Flushing + MI_F, AccessPermission:Busy, "MI_F", desc="Issued PutX due to a Flush, waiting for ack"; + MM_F, AccessPermission:Busy, "MM_F", desc="Issued GETF due to a Flush, waiting for ack"; + IM_F, AccessPermission:Busy, "IM_F", desc="Issued GetX due to a Flush"; + ISM_F, AccessPermission:Read_Only, "ISM_F", desc="Issued GetX, received data, waiting for all acks"; + SM_F, AccessPermission:Read_Only, "SM_F", desc="Issued GetX, we still have an old copy of the line"; + OM_F, AccessPermission:Read_Only, "OM_F", desc="Issued GetX, received data"; + MM_WF, AccessPermission:Busy, "MM_WF", desc="Issued GetX, received exclusive data"; } // EVENTS @@ -113,14 +136,13 @@ machine(L1Cache, "AMD Hammer-like protocol") // Triggers All_acks, desc="Received all required data and message acks"; All_acks_no_sharers, desc="Received all acks and no other processor has a shared copy"; - } - // TYPES + // For Flush + Flush_line, desc="flush the cache line from all caches"; + Block_Ack, desc="the directory is blocked and ready for the flush"; + } // STRUCTURE DEFINITIONS - - MessageBuffer mandatoryQueue, ordered="false"; - // CacheEntry structure(Entry, desc="...", interface="AbstractCacheEntry") { State CacheState, desc="cache state"; @@ -140,57 +162,95 @@ machine(L1Cache, "AMD Hammer-like protocol") bool AppliedSilentAcks, default="false", desc="for full-bit dir, does the pending msg count reflect the silent acks"; MachineID LastResponder, desc="last machine to send a response for this request"; MachineID CurOwner, desc="current owner of the block, used for UnblockS responses"; - Time InitialRequestTime, default="0", desc="time the initial requests was sent from the L1Cache"; - Time ForwardRequestTime, default="0", desc="time the dir forwarded the request"; - Time FirstResponseTime, default="0", desc="the time the first response was received"; + + Cycles InitialRequestTime, default="Cycles(0)", + desc="time the initial requests was sent from the L1Cache"; + Cycles ForwardRequestTime, default="Cycles(0)", + desc="time the dir forwarded the request"; + Cycles FirstResponseTime, default="Cycles(0)", + desc="the time the first response was received"; } - external_type(TBETable) { - TBE lookup(Address); - void allocate(Address); - void deallocate(Address); - bool isPresent(Address); + structure(TBETable, external="yes") { + TBE lookup(Addr); + void allocate(Addr); + void deallocate(Addr); + bool isPresent(Addr); } - TBETable TBEs, template_hack=""; + TBETable TBEs, template="", constructor="m_number_of_TBEs"; + Tick clockEdge(); void set_cache_entry(AbstractCacheEntry b); void unset_cache_entry(); void set_tbe(TBE b); void unset_tbe(); void wakeUpAllBuffers(); + void wakeUpBuffers(Addr a); + Cycles curCycle(); + MachineID 
mapAddressToMachine(Addr addr, MachineType mtype); - Entry getCacheEntry(Address address), return_by_pointer="yes" { - Entry L2cache_entry := static_cast(Entry, "pointer", L2cacheMemory.lookup(address)); + Entry getCacheEntry(Addr address), return_by_pointer="yes" { + Entry L2cache_entry := static_cast(Entry, "pointer", L2cache.lookup(address)); if(is_valid(L2cache_entry)) { return L2cache_entry; } - Entry L1Dcache_entry := static_cast(Entry, "pointer", L1DcacheMemory.lookup(address)); + Entry L1Dcache_entry := static_cast(Entry, "pointer", L1Dcache.lookup(address)); if(is_valid(L1Dcache_entry)) { return L1Dcache_entry; } - Entry L1Icache_entry := static_cast(Entry, "pointer", L1IcacheMemory.lookup(address)); + Entry L1Icache_entry := static_cast(Entry, "pointer", L1Icache.lookup(address)); return L1Icache_entry; } - Entry getL2CacheEntry(Address address), return_by_pointer="yes" { - Entry L2cache_entry := static_cast(Entry, "pointer", L2cacheMemory.lookup(address)); + void functionalRead(Addr addr, Packet *pkt) { + Entry cache_entry := getCacheEntry(addr); + if(is_valid(cache_entry)) { + testAndRead(addr, cache_entry.DataBlk, pkt); + } else { + TBE tbe := TBEs[addr]; + if(is_valid(tbe)) { + testAndRead(addr, tbe.DataBlk, pkt); + } else { + error("Missing data block"); + } + } + } + + int functionalWrite(Addr addr, Packet *pkt) { + int num_functional_writes := 0; + + Entry cache_entry := getCacheEntry(addr); + if(is_valid(cache_entry)) { + num_functional_writes := num_functional_writes + + testAndWrite(addr, cache_entry.DataBlk, pkt); + return num_functional_writes; + } + + TBE tbe := TBEs[addr]; + num_functional_writes := num_functional_writes + + testAndWrite(addr, tbe.DataBlk, pkt); + return num_functional_writes; + } + + Entry getL2CacheEntry(Addr address), return_by_pointer="yes" { + Entry L2cache_entry := static_cast(Entry, "pointer", L2cache.lookup(address)); return L2cache_entry; } - Entry getL1DCacheEntry(Address address), return_by_pointer="yes" { - Entry L1Dcache_entry := static_cast(Entry, "pointer", L1DcacheMemory.lookup(address)); + Entry getL1DCacheEntry(Addr address), return_by_pointer="yes" { + Entry L1Dcache_entry := static_cast(Entry, "pointer", L1Dcache.lookup(address)); return L1Dcache_entry; } - Entry getL1ICacheEntry(Address address), return_by_pointer="yes" { - Entry L1Icache_entry := static_cast(Entry, "pointer", L1IcacheMemory.lookup(address)); + Entry getL1ICacheEntry(Addr address), return_by_pointer="yes" { + Entry L1Icache_entry := static_cast(Entry, "pointer", L1Icache.lookup(address)); return L1Icache_entry; } - State getState(TBE tbe, Entry cache_entry, Address addr) { + State getState(TBE tbe, Entry cache_entry, Addr addr) { if(is_valid(tbe)) { return tbe.TBEState; } else if (is_valid(cache_entry)) { @@ -199,10 +259,10 @@ machine(L1Cache, "AMD Hammer-like protocol") return State:I; } - void setState(TBE tbe, Entry cache_entry, Address addr, State state) { - assert((L1DcacheMemory.isTagPresent(addr) && L1IcacheMemory.isTagPresent(addr)) == false); - assert((L1IcacheMemory.isTagPresent(addr) && L2cacheMemory.isTagPresent(addr)) == false); - assert((L1DcacheMemory.isTagPresent(addr) && L2cacheMemory.isTagPresent(addr)) == false); + void setState(TBE tbe, Entry cache_entry, Addr addr, State state) { + assert((L1Dcache.isTagPresent(addr) && L1Icache.isTagPresent(addr)) == false); + assert((L1Icache.isTagPresent(addr) && L2cache.isTagPresent(addr)) == false); + assert((L1Dcache.isTagPresent(addr) && L2cache.isTagPresent(addr)) == false); if (is_valid(tbe)) { 
tbe.TBEState := state; @@ -213,36 +273,46 @@ machine(L1Cache, "AMD Hammer-like protocol") } } - Event mandatory_request_type_to_event(CacheRequestType type) { - if (type == CacheRequestType:LD) { - return Event:Load; - } else if (type == CacheRequestType:IFETCH) { - return Event:Ifetch; - } else if ((type == CacheRequestType:ST) || (type == CacheRequestType:ATOMIC)) { - return Event:Store; - } else { - error("Invalid CacheRequestType"); + AccessPermission getAccessPermission(Addr addr) { + TBE tbe := TBEs[addr]; + if(is_valid(tbe)) { + return L1Cache_State_to_permission(tbe.TBEState); + } + + Entry cache_entry := getCacheEntry(addr); + if(is_valid(cache_entry)) { + return L1Cache_State_to_permission(cache_entry.CacheState); + } + + return AccessPermission:NotPresent; + } + + void setAccessPermission(Entry cache_entry, Addr addr, State state) { + if (is_valid(cache_entry)) { + cache_entry.changePermission(L1Cache_State_to_permission(state)); } } - GenericMachineType getNondirectHitMachType(Address addr, MachineID sender) { - if (machineIDToMachineType(sender) == MachineType:L1Cache) { - // - // NOTE direct local hits should not call this - // - return GenericMachineType:L1Cache_wCC; + Event mandatory_request_type_to_event(RubyRequestType type) { + if (type == RubyRequestType:LD) { + return Event:Load; + } else if (type == RubyRequestType:IFETCH) { + return Event:Ifetch; + } else if ((type == RubyRequestType:ST) || (type == RubyRequestType:ATOMIC)) { + return Event:Store; + } else if ((type == RubyRequestType:FLUSH)) { + return Event:Flush_line; } else { - return ConvertMachToGenericMach(machineIDToMachineType(sender)); + error("Invalid RubyRequestType"); } } - GenericMachineType testAndClearLocalHit(Entry cache_entry) { + MachineType testAndClearLocalHit(Entry cache_entry) { if (is_valid(cache_entry) && cache_entry.FromL2) { cache_entry.FromL2 := false; - return GenericMachineType:L2Cache; - } else { - return GenericMachineType:L1Cache; + return MachineType:L2Cache; } + return MachineType:L1Cache; } bool IsAtomicAccessed(Entry cache_entry) { @@ -250,10 +320,7 @@ machine(L1Cache, "AMD Hammer-like protocol") return cache_entry.AtomicAccessed; } - MessageBuffer triggerQueue, ordered="false"; - // ** OUT_PORTS ** - out_port(requestNetwork_out, RequestMsg, requestFromCache); out_port(responseNetwork_out, ResponseMsg, responseFromCache); out_port(unblockNetwork_out, ResponseMsg, unblockFromCache); @@ -263,18 +330,18 @@ machine(L1Cache, "AMD Hammer-like protocol") // Trigger Queue in_port(triggerQueue_in, TriggerMsg, triggerQueue, rank=3) { - if (triggerQueue_in.isReady()) { + if (triggerQueue_in.isReady(clockEdge())) { peek(triggerQueue_in, TriggerMsg) { - Entry cache_entry := getCacheEntry(in_msg.Address); - TBE tbe := TBEs[in_msg.Address]; + Entry cache_entry := getCacheEntry(in_msg.addr); + TBE tbe := TBEs[in_msg.addr]; if (in_msg.Type == TriggerType:L2_to_L1) { - trigger(Event:Complete_L2_to_L1, in_msg.Address, cache_entry, tbe); + trigger(Event:Complete_L2_to_L1, in_msg.addr, cache_entry, tbe); } else if (in_msg.Type == TriggerType:ALL_ACKS) { - trigger(Event:All_acks, in_msg.Address, cache_entry, tbe); + trigger(Event:All_acks, in_msg.addr, cache_entry, tbe); } else if (in_msg.Type == TriggerType:ALL_ACKS_NO_SHARERS) { - trigger(Event:All_acks_no_sharers, in_msg.Address, cache_entry, tbe); + trigger(Event:All_acks_no_sharers, in_msg.addr, cache_entry, tbe); } else { error("Unexpected message"); } @@ -286,22 +353,22 @@ machine(L1Cache, "AMD Hammer-like protocol") // Response Network 
in_port(responseToCache_in, ResponseMsg, responseToCache, rank=2) { - if (responseToCache_in.isReady()) { - peek(responseToCache_in, ResponseMsg, block_on="Address") { + if (responseToCache_in.isReady(clockEdge())) { + peek(responseToCache_in, ResponseMsg, block_on="addr") { - Entry cache_entry := getCacheEntry(in_msg.Address); - TBE tbe := TBEs[in_msg.Address]; + Entry cache_entry := getCacheEntry(in_msg.addr); + TBE tbe := TBEs[in_msg.addr]; if (in_msg.Type == CoherenceResponseType:ACK) { - trigger(Event:Ack, in_msg.Address, cache_entry, tbe); + trigger(Event:Ack, in_msg.addr, cache_entry, tbe); } else if (in_msg.Type == CoherenceResponseType:ACK_SHARED) { - trigger(Event:Shared_Ack, in_msg.Address, cache_entry, tbe); + trigger(Event:Shared_Ack, in_msg.addr, cache_entry, tbe); } else if (in_msg.Type == CoherenceResponseType:DATA) { - trigger(Event:Data, in_msg.Address, cache_entry, tbe); + trigger(Event:Data, in_msg.addr, cache_entry, tbe); } else if (in_msg.Type == CoherenceResponseType:DATA_SHARED) { - trigger(Event:Shared_Data, in_msg.Address, cache_entry, tbe); + trigger(Event:Shared_Data, in_msg.addr, cache_entry, tbe); } else if (in_msg.Type == CoherenceResponseType:DATA_EXCLUSIVE) { - trigger(Event:Exclusive_Data, in_msg.Address, cache_entry, tbe); + trigger(Event:Exclusive_Data, in_msg.addr, cache_entry, tbe); } else { error("Unexpected message"); } @@ -311,36 +378,39 @@ machine(L1Cache, "AMD Hammer-like protocol") // Forward Network in_port(forwardToCache_in, RequestMsg, forwardToCache, rank=1) { - if (forwardToCache_in.isReady()) { - peek(forwardToCache_in, RequestMsg, block_on="Address") { + if (forwardToCache_in.isReady(clockEdge())) { + peek(forwardToCache_in, RequestMsg, block_on="addr") { - Entry cache_entry := getCacheEntry(in_msg.Address); - TBE tbe := TBEs[in_msg.Address]; + Entry cache_entry := getCacheEntry(in_msg.addr); + TBE tbe := TBEs[in_msg.addr]; - if (in_msg.Type == CoherenceRequestType:GETX) { - trigger(Event:Other_GETX, in_msg.Address, cache_entry, tbe); + if ((in_msg.Type == CoherenceRequestType:GETX) || + (in_msg.Type == CoherenceRequestType:GETF)) { + trigger(Event:Other_GETX, in_msg.addr, cache_entry, tbe); } else if (in_msg.Type == CoherenceRequestType:MERGED_GETS) { - trigger(Event:Merged_GETS, in_msg.Address, cache_entry, tbe); + trigger(Event:Merged_GETS, in_msg.addr, cache_entry, tbe); } else if (in_msg.Type == CoherenceRequestType:GETS) { if (machineCount(MachineType:L1Cache) > 1) { if (is_valid(cache_entry)) { if (IsAtomicAccessed(cache_entry) && no_mig_atomic) { - trigger(Event:Other_GETS_No_Mig, in_msg.Address, cache_entry, tbe); + trigger(Event:Other_GETS_No_Mig, in_msg.addr, cache_entry, tbe); } else { - trigger(Event:Other_GETS, in_msg.Address, cache_entry, tbe); + trigger(Event:Other_GETS, in_msg.addr, cache_entry, tbe); } } else { - trigger(Event:Other_GETS, in_msg.Address, cache_entry, tbe); + trigger(Event:Other_GETS, in_msg.addr, cache_entry, tbe); } } else { - trigger(Event:NC_DMA_GETS, in_msg.Address, cache_entry, tbe); + trigger(Event:NC_DMA_GETS, in_msg.addr, cache_entry, tbe); } } else if (in_msg.Type == CoherenceRequestType:INV) { - trigger(Event:Invalidate, in_msg.Address, cache_entry, tbe); + trigger(Event:Invalidate, in_msg.addr, cache_entry, tbe); } else if (in_msg.Type == CoherenceRequestType:WB_ACK) { - trigger(Event:Writeback_Ack, in_msg.Address, cache_entry, tbe); + trigger(Event:Writeback_Ack, in_msg.addr, cache_entry, tbe); } else if (in_msg.Type == CoherenceRequestType:WB_NACK) { - trigger(Event:Writeback_Nack, 
in_msg.Address, cache_entry, tbe); + trigger(Event:Writeback_Nack, in_msg.addr, cache_entry, tbe); + } else if (in_msg.Type == CoherenceRequestType:BLOCK_ACK) { + trigger(Event:Block_Ack, in_msg.addr, cache_entry, tbe); } else { error("Unexpected message"); } @@ -351,19 +421,20 @@ machine(L1Cache, "AMD Hammer-like protocol") // Nothing from the request network // Mandatory Queue - in_port(mandatoryQueue_in, CacheMsg, mandatoryQueue, desc="...", rank=0) { - if (mandatoryQueue_in.isReady()) { - peek(mandatoryQueue_in, CacheMsg, block_on="LineAddress") { + in_port(mandatoryQueue_in, RubyRequest, mandatoryQueue, desc="...", rank=0) { + if (mandatoryQueue_in.isReady(clockEdge())) { + peek(mandatoryQueue_in, RubyRequest, block_on="LineAddress") { // Check for data access to blocks in I-cache and ifetchs to blocks in D-cache TBE tbe := TBEs[in_msg.LineAddress]; - if (in_msg.Type == CacheRequestType:IFETCH) { + if (in_msg.Type == RubyRequestType:IFETCH) { // ** INSTRUCTION ACCESS *** Entry L1Icache_entry := getL1ICacheEntry(in_msg.LineAddress); if (is_valid(L1Icache_entry)) { - // The tag matches for the L1, so the L1 fetches the line. We know it can't be in the L2 due to exclusion + // The tag matches for the L1, so the L1 fetches the line. + // We know it can't be in the L2 due to exclusion trigger(mandatory_request_type_to_event(in_msg.Type), in_msg.LineAddress, L1Icache_entry, tbe); } else { @@ -371,18 +442,18 @@ machine(L1Cache, "AMD Hammer-like protocol") Entry L1Dcache_entry := getL1DCacheEntry(in_msg.LineAddress); if (is_valid(L1Dcache_entry)) { // The block is in the wrong L1, try to write it to the L2 - if (L2cacheMemory.cacheAvail(in_msg.LineAddress)) { + if (L2cache.cacheAvail(in_msg.LineAddress)) { trigger(Event:L1_to_L2, in_msg.LineAddress, L1Dcache_entry, tbe); } else { - Address l2_victim_addr := L2cacheMemory.cacheProbe(in_msg.LineAddress); + Addr l2_victim_addr := L2cache.cacheProbe(in_msg.LineAddress); trigger(Event:L2_Replacement, - l2_victim_addr, + l2_victim_addr, getL2CacheEntry(l2_victim_addr), TBEs[l2_victim_addr]); } } - if (L1IcacheMemory.cacheAvail(in_msg.LineAddress)) { + if (L1Icache.cacheAvail(in_msg.LineAddress)) { // L1 does't have the line, but we have space for it in the L1 Entry L2cache_entry := getL2CacheEntry(in_msg.LineAddress); @@ -397,15 +468,15 @@ machine(L1Cache, "AMD Hammer-like protocol") } } else { // No room in the L1, so we need to make room - Address l1i_victim_addr := L1IcacheMemory.cacheProbe(in_msg.LineAddress); - if (L2cacheMemory.cacheAvail(l1i_victim_addr)) { + Addr l1i_victim_addr := L1Icache.cacheProbe(in_msg.LineAddress); + if (L2cache.cacheAvail(l1i_victim_addr)) { // The L2 has room, so we move the line from the L1 to the L2 trigger(Event:L1_to_L2, l1i_victim_addr, getL1ICacheEntry(l1i_victim_addr), TBEs[l1i_victim_addr]); } else { - Address l2_victim_addr := L2cacheMemory.cacheProbe(l1i_victim_addr); + Addr l2_victim_addr := L2cache.cacheProbe(l1i_victim_addr); // The L2 does not have room, so we replace a line from the L2 trigger(Event:L2_Replacement, l2_victim_addr, @@ -419,7 +490,8 @@ machine(L1Cache, "AMD Hammer-like protocol") Entry L1Dcache_entry := getL1DCacheEntry(in_msg.LineAddress); if (is_valid(L1Dcache_entry)) { - // The tag matches for the L1, so the L1 fetches the line. We know it can't be in the L2 due to exclusion + // The tag matches for the L1, so the L1 fetches the line. 
+ // We know it can't be in the L2 due to exclusion trigger(mandatory_request_type_to_event(in_msg.Type), in_msg.LineAddress, L1Dcache_entry, tbe); } else { @@ -428,10 +500,10 @@ machine(L1Cache, "AMD Hammer-like protocol") Entry L1Icache_entry := getL1ICacheEntry(in_msg.LineAddress); if (is_valid(L1Icache_entry)) { // The block is in the wrong L1, try to write it to the L2 - if (L2cacheMemory.cacheAvail(in_msg.LineAddress)) { + if (L2cache.cacheAvail(in_msg.LineAddress)) { trigger(Event:L1_to_L2, in_msg.LineAddress, L1Icache_entry, tbe); } else { - Address l2_victim_addr := L2cacheMemory.cacheProbe(in_msg.LineAddress); + Addr l2_victim_addr := L2cache.cacheProbe(in_msg.LineAddress); trigger(Event:L2_Replacement, l2_victim_addr, getL2CacheEntry(l2_victim_addr), @@ -439,7 +511,7 @@ machine(L1Cache, "AMD Hammer-like protocol") } } - if (L1DcacheMemory.cacheAvail(in_msg.LineAddress)) { + if (L1Dcache.cacheAvail(in_msg.LineAddress)) { // L1 does't have the line, but we have space for it in the L1 Entry L2cache_entry := getL2CacheEntry(in_msg.LineAddress); if (is_valid(L2cache_entry)) { @@ -453,15 +525,15 @@ machine(L1Cache, "AMD Hammer-like protocol") } } else { // No room in the L1, so we need to make room - Address l1d_victim_addr := L1DcacheMemory.cacheProbe(in_msg.LineAddress); - if (L2cacheMemory.cacheAvail(l1d_victim_addr)) { + Addr l1d_victim_addr := L1Dcache.cacheProbe(in_msg.LineAddress); + if (L2cache.cacheAvail(l1d_victim_addr)) { // The L2 has room, so we move the line from the L1 to the L2 trigger(Event:L1_to_L2, l1d_victim_addr, getL1DCacheEntry(l1d_victim_addr), TBEs[l1d_victim_addr]); } else { - Address l2_victim_addr := L2cacheMemory.cacheProbe(l1d_victim_addr); + Addr l2_victim_addr := L2cache.cacheProbe(l1d_victim_addr); // The L2 does not have room, so we replace a line from the L2 trigger(Event:L2_Replacement, l2_victim_addr, @@ -474,40 +546,76 @@ machine(L1Cache, "AMD Hammer-like protocol") } } } - + // ACTIONS action(a_issueGETS, "a", desc="Issue GETS") { - enqueue(requestNetwork_out, RequestMsg, latency=issue_latency) { + enqueue(requestNetwork_out, RequestMsg, issue_latency) { assert(is_valid(tbe)); - out_msg.Address := address; + out_msg.addr := address; out_msg.Type := CoherenceRequestType:GETS; out_msg.Requestor := machineID; - out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.Destination.add(mapAddressToMachine(address, MachineType:Directory)); out_msg.MessageSize := MessageSizeType:Request_Control; - out_msg.InitialRequestTime := get_time(); - tbe.NumPendingMsgs := machineCount(MachineType:L1Cache); // One from each other cache (n-1) plus the memory (+1) + out_msg.InitialRequestTime := curCycle(); + + // One from each other cache (n-1) plus the memory (+1) + tbe.NumPendingMsgs := machineCount(MachineType:L1Cache); } } action(b_issueGETX, "b", desc="Issue GETX") { - enqueue(requestNetwork_out, RequestMsg, latency=issue_latency) { + enqueue(requestNetwork_out, RequestMsg, issue_latency) { assert(is_valid(tbe)); - out_msg.Address := address; + out_msg.addr := address; out_msg.Type := CoherenceRequestType:GETX; out_msg.Requestor := machineID; - out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.Destination.add(mapAddressToMachine(address, MachineType:Directory)); out_msg.MessageSize := MessageSizeType:Request_Control; - out_msg.InitialRequestTime := get_time(); - tbe.NumPendingMsgs := machineCount(MachineType:L1Cache); // One from each other cache (n-1) plus the memory (+1) + out_msg.InitialRequestTime := curCycle(); + + // One 
from each other cache (n-1) plus the memory (+1) + tbe.NumPendingMsgs := machineCount(MachineType:L1Cache); + } + } + + action(b_issueGETXIfMoreThanOne, "bo", desc="Issue GETX") { + if (machineCount(MachineType:L1Cache) > 1) { + enqueue(requestNetwork_out, RequestMsg, issue_latency) { + assert(is_valid(tbe)); + out_msg.addr := address; + out_msg.Type := CoherenceRequestType:GETX; + out_msg.Requestor := machineID; + out_msg.Destination.add(mapAddressToMachine(address, MachineType:Directory)); + out_msg.MessageSize := MessageSizeType:Request_Control; + out_msg.InitialRequestTime := curCycle(); + } + } + + // One from each other cache (n-1) plus the memory (+1) + tbe.NumPendingMsgs := machineCount(MachineType:L1Cache); + } + + action(bf_issueGETF, "bf", desc="Issue GETF") { + enqueue(requestNetwork_out, RequestMsg, issue_latency) { + assert(is_valid(tbe)); + out_msg.addr := address; + out_msg.Type := CoherenceRequestType:GETF; + out_msg.Requestor := machineID; + out_msg.Destination.add(mapAddressToMachine(address, MachineType:Directory)); + out_msg.MessageSize := MessageSizeType:Request_Control; + out_msg.InitialRequestTime := curCycle(); + + // One from each other cache (n-1) plus the memory (+1) + tbe.NumPendingMsgs := machineCount(MachineType:L1Cache); } } action(c_sendExclusiveData, "c", desc="Send exclusive data from cache to requestor") { peek(forwardToCache_in, RequestMsg) { - enqueue(responseNetwork_out, ResponseMsg, latency=cache_response_latency) { + enqueue(responseNetwork_out, ResponseMsg, cache_response_latency) { assert(is_valid(cache_entry)); - out_msg.Address := address; + out_msg.addr := address; out_msg.Type := CoherenceResponseType:DATA_EXCLUSIVE; out_msg.Sender := machineID; out_msg.Destination.add(in_msg.Requestor); @@ -526,21 +634,54 @@ machine(L1Cache, "AMD Hammer-like protocol") } } + action(ct_sendExclusiveDataFromTBE, "ct", desc="Send exclusive data from tbe to requestor") { + peek(forwardToCache_in, RequestMsg) { + enqueue(responseNetwork_out, ResponseMsg, cache_response_latency) { + assert(is_valid(tbe)); + out_msg.addr := address; + out_msg.Type := CoherenceResponseType:DATA_EXCLUSIVE; + out_msg.Sender := machineID; + out_msg.Destination.add(in_msg.Requestor); + out_msg.DataBlk := tbe.DataBlk; + out_msg.Dirty := tbe.Dirty; + if (in_msg.DirectedProbe) { + out_msg.Acks := machineCount(MachineType:L1Cache); + } else { + out_msg.Acks := 2; + } + out_msg.SilentAcks := in_msg.SilentAcks; + out_msg.MessageSize := MessageSizeType:Response_Data; + out_msg.InitialRequestTime := in_msg.InitialRequestTime; + out_msg.ForwardRequestTime := in_msg.ForwardRequestTime; + } + } + } + action(d_issuePUT, "d", desc="Issue PUT") { - enqueue(requestNetwork_out, RequestMsg, latency=issue_latency) { - out_msg.Address := address; + enqueue(requestNetwork_out, RequestMsg, issue_latency) { + out_msg.addr := address; out_msg.Type := CoherenceRequestType:PUT; out_msg.Requestor := machineID; - out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.Destination.add(mapAddressToMachine(address, MachineType:Directory)); + out_msg.MessageSize := MessageSizeType:Writeback_Control; + } + } + + action(df_issuePUTF, "df", desc="Issue PUTF") { + enqueue(requestNetwork_out, RequestMsg, issue_latency) { + out_msg.addr := address; + out_msg.Type := CoherenceRequestType:PUTF; + out_msg.Requestor := machineID; + out_msg.Destination.add(mapAddressToMachine(address, MachineType:Directory)); out_msg.MessageSize := MessageSizeType:Writeback_Control; } } action(e_sendData, "e", desc="Send data from 
cache to requestor") { peek(forwardToCache_in, RequestMsg) { - enqueue(responseNetwork_out, ResponseMsg, latency=cache_response_latency) { + enqueue(responseNetwork_out, ResponseMsg, cache_response_latency) { assert(is_valid(cache_entry)); - out_msg.Address := address; + out_msg.addr := address; out_msg.Type := CoherenceResponseType:DATA; out_msg.Sender := machineID; out_msg.Destination.add(in_msg.Requestor); @@ -559,11 +700,11 @@ machine(L1Cache, "AMD Hammer-like protocol") } } - action(ee_sendDataShared, "\e", desc="Send data from cache to requestor, keep a shared copy") { + action(ee_sendDataShared, "\e", desc="Send data from cache to requestor, remaining the owner") { peek(forwardToCache_in, RequestMsg) { - enqueue(responseNetwork_out, ResponseMsg, latency=cache_response_latency) { + enqueue(responseNetwork_out, ResponseMsg, cache_response_latency) { assert(is_valid(cache_entry)); - out_msg.Address := address; + out_msg.addr := address; out_msg.Type := CoherenceResponseType:DATA_SHARED; out_msg.Sender := machineID; out_msg.Destination.add(in_msg.Requestor); @@ -582,12 +723,36 @@ machine(L1Cache, "AMD Hammer-like protocol") } } } - - action(em_sendDataSharedMultiple, "em", desc="Send data from cache to all requestors") { + + action(et_sendDataSharedFromTBE, "\et", desc="Send data from TBE to requestor, keep a shared copy") { + peek(forwardToCache_in, RequestMsg) { + enqueue(responseNetwork_out, ResponseMsg, cache_response_latency) { + assert(is_valid(tbe)); + out_msg.addr := address; + out_msg.Type := CoherenceResponseType:DATA_SHARED; + out_msg.Sender := machineID; + out_msg.Destination.add(in_msg.Requestor); + out_msg.DataBlk := tbe.DataBlk; + out_msg.Dirty := tbe.Dirty; + DPRINTF(RubySlicc, "%s\n", out_msg.DataBlk); + if (in_msg.DirectedProbe) { + out_msg.Acks := machineCount(MachineType:L1Cache); + } else { + out_msg.Acks := 2; + } + out_msg.SilentAcks := in_msg.SilentAcks; + out_msg.MessageSize := MessageSizeType:Response_Data; + out_msg.InitialRequestTime := in_msg.InitialRequestTime; + out_msg.ForwardRequestTime := in_msg.ForwardRequestTime; + } + } + } + + action(em_sendDataSharedMultiple, "em", desc="Send data from cache to all requestors, still the owner") { peek(forwardToCache_in, RequestMsg) { - enqueue(responseNetwork_out, ResponseMsg, latency=cache_response_latency) { + enqueue(responseNetwork_out, ResponseMsg, cache_response_latency) { assert(is_valid(cache_entry)); - out_msg.Address := address; + out_msg.addr := address; out_msg.Type := CoherenceResponseType:DATA_SHARED; out_msg.Sender := machineID; out_msg.Destination := in_msg.MergedRequestors; @@ -602,11 +767,31 @@ machine(L1Cache, "AMD Hammer-like protocol") } } } - + + action(emt_sendDataSharedMultipleFromTBE, "emt", desc="Send data from tbe to all requestors") { + peek(forwardToCache_in, RequestMsg) { + enqueue(responseNetwork_out, ResponseMsg, cache_response_latency) { + assert(is_valid(tbe)); + out_msg.addr := address; + out_msg.Type := CoherenceResponseType:DATA_SHARED; + out_msg.Sender := machineID; + out_msg.Destination := in_msg.MergedRequestors; + out_msg.DataBlk := tbe.DataBlk; + out_msg.Dirty := tbe.Dirty; + DPRINTF(RubySlicc, "%s\n", out_msg.DataBlk); + out_msg.Acks := machineCount(MachineType:L1Cache); + out_msg.SilentAcks := in_msg.SilentAcks; + out_msg.MessageSize := MessageSizeType:Response_Data; + out_msg.InitialRequestTime := in_msg.InitialRequestTime; + out_msg.ForwardRequestTime := in_msg.ForwardRequestTime; + } + } + } + action(f_sendAck, "f", desc="Send ack from cache to requestor") { 
peek(forwardToCache_in, RequestMsg) { - enqueue(responseNetwork_out, ResponseMsg, latency=cache_response_latency) { - out_msg.Address := address; + enqueue(responseNetwork_out, ResponseMsg, cache_response_latency) { + out_msg.addr := address; out_msg.Type := CoherenceResponseType:ACK; out_msg.Sender := machineID; out_msg.Destination.add(in_msg.Requestor); @@ -622,8 +807,8 @@ machine(L1Cache, "AMD Hammer-like protocol") action(ff_sendAckShared, "\f", desc="Send shared ack from cache to requestor") { peek(forwardToCache_in, RequestMsg) { - enqueue(responseNetwork_out, ResponseMsg, latency=cache_response_latency) { - out_msg.Address := address; + enqueue(responseNetwork_out, ResponseMsg, cache_response_latency) { + out_msg.addr := address; out_msg.Type := CoherenceResponseType:ACK_SHARED; out_msg.Sender := machineID; out_msg.Destination.add(in_msg.Requestor); @@ -638,42 +823,51 @@ machine(L1Cache, "AMD Hammer-like protocol") } action(g_sendUnblock, "g", desc="Send unblock to memory") { - enqueue(unblockNetwork_out, ResponseMsg, latency=cache_response_latency) { - out_msg.Address := address; + enqueue(unblockNetwork_out, ResponseMsg, cache_response_latency) { + out_msg.addr := address; out_msg.Type := CoherenceResponseType:UNBLOCK; out_msg.Sender := machineID; - out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.Destination.add(mapAddressToMachine(address, MachineType:Directory)); out_msg.MessageSize := MessageSizeType:Unblock_Control; } } action(gm_sendUnblockM, "gm", desc="Send unblock to memory and indicate M/O/E state") { - enqueue(unblockNetwork_out, ResponseMsg, latency=cache_response_latency) { - out_msg.Address := address; + enqueue(unblockNetwork_out, ResponseMsg, cache_response_latency) { + out_msg.addr := address; out_msg.Type := CoherenceResponseType:UNBLOCKM; out_msg.Sender := machineID; - out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.Destination.add(mapAddressToMachine(address, MachineType:Directory)); out_msg.MessageSize := MessageSizeType:Unblock_Control; } } action(gs_sendUnblockS, "gs", desc="Send unblock to memory and indicate S state") { - enqueue(unblockNetwork_out, ResponseMsg, latency=cache_response_latency) { + enqueue(unblockNetwork_out, ResponseMsg, cache_response_latency) { assert(is_valid(tbe)); - out_msg.Address := address; + out_msg.addr := address; out_msg.Type := CoherenceResponseType:UNBLOCKS; out_msg.Sender := machineID; out_msg.CurOwner := tbe.CurOwner; - out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.Destination.add(mapAddressToMachine(address, MachineType:Directory)); out_msg.MessageSize := MessageSizeType:Unblock_Control; } } - action(h_load_hit, "h", desc="Notify sequencer the load completed.") { + action(h_load_hit, "hd", desc="Notify sequencer the load completed.") { + assert(is_valid(cache_entry)); + DPRINTF(RubySlicc, "%s\n", cache_entry.DataBlk); + L1Dcache.setMRU(cache_entry); + sequencer.readCallback(address, cache_entry.DataBlk, false, + testAndClearLocalHit(cache_entry)); + } + + action(h_ifetch_hit, "hi", desc="Notify sequencer the ifetch completed.") { assert(is_valid(cache_entry)); DPRINTF(RubySlicc, "%s\n", cache_entry.DataBlk); - sequencer.readCallback(address, testAndClearLocalHit(cache_entry), - cache_entry.DataBlk); + L1Icache.setMRU(cache_entry); + sequencer.readCallback(address, cache_entry.DataBlk, false, + testAndClearLocalHit(cache_entry)); } action(hx_external_load_hit, "hx", desc="load required external msgs") { @@ -681,43 +875,47 @@ machine(L1Cache, "AMD Hammer-like 
protocol") assert(is_valid(tbe)); DPRINTF(RubySlicc, "%s\n", cache_entry.DataBlk); peek(responseToCache_in, ResponseMsg) { - - sequencer.readCallback(address, - getNondirectHitMachType(in_msg.Address, in_msg.Sender), - cache_entry.DataBlk, - tbe.InitialRequestTime, - tbe.ForwardRequestTime, - tbe.FirstResponseTime); + L1Icache.setMRU(address); + L1Dcache.setMRU(address); + sequencer.readCallback(address, cache_entry.DataBlk, true, + machineIDToMachineType(in_msg.Sender), tbe.InitialRequestTime, + tbe.ForwardRequestTime, tbe.FirstResponseTime); } } action(hh_store_hit, "\h", desc="Notify sequencer that store completed.") { assert(is_valid(cache_entry)); DPRINTF(RubySlicc, "%s\n", cache_entry.DataBlk); - peek(mandatoryQueue_in, CacheMsg) { - sequencer.writeCallback(address, testAndClearLocalHit(cache_entry), - cache_entry.DataBlk); + peek(mandatoryQueue_in, RubyRequest) { + L1Dcache.setMRU(cache_entry); + sequencer.writeCallback(address, cache_entry.DataBlk, false, + testAndClearLocalHit(cache_entry)); cache_entry.Dirty := true; - if (in_msg.Type == CacheRequestType:ATOMIC) { + if (in_msg.Type == RubyRequestType:ATOMIC) { cache_entry.AtomicAccessed := true; } } } + action(hh_flush_hit, "\hf", desc="Notify sequencer that flush completed.") { + assert(is_valid(tbe)); + DPRINTF(RubySlicc, "%s\n", tbe.DataBlk); + sequencer.writeCallback(address, tbe.DataBlk, false, MachineType:L1Cache); + } + action(sx_external_store_hit, "sx", desc="store required external msgs.") { assert(is_valid(cache_entry)); assert(is_valid(tbe)); DPRINTF(RubySlicc, "%s\n", cache_entry.DataBlk); peek(responseToCache_in, ResponseMsg) { - - sequencer.writeCallback(address, - getNondirectHitMachType(address, in_msg.Sender), - cache_entry.DataBlk, - tbe.InitialRequestTime, - tbe.ForwardRequestTime, - tbe.FirstResponseTime); + L1Icache.setMRU(address); + L1Dcache.setMRU(address); + sequencer.writeCallback(address, cache_entry.DataBlk, true, + machineIDToMachineType(in_msg.Sender), tbe.InitialRequestTime, + tbe.ForwardRequestTime, tbe.FirstResponseTime); } + DPRINTF(RubySlicc, "%s\n", cache_entry.DataBlk); cache_entry.Dirty := true; } @@ -725,13 +923,11 @@ machine(L1Cache, "AMD Hammer-like protocol") assert(is_valid(cache_entry)); assert(is_valid(tbe)); DPRINTF(RubySlicc, "%s\n", cache_entry.DataBlk); - - sequencer.writeCallback(address, - getNondirectHitMachType(address, tbe.LastResponder), - cache_entry.DataBlk, - tbe.InitialRequestTime, - tbe.ForwardRequestTime, - tbe.FirstResponseTime); + L1Icache.setMRU(address); + L1Dcache.setMRU(address); + sequencer.writeCallback(address, cache_entry.DataBlk, true, + machineIDToMachineType(tbe.LastResponder), tbe.InitialRequestTime, + tbe.ForwardRequestTime, tbe.FirstResponseTime); cache_entry.Dirty := true; } @@ -746,16 +942,24 @@ machine(L1Cache, "AMD Hammer-like protocol") tbe.Sharers := false; } + action(it_allocateTBE, "it", desc="Allocate TBE") { + check_allocate(TBEs); + TBEs.allocate(address); + set_tbe(TBEs[address]); + tbe.Dirty := false; + tbe.Sharers := false; + } + action(j_popTriggerQueue, "j", desc="Pop trigger queue.") { - triggerQueue_in.dequeue(); + triggerQueue_in.dequeue(clockEdge()); } action(k_popMandatoryQueue, "k", desc="Pop mandatory queue.") { - mandatoryQueue_in.dequeue(); + mandatoryQueue_in.dequeue(clockEdge()); } action(l_popForwardQueue, "l", desc="Pop forwareded request queue.") { - forwardToCache_in.dequeue(); + forwardToCache_in.dequeue(clockEdge()); } action(hp_copyFromTBEToL2, "li", desc="Copy data from TBE to L2 cache entry.") { @@ -775,7 +979,7 @@ 
machine(L1Cache, "AMD Hammer-like protocol") action(m_decrementNumberOfMessages, "m", desc="Decrement the number of messages for which we're waiting") { peek(responseToCache_in, ResponseMsg) { - assert(in_msg.Acks > 0); + assert(in_msg.Acks >= 0); assert(is_valid(tbe)); DPRINTF(RubySlicc, "Sender = %s\n", in_msg.Sender); DPRINTF(RubySlicc, "SilentAcks = %d\n", in_msg.SilentAcks); @@ -802,7 +1006,7 @@ machine(L1Cache, "AMD Hammer-like protocol") tbe.ForwardRequestTime := in_msg.ForwardRequestTime; } if (tbe.FirstResponseTime == zero_time()) { - tbe.FirstResponseTime := get_time(); + tbe.FirstResponseTime := curCycle(); } } } @@ -814,12 +1018,12 @@ machine(L1Cache, "AMD Hammer-like protocol") } action(n_popResponseQueue, "n", desc="Pop response queue") { - responseToCache_in.dequeue(); + responseToCache_in.dequeue(clockEdge()); } action(ll_L2toL1Transfer, "ll", desc="") { - enqueue(triggerQueue_out, TriggerMsg, latency=l2_cache_hit_latency) { - out_msg.Address := address; + enqueue(triggerQueue_out, TriggerMsg, l2_cache_hit_latency) { + out_msg.addr := address; out_msg.Type := TriggerType:L2_to_L1; } } @@ -828,7 +1032,7 @@ machine(L1Cache, "AMD Hammer-like protocol") assert(is_valid(tbe)); if (tbe.NumPendingMsgs == 0) { enqueue(triggerQueue_out, TriggerMsg) { - out_msg.Address := address; + out_msg.addr := address; if (tbe.Sharers) { out_msg.Type := TriggerType:ALL_ACKS; } else { @@ -851,9 +1055,9 @@ machine(L1Cache, "AMD Hammer-like protocol") action(q_sendDataFromTBEToCache, "q", desc="Send data from TBE to cache") { peek(forwardToCache_in, RequestMsg) { assert(in_msg.Requestor != machineID); - enqueue(responseNetwork_out, ResponseMsg, latency=cache_response_latency) { + enqueue(responseNetwork_out, ResponseMsg, cache_response_latency) { assert(is_valid(tbe)); - out_msg.Address := address; + out_msg.addr := address; out_msg.Type := CoherenceResponseType:DATA; out_msg.Sender := machineID; out_msg.Destination.add(in_msg.Requestor); @@ -873,12 +1077,37 @@ machine(L1Cache, "AMD Hammer-like protocol") } } - action(qm_sendDataFromTBEToCache, "qm", desc="Send data from TBE to cache, multiple sharers") { + action(sq_sendSharedDataFromTBEToCache, "sq", desc="Send shared data from TBE to cache, still the owner") { peek(forwardToCache_in, RequestMsg) { - enqueue(responseNetwork_out, ResponseMsg, latency=cache_response_latency) { + assert(in_msg.Requestor != machineID); + enqueue(responseNetwork_out, ResponseMsg, cache_response_latency) { assert(is_valid(tbe)); - out_msg.Address := address; - out_msg.Type := CoherenceResponseType:DATA; + out_msg.addr := address; + out_msg.Type := CoherenceResponseType:DATA_SHARED; + out_msg.Sender := machineID; + out_msg.Destination.add(in_msg.Requestor); + DPRINTF(RubySlicc, "%s\n", out_msg.Destination); + out_msg.DataBlk := tbe.DataBlk; + out_msg.Dirty := tbe.Dirty; + if (in_msg.DirectedProbe) { + out_msg.Acks := machineCount(MachineType:L1Cache); + } else { + out_msg.Acks := 2; + } + out_msg.SilentAcks := in_msg.SilentAcks; + out_msg.MessageSize := MessageSizeType:Response_Data; + out_msg.InitialRequestTime := in_msg.InitialRequestTime; + out_msg.ForwardRequestTime := in_msg.ForwardRequestTime; + } + } + } + + action(qm_sendDataFromTBEToCache, "qm", desc="Send data from TBE to cache, multiple sharers, still the owner") { + peek(forwardToCache_in, RequestMsg) { + enqueue(responseNetwork_out, ResponseMsg, cache_response_latency) { + assert(is_valid(tbe)); + out_msg.addr := address; + out_msg.Type := CoherenceResponseType:DATA_SHARED; out_msg.Sender := machineID; 
out_msg.Destination := in_msg.MergedRequestors; DPRINTF(RubySlicc, "%s\n", out_msg.Destination); @@ -894,11 +1123,11 @@ machine(L1Cache, "AMD Hammer-like protocol") } action(qq_sendDataFromTBEToMemory, "\q", desc="Send data from TBE to memory") { - enqueue(unblockNetwork_out, ResponseMsg, latency=cache_response_latency) { + enqueue(unblockNetwork_out, ResponseMsg, cache_response_latency) { assert(is_valid(tbe)); - out_msg.Address := address; + out_msg.addr := address; out_msg.Sender := machineID; - out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.Destination.add(mapAddressToMachine(address, MachineType:Directory)); out_msg.Dirty := tbe.Dirty; if (tbe.Dirty) { out_msg.Type := CoherenceResponseType:WB_DIRTY; @@ -908,7 +1137,7 @@ machine(L1Cache, "AMD Hammer-like protocol") out_msg.Type := CoherenceResponseType:WB_CLEAN; // NOTE: in a real system this would not send data. We send // data here only so we can check it at the memory - out_msg.DataBlk := tbe.DataBlk; + out_msg.DataBlk := tbe.DataBlk; out_msg.MessageSize := MessageSizeType:Writeback_Control; } } @@ -925,12 +1154,12 @@ machine(L1Cache, "AMD Hammer-like protocol") } action(t_sendExclusiveDataFromTBEToMemory, "t", desc="Send exclusive data from TBE to memory") { - enqueue(unblockNetwork_out, ResponseMsg, latency=cache_response_latency) { + enqueue(unblockNetwork_out, ResponseMsg, cache_response_latency) { assert(is_valid(tbe)); - out_msg.Address := address; + out_msg.addr := address; out_msg.Sender := machineID; - out_msg.Destination.add(map_Address_to_Directory(address)); - out_msg.DataBlk := tbe.DataBlk; + out_msg.Destination.add(mapAddressToMachine(address, MachineType:Directory)); + out_msg.DataBlk := tbe.DataBlk; out_msg.Dirty := tbe.Dirty; if (tbe.Dirty) { out_msg.Type := CoherenceResponseType:WB_EXCLUSIVE_DIRTY; @@ -954,6 +1183,14 @@ machine(L1Cache, "AMD Hammer-like protocol") } } + action(uf_writeDataToCacheTBE, "uf", desc="Write data to TBE") { + peek(responseToCache_in, ResponseMsg) { + assert(is_valid(tbe)); + tbe.DataBlk := in_msg.DataBlk; + tbe.Dirty := in_msg.Dirty; + } + } + action(v_writeDataToCacheVerify, "v", desc="Write data to cache, assert it was same as before") { peek(responseToCache_in, ResponseMsg) { assert(is_valid(cache_entry)); @@ -964,56 +1201,104 @@ machine(L1Cache, "AMD Hammer-like protocol") cache_entry.Dirty := in_msg.Dirty || cache_entry.Dirty; } } - + + action(vt_writeDataToTBEVerify, "vt", desc="Write data to TBE, assert it was same as before") { + peek(responseToCache_in, ResponseMsg) { + assert(is_valid(tbe)); + DPRINTF(RubySlicc, "Cached Data Block: %s, Msg Data Block: %s\n", + tbe.DataBlk, in_msg.DataBlk); + assert(tbe.DataBlk == in_msg.DataBlk); + tbe.DataBlk := in_msg.DataBlk; + tbe.Dirty := in_msg.Dirty || tbe.Dirty; + } + } + action(gg_deallocateL1CacheBlock, "\g", desc="Deallocate cache block. 
Sets the cache to invalid, allowing a replacement in parallel with a fetch.") { - if (L1DcacheMemory.isTagPresent(address)) { - L1DcacheMemory.deallocate(address); + if (L1Dcache.isTagPresent(address)) { + L1Dcache.deallocate(address); } else { - L1IcacheMemory.deallocate(address); + L1Icache.deallocate(address); } unset_cache_entry(); } - + action(ii_allocateL1DCacheBlock, "\i", desc="Set L1 D-cache tag equal to tag of block B.") { if (is_invalid(cache_entry)) { - set_cache_entry(L1DcacheMemory.allocate(address, new Entry)); + set_cache_entry(L1Dcache.allocate(address, new Entry)); } } action(jj_allocateL1ICacheBlock, "\j", desc="Set L1 I-cache tag equal to tag of block B.") { if (is_invalid(cache_entry)) { - set_cache_entry(L1IcacheMemory.allocate(address, new Entry)); + set_cache_entry(L1Icache.allocate(address, new Entry)); } } action(vv_allocateL2CacheBlock, "\v", desc="Set L2 cache tag equal to tag of block B.") { - set_cache_entry(L2cacheMemory.allocate(address, new Entry)); + set_cache_entry(L2cache.allocate(address, new Entry)); } action(rr_deallocateL2CacheBlock, "\r", desc="Deallocate L2 cache block. Sets the cache to not present, allowing a replacement in parallel with a fetch.") { - L2cacheMemory.deallocate(address); + L2cache.deallocate(address); unset_cache_entry(); } - action(uu_profileMiss, "\u", desc="Profile the demand miss") { - peek(mandatoryQueue_in, CacheMsg) { - if (L1IcacheMemory.isTagPresent(address)) { - L1IcacheMemory.profileMiss(in_msg); - } else if (L1DcacheMemory.isTagPresent(address)) { - L1DcacheMemory.profileMiss(in_msg); - } - if (L2cacheMemory.isTagPresent(address) == false) { - L2cacheMemory.profileMiss(in_msg); - } + action(gr_deallocateCacheBlock, "\gr", desc="Deallocate an L1 or L2 cache block.") { + if (L1Dcache.isTagPresent(address)) { + L1Dcache.deallocate(address); + } + else if (L1Icache.isTagPresent(address)){ + L1Icache.deallocate(address); + } + else { + assert(L2cache.isTagPresent(address)); + L2cache.deallocate(address); } + unset_cache_entry(); + } + + action(forward_eviction_to_cpu, "\cc", desc="sends eviction information to the processor") { + if (send_evictions) { + DPRINTF(RubySlicc, "Sending invalidation for %#x to the CPU\n", address); + sequencer.evictionCallback(address); + } + } + + action(uu_profileL1DataMiss, "\udm", desc="Profile the demand miss") { + ++L1Dcache.demand_misses; + } + + action(uu_profileL1DataHit, "\udh", desc="Profile the demand hits") { + ++L1Dcache.demand_hits; + } + + action(uu_profileL1InstMiss, "\uim", desc="Profile the demand miss") { + ++L1Icache.demand_misses; + } + + action(uu_profileL1InstHit, "\uih", desc="Profile the demand hits") { + ++L1Icache.demand_hits; + } + + action(uu_profileL2Miss, "\um", desc="Profile the demand miss") { + ++L2cache.demand_misses; + } + + action(uu_profileL2Hit, "\uh", desc="Profile the demand hits ") { + ++L2cache.demand_hits; } action(zz_stallAndWaitMandatoryQueue, "\z", desc="Send the head of the mandatory queue to the back of the queue.") { stall_and_wait(mandatoryQueue_in, address); } + action(z_stall, "z", desc="stall") { + // do nothing and the special z_stall action will return a protocol stall + // so that the next port is checked + } + action(kd_wakeUpDependents, "kd", desc="wake-up dependents") { - wake_up_dependents(address); + wakeUpBuffers(address); } action(ka_wakeUpAllDependents, "ka", desc="wake-up all dependents") { @@ -1025,45 +1310,49 @@ machine(L1Cache, "AMD Hammer-like protocol") //***************************************************** // Transitions 
for Load/Store/L2_Replacement from transient states - transition({IM, SM, ISM, OM, IS, SS, OI, MI, II, IT, ST, OT, MT, MMT}, {Store, L2_Replacement}) { + transition({IM, IM_F, MM_WF, SM, SM_F, ISM, ISM_F, OM, OM_F, IS, SS, OI, MI, II, ST, OT, MT, MMT}, {Store, L2_Replacement}) { + zz_stallAndWaitMandatoryQueue; + } + + transition({IM, IM_F, MM_WF, SM, SM_F, ISM, ISM_F, OM, OM_F, IS, SS, OI, MI, II}, {Flush_line}) { + zz_stallAndWaitMandatoryQueue; + } + + transition({M_W, MM_W}, {L2_Replacement, Flush_line}) { + zz_stallAndWaitMandatoryQueue; + } + + transition({IM, IS, OI, MI, II, ST, OT, MT, MMT, MI_F, MM_F, OM_F, IM_F, ISM_F, SM_F, MM_WF}, {Load, Ifetch}) { zz_stallAndWaitMandatoryQueue; } - transition({M_W, MM_W}, {L2_Replacement}) { + transition({IM, SM, ISM, OM, IS, SS, MM_W, M_W, OI, MI, II, ST, OT, MT, MMT, IM_F, SM_F, ISM_F, OM_F, MM_WF, MI_F, MM_F, IR, SR, OR, MR, MMR}, L1_to_L2) { zz_stallAndWaitMandatoryQueue; } - transition({IM, IS, OI, MI, II, IT, ST, OT, MT, MMT}, {Load, Ifetch}) { + transition({MI_F, MM_F}, {Store}) { zz_stallAndWaitMandatoryQueue; } - transition({IM, SM, ISM, OM, IS, SS, MM_W, M_W, OI, MI, II, IT, ST, OT, MT, MMT}, L1_to_L2) { + transition({MM_F, MI_F}, {Flush_line}) { zz_stallAndWaitMandatoryQueue; } - transition({IT, ST, OT, MT, MMT}, {Other_GETX, NC_DMA_GETS, Other_GETS, Merged_GETS, Other_GETS_No_Mig, Invalidate}) { - // stall + transition({ST, OT, MT, MMT}, {Other_GETX, NC_DMA_GETS, Other_GETS, Merged_GETS, Other_GETS_No_Mig, Invalidate, Flush_line}) { + z_stall; + } + + transition({IR, SR, OR, MR, MMR}, {Other_GETX, NC_DMA_GETS, Other_GETS, Merged_GETS, Other_GETS_No_Mig, Invalidate}) { + z_stall; } // Transitions moving data between the L1 and L2 caches - transition({I, S, O, M, MM}, L1_to_L2) { + transition({S, O, M, MM}, L1_to_L2) { i_allocateTBE; gg_deallocateL1CacheBlock; vv_allocateL2CacheBlock; hp_copyFromTBEToL2; s_deallocateTBE; - ka_wakeUpAllDependents; - } - - transition(I, Trigger_L2_to_L1D, IT) { - i_allocateTBE; - rr_deallocateL2CacheBlock; - ii_allocateL1DCacheBlock; - nb_copyFromTBEToL1; // Not really needed for state I - s_deallocateTBE; - uu_profileMiss; - zz_stallAndWaitMandatoryQueue; - ll_L2toL1Transfer; } transition(S, Trigger_L2_to_L1D, ST) { @@ -1072,7 +1361,6 @@ machine(L1Cache, "AMD Hammer-like protocol") ii_allocateL1DCacheBlock; nb_copyFromTBEToL1; s_deallocateTBE; - uu_profileMiss; zz_stallAndWaitMandatoryQueue; ll_L2toL1Transfer; } @@ -1083,7 +1371,6 @@ machine(L1Cache, "AMD Hammer-like protocol") ii_allocateL1DCacheBlock; nb_copyFromTBEToL1; s_deallocateTBE; - uu_profileMiss; zz_stallAndWaitMandatoryQueue; ll_L2toL1Transfer; } @@ -1094,7 +1381,6 @@ machine(L1Cache, "AMD Hammer-like protocol") ii_allocateL1DCacheBlock; nb_copyFromTBEToL1; s_deallocateTBE; - uu_profileMiss; zz_stallAndWaitMandatoryQueue; ll_L2toL1Transfer; } @@ -1105,18 +1391,6 @@ machine(L1Cache, "AMD Hammer-like protocol") ii_allocateL1DCacheBlock; nb_copyFromTBEToL1; s_deallocateTBE; - uu_profileMiss; - zz_stallAndWaitMandatoryQueue; - ll_L2toL1Transfer; - } - - transition(I, Trigger_L2_to_L1I, IT) { - i_allocateTBE; - rr_deallocateL2CacheBlock; - jj_allocateL1ICacheBlock; - nb_copyFromTBEToL1; - s_deallocateTBE; - uu_profileMiss; zz_stallAndWaitMandatoryQueue; ll_L2toL1Transfer; } @@ -1127,7 +1401,6 @@ machine(L1Cache, "AMD Hammer-like protocol") jj_allocateL1ICacheBlock; nb_copyFromTBEToL1; s_deallocateTBE; - uu_profileMiss; zz_stallAndWaitMandatoryQueue; ll_L2toL1Transfer; } @@ -1138,7 +1411,6 @@ machine(L1Cache, "AMD Hammer-like protocol") 
jj_allocateL1ICacheBlock; nb_copyFromTBEToL1; s_deallocateTBE; - uu_profileMiss; zz_stallAndWaitMandatoryQueue; ll_L2toL1Transfer; } @@ -1149,7 +1421,6 @@ machine(L1Cache, "AMD Hammer-like protocol") jj_allocateL1ICacheBlock; nb_copyFromTBEToL1; s_deallocateTBE; - uu_profileMiss; zz_stallAndWaitMandatoryQueue; ll_L2toL1Transfer; } @@ -1160,64 +1431,62 @@ machine(L1Cache, "AMD Hammer-like protocol") jj_allocateL1ICacheBlock; nb_copyFromTBEToL1; s_deallocateTBE; - uu_profileMiss; zz_stallAndWaitMandatoryQueue; ll_L2toL1Transfer; } - transition(IT, Complete_L2_to_L1, I) { + transition(ST, Complete_L2_to_L1, SR) { j_popTriggerQueue; kd_wakeUpDependents; } - transition(ST, Complete_L2_to_L1, S) { + transition(OT, Complete_L2_to_L1, OR) { j_popTriggerQueue; kd_wakeUpDependents; } - transition(OT, Complete_L2_to_L1, O) { + transition(MT, Complete_L2_to_L1, MR) { j_popTriggerQueue; kd_wakeUpDependents; } - transition(MT, Complete_L2_to_L1, M) { - j_popTriggerQueue; - kd_wakeUpDependents; - } - - transition(MMT, Complete_L2_to_L1, MM) { + transition(MMT, Complete_L2_to_L1, MMR) { j_popTriggerQueue; kd_wakeUpDependents; } // Transitions from Idle - transition(I, Load, IS) { + transition({I,IR}, Load, IS) { ii_allocateL1DCacheBlock; i_allocateTBE; a_issueGETS; - uu_profileMiss; + uu_profileL1DataMiss; + uu_profileL2Miss; k_popMandatoryQueue; } - transition(I, Ifetch, IS) { + transition({I,IR}, Ifetch, IS) { jj_allocateL1ICacheBlock; i_allocateTBE; a_issueGETS; - uu_profileMiss; + uu_profileL1InstMiss; + uu_profileL2Miss; k_popMandatoryQueue; } - transition(I, Store, IM) { + transition({I,IR}, Store, IM) { ii_allocateL1DCacheBlock; i_allocateTBE; b_issueGETX; - uu_profileMiss; + uu_profileL1DataMiss; + uu_profileL2Miss; k_popMandatoryQueue; } - transition(I, L2_Replacement) { - rr_deallocateL2CacheBlock; - ka_wakeUpAllDependents; + transition({I, IR}, Flush_line, IM_F) { + it_allocateTBE; + bf_issueGETF; + k_popMandatoryQueue; } transition(I, {Other_GETX, NC_DMA_GETS, Other_GETS, Other_GETS_No_Mig, Invalidate}) { @@ -1226,25 +1495,60 @@ machine(L1Cache, "AMD Hammer-like protocol") } // Transitions from Shared - transition({S, SM, ISM}, {Load, Ifetch}) { + transition({S, SM, ISM}, Load) { + h_load_hit; + uu_profileL1DataHit; + k_popMandatoryQueue; + } + + transition({S, SM, ISM}, Ifetch) { + h_ifetch_hit; + uu_profileL1InstHit; + k_popMandatoryQueue; + } + + transition(SR, Load, S) { h_load_hit; + uu_profileL1DataMiss; + uu_profileL2Hit; k_popMandatoryQueue; + ka_wakeUpAllDependents; } - transition(S, Store, SM) { + transition(SR, Ifetch, S) { + h_ifetch_hit; + uu_profileL1InstMiss; + uu_profileL2Hit; + k_popMandatoryQueue; + ka_wakeUpAllDependents; + } + + transition({S,SR}, Store, SM) { i_allocateTBE; b_issueGETX; - uu_profileMiss; + uu_profileL1DataMiss; + uu_profileL2Miss; + k_popMandatoryQueue; + } + + transition({S, SR}, Flush_line, SM_F) { + i_allocateTBE; + bf_issueGETF; + forward_eviction_to_cpu; + gg_deallocateL1CacheBlock; k_popMandatoryQueue; } transition(S, L2_Replacement, I) { + forward_eviction_to_cpu; rr_deallocateL2CacheBlock; ka_wakeUpAllDependents; } transition(S, {Other_GETX, Invalidate}, I) { f_sendAck; + forward_eviction_to_cpu; + gr_deallocateCacheBlock; l_popForwardQueue; } @@ -1254,28 +1558,64 @@ machine(L1Cache, "AMD Hammer-like protocol") } // Transitions from Owned - transition({O, OM, SS, MM_W, M_W}, {Load, Ifetch}) { + transition({O, OM, SS, MM_W, M_W}, {Load}) { + h_load_hit; + uu_profileL1DataHit; + k_popMandatoryQueue; + } + + transition({O, OM, SS, MM_W, M_W}, 
{Ifetch}) { + h_ifetch_hit; + uu_profileL1InstHit; + k_popMandatoryQueue; + } + + transition(OR, Load, O) { h_load_hit; + uu_profileL1DataMiss; + uu_profileL2Hit; + k_popMandatoryQueue; + ka_wakeUpAllDependents; + } + + transition(OR, Ifetch, O) { + h_ifetch_hit; + uu_profileL1InstMiss; + uu_profileL2Hit; k_popMandatoryQueue; + ka_wakeUpAllDependents; } - transition(O, Store, OM) { + transition({O,OR}, Store, OM) { i_allocateTBE; b_issueGETX; p_decrementNumberOfMessagesByOne; - uu_profileMiss; + uu_profileL1DataMiss; + uu_profileL2Miss; + k_popMandatoryQueue; + } + + transition({O, OR}, Flush_line, OM_F) { + i_allocateTBE; + bf_issueGETF; + p_decrementNumberOfMessagesByOne; + forward_eviction_to_cpu; + gg_deallocateL1CacheBlock; k_popMandatoryQueue; } transition(O, L2_Replacement, OI) { i_allocateTBE; d_issuePUT; + forward_eviction_to_cpu; rr_deallocateL2CacheBlock; ka_wakeUpAllDependents; } transition(O, {Other_GETX, Invalidate}, I) { e_sendData; + forward_eviction_to_cpu; + gr_deallocateCacheBlock; l_popForwardQueue; } @@ -1290,68 +1630,143 @@ machine(L1Cache, "AMD Hammer-like protocol") } // Transitions from Modified - transition(MM, {Load, Ifetch}) { + transition({MM, M}, {Ifetch}) { + h_ifetch_hit; + uu_profileL1InstHit; + k_popMandatoryQueue; + } + + transition({MM, M}, {Load}) { h_load_hit; + uu_profileL1DataHit; k_popMandatoryQueue; } transition(MM, Store) { hh_store_hit; + uu_profileL1DataHit; + k_popMandatoryQueue; + } + + transition(MMR, Load, MM) { + h_load_hit; + uu_profileL1DataMiss; + uu_profileL2Hit; + k_popMandatoryQueue; + ka_wakeUpAllDependents; + } + + transition(MMR, Ifetch, MM) { + h_ifetch_hit; + uu_profileL1InstMiss; + uu_profileL2Hit; + k_popMandatoryQueue; + ka_wakeUpAllDependents; + } + + transition(MMR, Store, MM) { + hh_store_hit; + uu_profileL1DataMiss; + uu_profileL2Hit; + k_popMandatoryQueue; + ka_wakeUpAllDependents; + } + + transition({MM, M, MMR, MR}, Flush_line, MM_F) { + i_allocateTBE; + bf_issueGETF; + p_decrementNumberOfMessagesByOne; + forward_eviction_to_cpu; + gg_deallocateL1CacheBlock; k_popMandatoryQueue; } + transition(MM_F, Block_Ack, MI_F) { + df_issuePUTF; + l_popForwardQueue; + kd_wakeUpDependents; + } + transition(MM, L2_Replacement, MI) { i_allocateTBE; d_issuePUT; + forward_eviction_to_cpu; rr_deallocateL2CacheBlock; ka_wakeUpAllDependents; } transition(MM, {Other_GETX, Invalidate}, I) { c_sendExclusiveData; + forward_eviction_to_cpu; + gr_deallocateCacheBlock; l_popForwardQueue; } transition(MM, Other_GETS, I) { c_sendExclusiveData; + forward_eviction_to_cpu; + gr_deallocateCacheBlock; l_popForwardQueue; } - - transition(MM, NC_DMA_GETS) { - c_sendExclusiveData; + + transition(MM, NC_DMA_GETS, O) { + ee_sendDataShared; l_popForwardQueue; } - + transition(MM, Other_GETS_No_Mig, O) { ee_sendDataShared; l_popForwardQueue; } - + transition(MM, Merged_GETS, O) { em_sendDataSharedMultiple; l_popForwardQueue; } - + // Transitions from Dirty Exclusive - transition(M, {Load, Ifetch}) { + transition(M, Store, MM) { + hh_store_hit; + uu_profileL1DataHit; + k_popMandatoryQueue; + } + + transition(MR, Load, M) { h_load_hit; + uu_profileL1DataMiss; + uu_profileL2Hit; + k_popMandatoryQueue; + ka_wakeUpAllDependents; + } + + transition(MR, Ifetch, M) { + h_ifetch_hit; + uu_profileL1InstMiss; + uu_profileL2Hit; k_popMandatoryQueue; + ka_wakeUpAllDependents; } - transition(M, Store, MM) { + transition(MR, Store, MM) { hh_store_hit; + uu_profileL1DataMiss; + uu_profileL2Hit; k_popMandatoryQueue; + ka_wakeUpAllDependents; } transition(M, L2_Replacement, MI) 
     i_allocateTBE;
     d_issuePUT;
+    forward_eviction_to_cpu;
     rr_deallocateL2CacheBlock;
     ka_wakeUpAllDependents;
   }
 
   transition(M, {Other_GETX, Invalidate}, I) {
     c_sendExclusiveData;
+    forward_eviction_to_cpu;
+    gr_deallocateCacheBlock;
     l_popForwardQueue;
   }
 
@@ -1360,7 +1775,7 @@ machine(L1Cache, "AMD Hammer-like protocol")
     l_popForwardQueue;
   }
 
-  transition(M, NC_DMA_GETS) {
+  transition(M, NC_DMA_GETS, O) {
     ee_sendDataShared;
     l_popForwardQueue;
   }
 
@@ -1372,12 +1787,12 @@ machine(L1Cache, "AMD Hammer-like protocol")
 
   // Transitions from IM
-  transition(IM, {Other_GETX, NC_DMA_GETS, Other_GETS, Other_GETS_No_Mig, Invalidate}) {
+  transition({IM, IM_F}, {Other_GETX, NC_DMA_GETS, Other_GETS, Other_GETS_No_Mig, Invalidate}) {
     f_sendAck;
     l_popForwardQueue;
   }
 
-  transition(IM, Ack) {
+  transition({IM, IM_F, MM_F}, Ack) {
     m_decrementNumberOfMessages;
     o_checkForCompletion;
     n_popResponseQueue;
@@ -1385,32 +1800,53 @@ machine(L1Cache, "AMD Hammer-like protocol")
 
   transition(IM, Data, ISM) {
     u_writeDataToCache;
-    m_decrementNumberOfMessages; 
+    m_decrementNumberOfMessages;
     o_checkForCompletion;
     n_popResponseQueue;
   }
 
+  transition(IM_F, Data, ISM_F) {
+    uf_writeDataToCacheTBE;
+    m_decrementNumberOfMessages;
+    o_checkForCompletion;
+    n_popResponseQueue;
+  }
+
   transition(IM, Exclusive_Data, MM_W) {
     u_writeDataToCache;
-    m_decrementNumberOfMessages; 
+    m_decrementNumberOfMessages;
     o_checkForCompletion;
     sx_external_store_hit;
     n_popResponseQueue;
     kd_wakeUpDependents;
   }
 
+  transition(IM_F, Exclusive_Data, MM_WF) {
+    uf_writeDataToCacheTBE;
+    m_decrementNumberOfMessages;
+    o_checkForCompletion;
+    n_popResponseQueue;
+  }
+
   // Transitions from SM
-  transition(SM, {NC_DMA_GETS, Other_GETS, Other_GETS_No_Mig}) {
+  transition({SM, SM_F}, {NC_DMA_GETS, Other_GETS, Other_GETS_No_Mig}) {
     ff_sendAckShared;
     l_popForwardQueue;
   }
 
   transition(SM, {Other_GETX, Invalidate}, IM) {
     f_sendAck;
+    forward_eviction_to_cpu;
     l_popForwardQueue;
   }
 
-  transition(SM, Ack) {
+  transition(SM_F, {Other_GETX, Invalidate}, IM_F) {
+    f_sendAck;
+    forward_eviction_to_cpu;
+    l_popForwardQueue;
+  }
+
+  transition({SM, SM_F}, Ack) {
     m_decrementNumberOfMessages;
     o_checkForCompletion;
     n_popResponseQueue;
@@ -1418,13 +1854,20 @@ machine(L1Cache, "AMD Hammer-like protocol")
 
   transition(SM, {Data, Exclusive_Data}, ISM) {
     v_writeDataToCacheVerify;
-    m_decrementNumberOfMessages; 
+    m_decrementNumberOfMessages;
+    o_checkForCompletion;
+    n_popResponseQueue;
+  }
+
+  transition(SM_F, {Data, Exclusive_Data}, ISM_F) {
+    vt_writeDataToTBEVerify;
+    m_decrementNumberOfMessages;
     o_checkForCompletion;
     n_popResponseQueue;
   }
 
   // Transitions from ISM
-  transition(ISM, Ack) {
+  transition({ISM, ISM_F}, Ack) {
     m_decrementNumberOfMessages;
     o_checkForCompletion;
     n_popResponseQueue;
@@ -1438,11 +1881,25 @@ machine(L1Cache, "AMD Hammer-like protocol")
     kd_wakeUpDependents;
   }
 
+  transition(ISM_F, All_acks_no_sharers, MI_F) {
+    df_issuePUTF;
+    j_popTriggerQueue;
+    kd_wakeUpDependents;
+  }
+
   // Transitions from OM
   transition(OM, {Other_GETX, Invalidate}, IM) {
     e_sendData;
     pp_incrementNumberOfMessagesByOne;
+    forward_eviction_to_cpu;
+    l_popForwardQueue;
+  }
+
+  transition(OM_F, {Other_GETX, Invalidate}, IM_F) {
+    q_sendDataFromTBEToCache;
+    pp_incrementNumberOfMessagesByOne;
+    forward_eviction_to_cpu;
     l_popForwardQueue;
   }
 
@@ -1456,7 +1913,17 @@ machine(L1Cache, "AMD Hammer-like protocol")
     l_popForwardQueue;
   }
 
-  transition(OM, Ack) {
+  transition(OM_F, {NC_DMA_GETS, Other_GETS, Other_GETS_No_Mig}) {
+    et_sendDataSharedFromTBE;
+    l_popForwardQueue;
+  }
+
+  transition(OM_F, Merged_GETS) {
+    emt_sendDataSharedMultipleFromTBE;
+    l_popForwardQueue;
+  }
+
+  transition({OM, OM_F}, Ack) {
     m_decrementNumberOfMessages;
     o_checkForCompletion;
     n_popResponseQueue;
@@ -1470,6 +1937,11 @@ machine(L1Cache, "AMD Hammer-like protocol")
     kd_wakeUpDependents;
   }
 
+  transition({MM_F, OM_F}, {All_acks, All_acks_no_sharers}, MI_F) {
+    df_issuePUTF;
+    j_popTriggerQueue;
+    kd_wakeUpDependents;
+  }
 
   // Transitions from IS
   transition(IS, {Other_GETX, NC_DMA_GETS, Other_GETS, Other_GETS_No_Mig, Invalidate}) {
     f_sendAck;
     l_popForwardQueue;
   }
 
-  transition(IS, Ack) { 
+  transition(IS, Ack) {
     m_decrementNumberOfMessages;
     o_checkForCompletion;
     n_popResponseQueue;
   }
 
-  transition(IS, Shared_Ack) { 
+  transition(IS, Shared_Ack) {
     m_decrementNumberOfMessages;
     r_setSharerBit;
     o_checkForCompletion;
@@ -1522,13 +1994,13 @@ machine(L1Cache, "AMD Hammer-like protocol")
 
   // Transitions from SS
-  transition(SS, Ack) { 
+  transition(SS, Ack) {
     m_decrementNumberOfMessages;
     o_checkForCompletion;
     n_popResponseQueue;
   }
 
-  transition(SS, Shared_Ack) { 
+  transition(SS, Shared_Ack) {
     m_decrementNumberOfMessages;
     r_setSharerBit;
     o_checkForCompletion;
@@ -1554,10 +2026,11 @@ machine(L1Cache, "AMD Hammer-like protocol")
 
   transition(MM_W, Store) {
     hh_store_hit;
+    uu_profileL1DataHit;
     k_popMandatoryQueue;
   }
 
-  transition(MM_W, Ack) {
+  transition({MM_W, MM_WF}, Ack) {
     m_decrementNumberOfMessages;
     o_checkForCompletion;
     n_popResponseQueue;
@@ -1570,14 +2043,20 @@ machine(L1Cache, "AMD Hammer-like protocol")
     kd_wakeUpDependents;
   }
 
+  transition(MM_WF, All_acks_no_sharers, MI_F) {
+    df_issuePUTF;
+    j_popTriggerQueue;
+    kd_wakeUpDependents;
+  }
 
   // Transitions from M_W
   transition(M_W, Store, MM_W) {
     hh_store_hit;
+    uu_profileL1DataHit;
     k_popMandatoryQueue;
   }
 
-  transition(M_W, Ack) { 
+  transition(M_W, Ack) {
     m_decrementNumberOfMessages;
     o_checkForCompletion;
     n_popResponseQueue;
@@ -1598,7 +2077,7 @@ machine(L1Cache, "AMD Hammer-like protocol")
   }
 
   transition({OI, MI}, {NC_DMA_GETS, Other_GETS, Other_GETS_No_Mig}, OI) {
-    q_sendDataFromTBEToCache;
+    sq_sendSharedDataFromTBEToCache;
     l_popForwardQueue;
   }
 
@@ -1614,6 +2093,14 @@ machine(L1Cache, "AMD Hammer-like protocol")
     kd_wakeUpDependents;
   }
 
+  transition(MI_F, Writeback_Ack, I) {
+    hh_flush_hit;
+    t_sendExclusiveDataFromTBEToMemory;
+    s_deallocateTBE;
+    l_popForwardQueue;
+    kd_wakeUpDependents;
+  }
+
   transition(OI, Writeback_Ack, I) {
     qq_sendDataFromTBEToMemory;
     s_deallocateTBE;
@@ -1639,4 +2126,31 @@ machine(L1Cache, "AMD Hammer-like protocol")
     l_popForwardQueue;
     kd_wakeUpDependents;
   }
+
+  transition(MM_F, {Other_GETX, Invalidate}, IM_F) {
+    ct_sendExclusiveDataFromTBE;
+    pp_incrementNumberOfMessagesByOne;
+    l_popForwardQueue;
+  }
+
+  transition(MM_F, Other_GETS, IM_F) {
+    ct_sendExclusiveDataFromTBE;
+    pp_incrementNumberOfMessagesByOne;
+    l_popForwardQueue;
+  }
+
+  transition(MM_F, NC_DMA_GETS, OM_F) {
+    sq_sendSharedDataFromTBEToCache;
+    l_popForwardQueue;
+  }
+
+  transition(MM_F, Other_GETS_No_Mig, OM_F) {
+    et_sendDataSharedFromTBE;
+    l_popForwardQueue;
+  }
+
+  transition(MM_F, Merged_GETS, OM_F) {
+    emt_sendDataSharedMultipleFromTBE;
+    l_popForwardQueue;
+  }
 }
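
Editor's note, not part of the patch: the flush support added above is easiest
to read as a three-step handshake. Below is a minimal sketch in SLICC-comment
form, inferred only from the transitions visible in this diff; the
directory-side half of the handshake (servicing GETF/PUTF and raising
Block_Ack) is assumed to live in MOESI_hammer-dir.sm.

  // Assumed flush sequence (sketch, not authoritative):
  // 1. Flush_line in a stable state ({O, OR} or {MM, M, MMR, MR}):
  //    i_allocateTBE + bf_issueGETF stash the line in the TBE and request
  //    exclusive ownership, while gg_deallocateL1CacheBlock tears the block
  //    down in the cache; OM_F / MM_F track the in-flight flush, and the
  //    OM_F / MM_F forward-queue transitions keep answering snoops from the
  //    TBE (et_/emt_/ct_/sq_ actions) in the meantime.
  // 2. Block_Ack from the directory, or All_acks / All_acks_no_sharers from
  //    the trigger queue, fire df_issuePUTF; MM_F, OM_F, ISM_F and MM_WF all
  //    converge on MI_F.
  // 3. Writeback_Ack in MI_F completes the flush: hh_flush_hit notifies the
  //    sequencer, t_sendExclusiveDataFromTBEToMemory writes the TBE data back
  //    to memory, s_deallocateTBE retires the entry, and the block ends in I.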