mem-cache: Fix non-virtual base destructor of Repl Entry
[gem5.git] / src / mem / protocol / MOESI_hammer-cache.sm
index 219096d26e3e9eaf4f32de6a5a31b5ac4fcff508..9cbd277d4fb82f22a0d7f2c79c08b1612aaab943 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood
+ * Copyright (c) 1999-2013 Mark D. Hill and David A. Wood
  * Copyright (c) 2009 Advanced Micro Devices, Inc.
  * All rights reserved.
  *
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
- * AMD's contributions to the MOESI hammer protocol do not constitute an 
+ * AMD's contributions to the MOESI hammer protocol do not constitute an
  * endorsement of its similarity to any AMD products.
  *
  * Authors: Milo Martin
  *          Brad Beckmann
  */
 
-machine(L1Cache, "AMD Hammer-like protocol") 
-: Sequencer * sequencer,
-  CacheMemory * L1IcacheMemory,
-  CacheMemory * L1DcacheMemory,
-  CacheMemory * L2cacheMemory,
-  int cache_response_latency = 10,
-  int issue_latency = 2,
-  int l2_cache_hit_latency = 10,
-  bool no_mig_atomic = true,
-  bool send_evictions
+machine(MachineType:L1Cache, "AMD Hammer-like protocol")
+    : Sequencer * sequencer;
+      CacheMemory * L1Icache;
+      CacheMemory * L1Dcache;
+      CacheMemory * L2cache;
+      Cycles cache_response_latency := 10;
+      Cycles issue_latency := 2;
+      Cycles l2_cache_hit_latency := 10;
+      bool no_mig_atomic := "True";
+      bool send_evictions;
+
+      // NETWORK BUFFERS
+      MessageBuffer * requestFromCache, network="To", virtual_network="2",
+            vnet_type="request";
+      MessageBuffer * responseFromCache, network="To", virtual_network="4",
+            vnet_type="response";
+      MessageBuffer * unblockFromCache, network="To", virtual_network="5",
+            vnet_type="unblock";
+
+      MessageBuffer * forwardToCache, network="From", virtual_network="3",
+            vnet_type="forward";
+      MessageBuffer * responseToCache, network="From", virtual_network="4",
+            vnet_type="response";
+
+      MessageBuffer * mandatoryQueue;
+
+      MessageBuffer * triggerQueue;
 {
-
-  // NETWORK BUFFERS
-  MessageBuffer requestFromCache, network="To", virtual_network="2", ordered="false", vnet_type="request";
-  MessageBuffer responseFromCache, network="To", virtual_network="4", ordered="false", vnet_type="response";
-  MessageBuffer unblockFromCache, network="To", virtual_network="5", ordered="false", vnet_type="unblock";
-
-  MessageBuffer forwardToCache, network="From", virtual_network="3", ordered="false", vnet_type="forward";
-  MessageBuffer responseToCache, network="From", virtual_network="4", ordered="false", vnet_type="response";
-
-
   // STATES
   state_declaration(State, desc="Cache states", default="L1Cache_State_I") {
     // Base states
@@ -82,7 +89,6 @@ machine(L1Cache, "AMD Hammer-like protocol")
     OI, AccessPermission:Busy, "OI", desc="Issued PutO, waiting for ack";
     MI, AccessPermission:Busy, "MI", desc="Issued PutX, waiting for ack";
     II, AccessPermission:Busy, "II", desc="Issued PutX/O, saw Other_GETS or Other_GETX, waiting for ack";
-    IT, AccessPermission:Busy, "IT", desc="Invalid block transferring to L1";
     ST, AccessPermission:Busy, "ST", desc="S block transferring to L1";
     OT, AccessPermission:Busy, "OT", desc="O block transferring to L1";
     MT, AccessPermission:Busy, "MT", desc="M block transferring to L1";
@@ -136,12 +142,7 @@ machine(L1Cache, "AMD Hammer-like protocol")
     Block_Ack,                   desc="the directory is blocked and ready for the flush";
   }
 
-  // TYPES
-
   // STRUCTURE DEFINITIONS
-
-  MessageBuffer mandatoryQueue, ordered="false";
-
   // CacheEntry
   structure(Entry, desc="...", interface="AbstractCacheEntry") {
     State CacheState,        desc="cache state";
@@ -161,62 +162,95 @@ machine(L1Cache, "AMD Hammer-like protocol")
     bool AppliedSilentAcks, default="false", desc="for full-bit dir, does the pending msg count reflect the silent acks";
     MachineID LastResponder, desc="last machine to send a response for this request";
     MachineID CurOwner,      desc="current owner of the block, used for UnblockS responses";
-    Time InitialRequestTime, default="0", desc="time the initial requests was sent from the L1Cache";
-    Time ForwardRequestTime, default="0", desc="time the dir forwarded the request";
-    Time FirstResponseTime, default="0", desc="the time the first response was received";
+
+    Cycles InitialRequestTime, default="Cycles(0)",
+            desc="time the initial requests was sent from the L1Cache";
+    Cycles ForwardRequestTime, default="Cycles(0)",
+            desc="time the dir forwarded the request";
+    Cycles FirstResponseTime, default="Cycles(0)",
+            desc="the time the first response was received";
   }
 
   structure(TBETable, external="yes") {
+    // Interface of the externally defined TBE table (external="yes").
+    // This change only switches the argument type of each method from
+    // Address to Addr; the method names and return types are unchanged.
-    TBE lookup(Address);
-    void allocate(Address);
-    void deallocate(Address);
-    bool isPresent(Address);
+    TBE lookup(Addr);
+    void allocate(Addr);
+    void deallocate(Addr);
+    bool isPresent(Addr);
   }
 
-  TBETable TBEs, template_hack="<L1Cache_TBE>";
+  // TBE table instance for this controller, parameterised on L1Cache_TBE.
+  // NOTE(review): constructor="m_number_of_TBEs" presumably forwards that
+  // member as the table's constructor argument -- confirm against the
+  // SLICC generator.
+  TBETable TBEs, template="<L1Cache_TBE>", constructor="m_number_of_TBEs";
 
+  // Prototypes only: each of the following is declared without a body here.
+  Tick clockEdge();
   void set_cache_entry(AbstractCacheEntry b);
   void unset_cache_entry();
   void set_tbe(TBE b);
   void unset_tbe();
   void wakeUpAllBuffers();
-  void wakeUpBuffers(Address a);
+  void wakeUpBuffers(Addr a);
+  Cycles curCycle();
+  MachineID mapAddressToMachine(Addr addr, MachineType mtype);
 
-  Entry getCacheEntry(Address address), return_by_pointer="yes" {
-    Entry L2cache_entry := static_cast(Entry, "pointer", L2cacheMemory.lookup(address));
+  // Look up the entry for `address`, probing the L2 first, then the L1
+  // D-cache, then the L1 I-cache. The first valid entry found is returned.
+  // If neither the L2 nor the L1 D-cache lookup is valid, the L1 I-cache
+  // lookup result is returned as-is, whether or not it is valid.
+  Entry getCacheEntry(Addr address), return_by_pointer="yes" {
+    Entry L2cache_entry := static_cast(Entry, "pointer", L2cache.lookup(address));
     if(is_valid(L2cache_entry)) {
       return L2cache_entry;
     }
 
-    Entry L1Dcache_entry := static_cast(Entry, "pointer", L1DcacheMemory.lookup(address));
+    Entry L1Dcache_entry := static_cast(Entry, "pointer", L1Dcache.lookup(address));
     if(is_valid(L1Dcache_entry)) {
       return L1Dcache_entry;
     }
 
-    Entry L1Icache_entry := static_cast(Entry, "pointer", L1IcacheMemory.lookup(address));
+    Entry L1Icache_entry := static_cast(Entry, "pointer", L1Icache.lookup(address));
     return L1Icache_entry;
   }
 
-  DataBlock getDataBlock(Address addr), return_by_ref="yes" {
-    return getCacheEntry(addr).DataBlk;
+  // Functional read of `addr` into `pkt`. If getCacheEntry returns a valid
+  // entry, its data block is passed to testAndRead; otherwise the data
+  // block of the TBE for `addr` is used. When neither a cache entry nor a
+  // TBE is valid, error("Missing data block") is raised.
+  void functionalRead(Addr addr, Packet *pkt) {
+    Entry cache_entry := getCacheEntry(addr);
+    if(is_valid(cache_entry)) {
+      testAndRead(addr, cache_entry.DataBlk, pkt);
+    } else {
+      TBE tbe := TBEs[addr];
+      if(is_valid(tbe)) {
+        testAndRead(addr, tbe.DataBlk, pkt);
+      } else {
+        error("Missing data block");
+      }
+    }
+  }
+
+  // Functional write of `pkt` to `addr`. If getCacheEntry returns a valid
+  // entry, testAndWrite is applied to that entry's data block and its
+  // result is returned; the TBE is not touched in that case. Otherwise
+  // testAndWrite is applied to the data block of the TBE for `addr`.
+  // NOTE(review): unlike functionalRead, the TBE is used here without an
+  // is_valid(tbe) check -- confirm callers guarantee that one exists
+  // whenever no cache entry is valid.
+  int functionalWrite(Addr addr, Packet *pkt) {
+    int num_functional_writes := 0;
+
+    Entry cache_entry := getCacheEntry(addr);
+    if(is_valid(cache_entry)) {
+      num_functional_writes := num_functional_writes +
+        testAndWrite(addr, cache_entry.DataBlk, pkt);
+      return num_functional_writes;
+    }
+
+    TBE tbe := TBEs[addr];
+    num_functional_writes := num_functional_writes +
+      testAndWrite(addr, tbe.DataBlk, pkt);
+    return num_functional_writes;
   }
 
-  Entry getL2CacheEntry(Address address), return_by_pointer="yes" {
-    Entry L2cache_entry := static_cast(Entry, "pointer", L2cacheMemory.lookup(address));
+  // Return the L2 lookup result for `address`, cast to Entry. No validity
+  // check is made here; the caller must test the result with is_valid.
+  Entry getL2CacheEntry(Addr address), return_by_pointer="yes" {
+    Entry L2cache_entry := static_cast(Entry, "pointer", L2cache.lookup(address));
     return L2cache_entry;
   }
 
-  Entry getL1DCacheEntry(Address address), return_by_pointer="yes" {
-    Entry L1Dcache_entry := static_cast(Entry, "pointer", L1DcacheMemory.lookup(address));
+  // Return the L1 D-cache lookup result for `address`, cast to Entry. No
+  // validity check is made here; the caller must test it with is_valid.
+  Entry getL1DCacheEntry(Addr address), return_by_pointer="yes" {
+    Entry L1Dcache_entry := static_cast(Entry, "pointer", L1Dcache.lookup(address));
     return L1Dcache_entry;
   }
 
-  Entry getL1ICacheEntry(Address address), return_by_pointer="yes" {
-    Entry L1Icache_entry := static_cast(Entry, "pointer", L1IcacheMemory.lookup(address));
+  // Return the L1 I-cache lookup result for `address`, cast to Entry. No
+  // validity check is made here; the caller must test it with is_valid.
+  Entry getL1ICacheEntry(Addr address), return_by_pointer="yes" {
+    Entry L1Icache_entry := static_cast(Entry, "pointer", L1Icache.lookup(address));
     return L1Icache_entry;
   }
 
-  State getState(TBE tbe, Entry cache_entry, Address addr) {
+  State getState(TBE tbe, Entry cache_entry, Addr addr) {
     if(is_valid(tbe)) {
       return tbe.TBEState;
     } else if (is_valid(cache_entry)) {
@@ -225,10 +259,10 @@ machine(L1Cache, "AMD Hammer-like protocol")
     return State:I;
   }
 
-  void setState(TBE tbe, Entry cache_entry, Address addr, State state) {
-    assert((L1DcacheMemory.isTagPresent(addr) && L1IcacheMemory.isTagPresent(addr)) == false);
-    assert((L1IcacheMemory.isTagPresent(addr) && L2cacheMemory.isTagPresent(addr)) == false);
-    assert((L1DcacheMemory.isTagPresent(addr) && L2cacheMemory.isTagPresent(addr)) == false);
+  void setState(TBE tbe, Entry cache_entry, Addr addr, State state) {
+    assert((L1Dcache.isTagPresent(addr) && L1Icache.isTagPresent(addr)) == false);
+    assert((L1Icache.isTagPresent(addr) && L2cache.isTagPresent(addr)) == false);
+    assert((L1Dcache.isTagPresent(addr) && L2cache.isTagPresent(addr)) == false);
 
     if (is_valid(tbe)) {
       tbe.TBEState := state;
@@ -239,7 +273,7 @@ machine(L1Cache, "AMD Hammer-like protocol")
     }
   }
 
-  AccessPermission getAccessPermission(Address addr) {
+  AccessPermission getAccessPermission(Addr addr) {
     TBE tbe := TBEs[addr];
     if(is_valid(tbe)) {
       return L1Cache_State_to_permission(tbe.TBEState);
@@ -253,7 +287,7 @@ machine(L1Cache, "AMD Hammer-like protocol")
     return AccessPermission:NotPresent;
   }
 
-  void setAccessPermission(Entry cache_entry, Address addr, State state) {
+  void setAccessPermission(Entry cache_entry, Addr addr, State state) {
     if (is_valid(cache_entry)) {
       cache_entry.changePermission(L1Cache_State_to_permission(state));
     }
@@ -273,24 +307,12 @@ machine(L1Cache, "AMD Hammer-like protocol")
     }
   }
 
-  GenericMachineType getNondirectHitMachType(Address addr, MachineID sender) {
-    if (machineIDToMachineType(sender) == MachineType:L1Cache) {
-      //
-      // NOTE direct local hits should not call this
-      //
-      return GenericMachineType:L1Cache_wCC; 
-    } else {
-      return ConvertMachToGenericMach(machineIDToMachineType(sender));
-    }
-  }
-
-  GenericMachineType testAndClearLocalHit(Entry cache_entry) {
+  // If `cache_entry` is valid and its FromL2 flag is set, clear the flag
+  // and return MachineType:L2Cache; in every other case return
+  // MachineType:L1Cache. The flag is therefore reported at most once per
+  // time it is set.
+  MachineType testAndClearLocalHit(Entry cache_entry) {
     if (is_valid(cache_entry) && cache_entry.FromL2) {
       cache_entry.FromL2 := false;
-      return GenericMachineType:L2Cache;
-    } else {
-      return GenericMachineType:L1Cache; 
+      return MachineType:L2Cache;
     }
+    return MachineType:L1Cache;
   }
 
   bool IsAtomicAccessed(Entry cache_entry) {
@@ -298,10 +320,7 @@ machine(L1Cache, "AMD Hammer-like protocol")
     return cache_entry.AtomicAccessed;
   }
 
-  MessageBuffer triggerQueue, ordered="false";
-
   // ** OUT_PORTS **
-
   out_port(requestNetwork_out, RequestMsg, requestFromCache);
   out_port(responseNetwork_out, ResponseMsg, responseFromCache);
   out_port(unblockNetwork_out, ResponseMsg, unblockFromCache);
@@ -311,18 +330,18 @@ machine(L1Cache, "AMD Hammer-like protocol")
 
   // Trigger Queue
   in_port(triggerQueue_in, TriggerMsg, triggerQueue, rank=3) {
-    if (triggerQueue_in.isReady()) {
+    if (triggerQueue_in.isReady(clockEdge())) {
       peek(triggerQueue_in, TriggerMsg) {
 
-        Entry cache_entry := getCacheEntry(in_msg.Address);
-        TBE tbe := TBEs[in_msg.Address];
+        Entry cache_entry := getCacheEntry(in_msg.addr);
+        TBE tbe := TBEs[in_msg.addr];
 
         if (in_msg.Type == TriggerType:L2_to_L1) {
-          trigger(Event:Complete_L2_to_L1, in_msg.Address, cache_entry, tbe);
+          trigger(Event:Complete_L2_to_L1, in_msg.addr, cache_entry, tbe);
         } else if (in_msg.Type == TriggerType:ALL_ACKS) {
-          trigger(Event:All_acks, in_msg.Address, cache_entry, tbe);
+          trigger(Event:All_acks, in_msg.addr, cache_entry, tbe);
         } else if (in_msg.Type == TriggerType:ALL_ACKS_NO_SHARERS) {
-          trigger(Event:All_acks_no_sharers, in_msg.Address, cache_entry, tbe);
+          trigger(Event:All_acks_no_sharers, in_msg.addr, cache_entry, tbe);
         } else {
           error("Unexpected message");
         }
@@ -334,22 +353,22 @@ machine(L1Cache, "AMD Hammer-like protocol")
 
   // Response Network
   in_port(responseToCache_in, ResponseMsg, responseToCache, rank=2) {
-    if (responseToCache_in.isReady()) {
-      peek(responseToCache_in, ResponseMsg, block_on="Address") {
+    if (responseToCache_in.isReady(clockEdge())) {
+      peek(responseToCache_in, ResponseMsg, block_on="addr") {
 
-        Entry cache_entry := getCacheEntry(in_msg.Address);
-        TBE tbe := TBEs[in_msg.Address];
+        Entry cache_entry := getCacheEntry(in_msg.addr);
+        TBE tbe := TBEs[in_msg.addr];
 
         if (in_msg.Type == CoherenceResponseType:ACK) {
-          trigger(Event:Ack, in_msg.Address, cache_entry, tbe);
+          trigger(Event:Ack, in_msg.addr, cache_entry, tbe);
         } else if (in_msg.Type == CoherenceResponseType:ACK_SHARED) {
-          trigger(Event:Shared_Ack, in_msg.Address, cache_entry, tbe);
+          trigger(Event:Shared_Ack, in_msg.addr, cache_entry, tbe);
         } else if (in_msg.Type == CoherenceResponseType:DATA) {
-          trigger(Event:Data, in_msg.Address, cache_entry, tbe);
+          trigger(Event:Data, in_msg.addr, cache_entry, tbe);
         } else if (in_msg.Type == CoherenceResponseType:DATA_SHARED) {
-          trigger(Event:Shared_Data, in_msg.Address, cache_entry, tbe);
+          trigger(Event:Shared_Data, in_msg.addr, cache_entry, tbe);
         } else if (in_msg.Type == CoherenceResponseType:DATA_EXCLUSIVE) {
-          trigger(Event:Exclusive_Data, in_msg.Address, cache_entry, tbe);
+          trigger(Event:Exclusive_Data, in_msg.addr, cache_entry, tbe);
         } else {
           error("Unexpected message");
         }
@@ -359,38 +378,39 @@ machine(L1Cache, "AMD Hammer-like protocol")
 
   // Forward Network
   in_port(forwardToCache_in, RequestMsg, forwardToCache, rank=1) {
-    if (forwardToCache_in.isReady()) {
-      peek(forwardToCache_in, RequestMsg, block_on="Address") {
+    if (forwardToCache_in.isReady(clockEdge())) {
+      peek(forwardToCache_in, RequestMsg, block_on="addr") {
 
-        Entry cache_entry := getCacheEntry(in_msg.Address);
-        TBE tbe := TBEs[in_msg.Address];
+        Entry cache_entry := getCacheEntry(in_msg.addr);
+        TBE tbe := TBEs[in_msg.addr];
 
-        if ((in_msg.Type == CoherenceRequestType:GETX) || (in_msg.Type == CoherenceRequestType:GETF)) {
-          trigger(Event:Other_GETX, in_msg.Address, cache_entry, tbe);
+        if ((in_msg.Type == CoherenceRequestType:GETX) ||
+            (in_msg.Type == CoherenceRequestType:GETF)) {
+          trigger(Event:Other_GETX, in_msg.addr, cache_entry, tbe);
         } else if (in_msg.Type == CoherenceRequestType:MERGED_GETS) {
-          trigger(Event:Merged_GETS, in_msg.Address, cache_entry, tbe);
+          trigger(Event:Merged_GETS, in_msg.addr, cache_entry, tbe);
         } else if (in_msg.Type == CoherenceRequestType:GETS) {
           if (machineCount(MachineType:L1Cache) > 1) {
             if (is_valid(cache_entry)) {
               if (IsAtomicAccessed(cache_entry) && no_mig_atomic) {
-                trigger(Event:Other_GETS_No_Mig, in_msg.Address, cache_entry, tbe);
+                trigger(Event:Other_GETS_No_Mig, in_msg.addr, cache_entry, tbe);
               } else {
-                trigger(Event:Other_GETS, in_msg.Address, cache_entry, tbe);
+                trigger(Event:Other_GETS, in_msg.addr, cache_entry, tbe);
               }
             } else {
-              trigger(Event:Other_GETS, in_msg.Address, cache_entry, tbe);
+              trigger(Event:Other_GETS, in_msg.addr, cache_entry, tbe);
             }
           } else {
-            trigger(Event:NC_DMA_GETS, in_msg.Address, cache_entry, tbe);
+            trigger(Event:NC_DMA_GETS, in_msg.addr, cache_entry, tbe);
           }
         } else if (in_msg.Type == CoherenceRequestType:INV) {
-          trigger(Event:Invalidate, in_msg.Address, cache_entry, tbe);
+          trigger(Event:Invalidate, in_msg.addr, cache_entry, tbe);
         } else if (in_msg.Type == CoherenceRequestType:WB_ACK) {
-          trigger(Event:Writeback_Ack, in_msg.Address, cache_entry, tbe);
+          trigger(Event:Writeback_Ack, in_msg.addr, cache_entry, tbe);
         } else if (in_msg.Type == CoherenceRequestType:WB_NACK) {
-          trigger(Event:Writeback_Nack, in_msg.Address, cache_entry, tbe);
+          trigger(Event:Writeback_Nack, in_msg.addr, cache_entry, tbe);
         } else if (in_msg.Type == CoherenceRequestType:BLOCK_ACK) {
-          trigger(Event:Block_Ack, in_msg.Address, cache_entry, tbe);
+          trigger(Event:Block_Ack, in_msg.addr, cache_entry, tbe);
         } else {
           error("Unexpected message");
         }
@@ -402,7 +422,7 @@ machine(L1Cache, "AMD Hammer-like protocol")
 
   // Mandatory Queue
   in_port(mandatoryQueue_in, RubyRequest, mandatoryQueue, desc="...", rank=0) {
-    if (mandatoryQueue_in.isReady()) {
+    if (mandatoryQueue_in.isReady(clockEdge())) {
       peek(mandatoryQueue_in, RubyRequest, block_on="LineAddress") {
 
         // Check for data access to blocks in I-cache and ifetchs to blocks in D-cache
@@ -413,7 +433,8 @@ machine(L1Cache, "AMD Hammer-like protocol")
 
           Entry L1Icache_entry := getL1ICacheEntry(in_msg.LineAddress);
           if (is_valid(L1Icache_entry)) {
-            // The tag matches for the L1, so the L1 fetches the line.  We know it can't be in the L2 due to exclusion
+            // The tag matches for the L1, so the L1 fetches the line.
+            // We know it can't be in the L2 due to exclusion
             trigger(mandatory_request_type_to_event(in_msg.Type),
                     in_msg.LineAddress, L1Icache_entry, tbe);
           } else {
@@ -421,18 +442,18 @@ machine(L1Cache, "AMD Hammer-like protocol")
             Entry L1Dcache_entry := getL1DCacheEntry(in_msg.LineAddress);
             if (is_valid(L1Dcache_entry)) {
               // The block is in the wrong L1, try to write it to the L2
-              if (L2cacheMemory.cacheAvail(in_msg.LineAddress)) {
+              if (L2cache.cacheAvail(in_msg.LineAddress)) {
                 trigger(Event:L1_to_L2, in_msg.LineAddress, L1Dcache_entry, tbe);
               } else {
-                Address l2_victim_addr := L2cacheMemory.cacheProbe(in_msg.LineAddress);
+                Addr l2_victim_addr := L2cache.cacheProbe(in_msg.LineAddress);
                 trigger(Event:L2_Replacement,
-                        l2_victim_addr, 
+                        l2_victim_addr,
                         getL2CacheEntry(l2_victim_addr),
                         TBEs[l2_victim_addr]);
               }
             }
 
-            if (L1IcacheMemory.cacheAvail(in_msg.LineAddress)) {
+            if (L1Icache.cacheAvail(in_msg.LineAddress)) {
               // L1 does't have the line, but we have space for it in the L1
 
               Entry L2cache_entry := getL2CacheEntry(in_msg.LineAddress);
@@ -447,15 +468,15 @@ machine(L1Cache, "AMD Hammer-like protocol")
               }
             } else {
               // No room in the L1, so we need to make room
-              Address l1i_victim_addr := L1IcacheMemory.cacheProbe(in_msg.LineAddress);
-              if (L2cacheMemory.cacheAvail(l1i_victim_addr)) {
+              Addr l1i_victim_addr := L1Icache.cacheProbe(in_msg.LineAddress);
+              if (L2cache.cacheAvail(l1i_victim_addr)) {
                 // The L2 has room, so we move the line from the L1 to the L2
                 trigger(Event:L1_to_L2,
                         l1i_victim_addr,
                         getL1ICacheEntry(l1i_victim_addr),
                         TBEs[l1i_victim_addr]);
               } else {
-                Address l2_victim_addr := L2cacheMemory.cacheProbe(l1i_victim_addr);
+                Addr l2_victim_addr := L2cache.cacheProbe(l1i_victim_addr);
                 // The L2 does not have room, so we replace a line from the L2
                 trigger(Event:L2_Replacement,
                         l2_victim_addr,
@@ -469,7 +490,8 @@ machine(L1Cache, "AMD Hammer-like protocol")
 
           Entry L1Dcache_entry := getL1DCacheEntry(in_msg.LineAddress);
           if (is_valid(L1Dcache_entry)) {
-            // The tag matches for the L1, so the L1 fetches the line.  We know it can't be in the L2 due to exclusion
+            // The tag matches for the L1, so the L1 fetches the line.
+            // We know it can't be in the L2 due to exclusion
             trigger(mandatory_request_type_to_event(in_msg.Type),
                     in_msg.LineAddress, L1Dcache_entry, tbe);
           } else {
@@ -478,10 +500,10 @@ machine(L1Cache, "AMD Hammer-like protocol")
             Entry L1Icache_entry := getL1ICacheEntry(in_msg.LineAddress);
             if (is_valid(L1Icache_entry)) {
               // The block is in the wrong L1, try to write it to the L2
-              if (L2cacheMemory.cacheAvail(in_msg.LineAddress)) {
+              if (L2cache.cacheAvail(in_msg.LineAddress)) {
                 trigger(Event:L1_to_L2, in_msg.LineAddress, L1Icache_entry, tbe);
               } else {
-                Address l2_victim_addr := L2cacheMemory.cacheProbe(in_msg.LineAddress);
+                Addr l2_victim_addr := L2cache.cacheProbe(in_msg.LineAddress);
                 trigger(Event:L2_Replacement,
                         l2_victim_addr,
                         getL2CacheEntry(l2_victim_addr),
@@ -489,7 +511,7 @@ machine(L1Cache, "AMD Hammer-like protocol")
               }
             }
 
-            if (L1DcacheMemory.cacheAvail(in_msg.LineAddress)) {
+            if (L1Dcache.cacheAvail(in_msg.LineAddress)) {
               // L1 does't have the line, but we have space for it in the L1
               Entry L2cache_entry := getL2CacheEntry(in_msg.LineAddress);
               if (is_valid(L2cache_entry)) {
@@ -503,15 +525,15 @@ machine(L1Cache, "AMD Hammer-like protocol")
               }
             } else {
               // No room in the L1, so we need to make room
-              Address l1d_victim_addr := L1DcacheMemory.cacheProbe(in_msg.LineAddress);
-              if (L2cacheMemory.cacheAvail(l1d_victim_addr)) {
+              Addr l1d_victim_addr := L1Dcache.cacheProbe(in_msg.LineAddress);
+              if (L2cache.cacheAvail(l1d_victim_addr)) {
                 // The L2 has room, so we move the line from the L1 to the L2
                 trigger(Event:L1_to_L2,
                         l1d_victim_addr,
                         getL1DCacheEntry(l1d_victim_addr),
                         TBEs[l1d_victim_addr]);
               } else {
-                Address l2_victim_addr := L2cacheMemory.cacheProbe(l1d_victim_addr);
+                Addr l2_victim_addr := L2cache.cacheProbe(l1d_victim_addr);
                 // The L2 does not have room, so we replace a line from the L2
                 trigger(Event:L2_Replacement,
                         l2_victim_addr,
@@ -524,68 +546,76 @@ machine(L1Cache, "AMD Hammer-like protocol")
       }
     }
   }
-  
+
   // ACTIONS
 
   action(a_issueGETS, "a", desc="Issue GETS") {
+    // Enqueue a GETS request for `address` on the request network with
+    // issue_latency. The destination is the Directory machine returned by
+    // mapAddressToMachine, and the message is stamped with curCycle().
+    // A valid TBE is required (asserted below); its pending-message count
+    // is initialised inside the enqueue block.
-    enqueue(requestNetwork_out, RequestMsg, latency=issue_latency) {
+    enqueue(requestNetwork_out, RequestMsg, issue_latency) {
       assert(is_valid(tbe));
-      out_msg.Address := address;
+      out_msg.addr := address;
       out_msg.Type := CoherenceRequestType:GETS;
       out_msg.Requestor := machineID;
-      out_msg.Destination.add(map_Address_to_Directory(address));
+      out_msg.Destination.add(mapAddressToMachine(address, MachineType:Directory));
       out_msg.MessageSize := MessageSizeType:Request_Control;
-      out_msg.InitialRequestTime := get_time();
-      tbe.NumPendingMsgs := machineCount(MachineType:L1Cache); // One from each other cache (n-1) plus the memory (+1)
+      out_msg.InitialRequestTime := curCycle();
+
+      // One from each other cache (n-1) plus the memory (+1)
+      tbe.NumPendingMsgs := machineCount(MachineType:L1Cache);
     }
   }
 
   action(b_issueGETX, "b", desc="Issue GETX") {
+    // Enqueue a GETX request for `address` on the request network with
+    // issue_latency. The destination is the Directory machine returned by
+    // mapAddressToMachine, and the message is stamped with curCycle().
+    // A valid TBE is required (asserted below); its pending-message count
+    // is initialised inside the enqueue block.
-    enqueue(requestNetwork_out, RequestMsg, latency=issue_latency) {
+    enqueue(requestNetwork_out, RequestMsg, issue_latency) {
       assert(is_valid(tbe));
-      out_msg.Address := address;
+      out_msg.addr := address;
       out_msg.Type := CoherenceRequestType:GETX;
       out_msg.Requestor := machineID;
-      out_msg.Destination.add(map_Address_to_Directory(address));
+      out_msg.Destination.add(mapAddressToMachine(address, MachineType:Directory));
       out_msg.MessageSize := MessageSizeType:Request_Control;
-      out_msg.InitialRequestTime := get_time();
-      tbe.NumPendingMsgs := machineCount(MachineType:L1Cache); // One from each other cache (n-1) plus the memory (+1)
+      out_msg.InitialRequestTime := curCycle();
+
+      // One from each other cache (n-1) plus the memory (+1)
+      tbe.NumPendingMsgs := machineCount(MachineType:L1Cache);
     }
   }
 
   action(b_issueGETXIfMoreThanOne, "bo", desc="Issue GETX") {
+    // Same GETX message as b_issueGETX, but it is only enqueued when more
+    // than one L1Cache machine exists. The TBE's pending-message count is
+    // set after the conditional, so it is written whether or not a
+    // request was sent.
+    // NOTE(review): that write happens outside the enqueue block, so the
+    // is_valid(tbe) assert does not cover the single-cache path.
     if (machineCount(MachineType:L1Cache) > 1) {
-      enqueue(requestNetwork_out, RequestMsg, latency=issue_latency) {
+      enqueue(requestNetwork_out, RequestMsg, issue_latency) {
         assert(is_valid(tbe));
-        out_msg.Address := address;
+        out_msg.addr := address;
         out_msg.Type := CoherenceRequestType:GETX;
         out_msg.Requestor := machineID;
-        out_msg.Destination.add(map_Address_to_Directory(address));
+        out_msg.Destination.add(mapAddressToMachine(address, MachineType:Directory));
         out_msg.MessageSize := MessageSizeType:Request_Control;
-        out_msg.InitialRequestTime := get_time();
+        out_msg.InitialRequestTime := curCycle();
       }
     }
-    tbe.NumPendingMsgs := machineCount(MachineType:L1Cache); // One from each other cache (n-1) plus the memory (+1)
+
+    // One from each other cache (n-1) plus the memory (+1)
+    tbe.NumPendingMsgs := machineCount(MachineType:L1Cache);
   }
 
   action(bf_issueGETF, "bf", desc="Issue GETF") {
+    // Enqueue a GETF request for `address` on the request network with
+    // issue_latency. The destination is the Directory machine returned by
+    // mapAddressToMachine, and the message is stamped with curCycle().
+    // A valid TBE is required (asserted below); its pending-message count
+    // is initialised inside the enqueue block.
-    enqueue(requestNetwork_out, RequestMsg, latency=issue_latency) {
+    enqueue(requestNetwork_out, RequestMsg, issue_latency) {
       assert(is_valid(tbe));
-      out_msg.Address := address;
+      out_msg.addr := address;
       out_msg.Type := CoherenceRequestType:GETF;
       out_msg.Requestor := machineID;
-      out_msg.Destination.add(map_Address_to_Directory(address));
+      out_msg.Destination.add(mapAddressToMachine(address, MachineType:Directory));
       out_msg.MessageSize := MessageSizeType:Request_Control;
-      out_msg.InitialRequestTime := get_time();
-      tbe.NumPendingMsgs := machineCount(MachineType:L1Cache); // One from each other cache (n-1) plus the memory (+1)
+      out_msg.InitialRequestTime := curCycle();
+
+      // One from each other cache (n-1) plus the memory (+1)
+      tbe.NumPendingMsgs := machineCount(MachineType:L1Cache);
     }
   }
 
   action(c_sendExclusiveData, "c", desc="Send exclusive data from cache to requestor") {
     peek(forwardToCache_in, RequestMsg) {
-      enqueue(responseNetwork_out, ResponseMsg, latency=cache_response_latency) {
+      enqueue(responseNetwork_out, ResponseMsg, cache_response_latency) {
         assert(is_valid(cache_entry));
-        out_msg.Address := address;
+        out_msg.addr := address;
         out_msg.Type := CoherenceResponseType:DATA_EXCLUSIVE;
         out_msg.Sender := machineID;
         out_msg.Destination.add(in_msg.Requestor);
@@ -606,9 +636,9 @@ machine(L1Cache, "AMD Hammer-like protocol")
 
   action(ct_sendExclusiveDataFromTBE, "ct", desc="Send exclusive data from tbe to requestor") {
     peek(forwardToCache_in, RequestMsg) {
-      enqueue(responseNetwork_out, ResponseMsg, latency=cache_response_latency) {
+      enqueue(responseNetwork_out, ResponseMsg, cache_response_latency) {
         assert(is_valid(tbe));
-        out_msg.Address := address;
+        out_msg.addr := address;
         out_msg.Type := CoherenceResponseType:DATA_EXCLUSIVE;
         out_msg.Sender := machineID;
         out_msg.Destination.add(in_msg.Requestor);
@@ -628,30 +658,30 @@ machine(L1Cache, "AMD Hammer-like protocol")
   }
 
   action(d_issuePUT, "d", desc="Issue PUT") {
+    // Enqueue a PUT request for `address` on the request network with
+    // issue_latency, addressed to the Directory machine returned by
+    // mapAddressToMachine and sized as Writeback_Control. No TBE is
+    // referenced by this action.
-    enqueue(requestNetwork_out, RequestMsg, latency=issue_latency) {
-      out_msg.Address := address;
+    enqueue(requestNetwork_out, RequestMsg, issue_latency) {
+      out_msg.addr := address;
       out_msg.Type := CoherenceRequestType:PUT;
       out_msg.Requestor := machineID;
-      out_msg.Destination.add(map_Address_to_Directory(address));
+      out_msg.Destination.add(mapAddressToMachine(address, MachineType:Directory));
       out_msg.MessageSize := MessageSizeType:Writeback_Control;
     }
   }
 
   action(df_issuePUTF, "df", desc="Issue PUTF") {
+    // Enqueue a PUTF request for `address` on the request network with
+    // issue_latency, addressed to the Directory machine returned by
+    // mapAddressToMachine and sized as Writeback_Control. No TBE is
+    // referenced by this action.
-    enqueue(requestNetwork_out, RequestMsg, latency=issue_latency) {
-      out_msg.Address := address;
+    enqueue(requestNetwork_out, RequestMsg, issue_latency) {
+      out_msg.addr := address;
       out_msg.Type := CoherenceRequestType:PUTF;
       out_msg.Requestor := machineID;
-      out_msg.Destination.add(map_Address_to_Directory(address));
+      out_msg.Destination.add(mapAddressToMachine(address, MachineType:Directory));
       out_msg.MessageSize := MessageSizeType:Writeback_Control;
     }
   }
 
   action(e_sendData, "e", desc="Send data from cache to requestor") {
     peek(forwardToCache_in, RequestMsg) {
-      enqueue(responseNetwork_out, ResponseMsg, latency=cache_response_latency) {
+      enqueue(responseNetwork_out, ResponseMsg, cache_response_latency) {
         assert(is_valid(cache_entry));
-        out_msg.Address := address;
+        out_msg.addr := address;
         out_msg.Type := CoherenceResponseType:DATA;
         out_msg.Sender := machineID;
         out_msg.Destination.add(in_msg.Requestor);
@@ -672,9 +702,9 @@ machine(L1Cache, "AMD Hammer-like protocol")
 
   action(ee_sendDataShared, "\e", desc="Send data from cache to requestor, remaining the owner") {
     peek(forwardToCache_in, RequestMsg) {
-      enqueue(responseNetwork_out, ResponseMsg, latency=cache_response_latency) {
+      enqueue(responseNetwork_out, ResponseMsg, cache_response_latency) {
         assert(is_valid(cache_entry));
-        out_msg.Address := address;
+        out_msg.addr := address;
         out_msg.Type := CoherenceResponseType:DATA_SHARED;
         out_msg.Sender := machineID;
         out_msg.Destination.add(in_msg.Requestor);
@@ -696,9 +726,9 @@ machine(L1Cache, "AMD Hammer-like protocol")
 
   action(et_sendDataSharedFromTBE, "\et", desc="Send data from TBE to requestor, keep a shared copy") {
     peek(forwardToCache_in, RequestMsg) {
-      enqueue(responseNetwork_out, ResponseMsg, latency=cache_response_latency) {
+      enqueue(responseNetwork_out, ResponseMsg, cache_response_latency) {
         assert(is_valid(tbe));
-        out_msg.Address := address;
+        out_msg.addr := address;
         out_msg.Type := CoherenceResponseType:DATA_SHARED;
         out_msg.Sender := machineID;
         out_msg.Destination.add(in_msg.Requestor);
@@ -720,9 +750,9 @@ machine(L1Cache, "AMD Hammer-like protocol")
 
   action(em_sendDataSharedMultiple, "em", desc="Send data from cache to all requestors, still the owner") {
     peek(forwardToCache_in, RequestMsg) {
-      enqueue(responseNetwork_out, ResponseMsg, latency=cache_response_latency) {
+      enqueue(responseNetwork_out, ResponseMsg, cache_response_latency) {
         assert(is_valid(cache_entry));
-        out_msg.Address := address;
+        out_msg.addr := address;
         out_msg.Type := CoherenceResponseType:DATA_SHARED;
         out_msg.Sender := machineID;
         out_msg.Destination := in_msg.MergedRequestors;
@@ -737,12 +767,12 @@ machine(L1Cache, "AMD Hammer-like protocol")
       }
     }
   }
-  
+
   action(emt_sendDataSharedMultipleFromTBE, "emt", desc="Send data from tbe to all requestors") {
     peek(forwardToCache_in, RequestMsg) {
-      enqueue(responseNetwork_out, ResponseMsg, latency=cache_response_latency) {
+      enqueue(responseNetwork_out, ResponseMsg, cache_response_latency) {
         assert(is_valid(tbe));
-        out_msg.Address := address;
+        out_msg.addr := address;
         out_msg.Type := CoherenceResponseType:DATA_SHARED;
         out_msg.Sender := machineID;
         out_msg.Destination := in_msg.MergedRequestors;
@@ -760,8 +790,8 @@ machine(L1Cache, "AMD Hammer-like protocol")
 
   action(f_sendAck, "f", desc="Send ack from cache to requestor") {
     peek(forwardToCache_in, RequestMsg) {
-      enqueue(responseNetwork_out, ResponseMsg, latency=cache_response_latency) {
-        out_msg.Address := address;
+      enqueue(responseNetwork_out, ResponseMsg, cache_response_latency) {
+        out_msg.addr := address;
         out_msg.Type := CoherenceResponseType:ACK;
         out_msg.Sender := machineID;
         out_msg.Destination.add(in_msg.Requestor);
@@ -777,8 +807,8 @@ machine(L1Cache, "AMD Hammer-like protocol")
 
   action(ff_sendAckShared, "\f", desc="Send shared ack from cache to requestor") {
     peek(forwardToCache_in, RequestMsg) {
-      enqueue(responseNetwork_out, ResponseMsg, latency=cache_response_latency) {
-        out_msg.Address := address;
+      enqueue(responseNetwork_out, ResponseMsg, cache_response_latency) {
+        out_msg.addr := address;
         out_msg.Type := CoherenceResponseType:ACK_SHARED;
         out_msg.Sender := machineID;
         out_msg.Destination.add(in_msg.Requestor);
@@ -793,42 +823,51 @@ machine(L1Cache, "AMD Hammer-like protocol")
   }
 
   action(g_sendUnblock, "g", desc="Send unblock to memory") {
-    enqueue(unblockNetwork_out, ResponseMsg, latency=cache_response_latency) {
-      out_msg.Address := address;
+    enqueue(unblockNetwork_out, ResponseMsg, cache_response_latency) {  // third argument is the enqueue latency
+      out_msg.addr := address;
       out_msg.Type := CoherenceResponseType:UNBLOCK;
       out_msg.Sender := machineID;
-      out_msg.Destination.add(map_Address_to_Directory(address));
+      out_msg.Destination.add(mapAddressToMachine(address, MachineType:Directory));  // destination looked up for this address with MachineType:Directory
       out_msg.MessageSize := MessageSizeType:Unblock_Control;
     }
   }
 
   action(gm_sendUnblockM, "gm", desc="Send unblock to memory and indicate M/O/E state") {
-    enqueue(unblockNetwork_out, ResponseMsg, latency=cache_response_latency) {
-      out_msg.Address := address;
+    enqueue(unblockNetwork_out, ResponseMsg, cache_response_latency) {  // third argument is the enqueue latency
+      out_msg.addr := address;  // the M/O/E indication is carried by the UNBLOCKM type set below
       out_msg.Type := CoherenceResponseType:UNBLOCKM;
       out_msg.Sender := machineID;
-      out_msg.Destination.add(map_Address_to_Directory(address));
+      out_msg.Destination.add(mapAddressToMachine(address, MachineType:Directory));  // destination looked up for this address with MachineType:Directory
       out_msg.MessageSize := MessageSizeType:Unblock_Control;
     }
   }
 
   action(gs_sendUnblockS, "gs", desc="Send unblock to memory and indicate S state") {
-    enqueue(unblockNetwork_out, ResponseMsg, latency=cache_response_latency) {
+    enqueue(unblockNetwork_out, ResponseMsg, cache_response_latency) {  // needs a valid TBE: CurOwner below is copied from it
       assert(is_valid(tbe));
-      out_msg.Address := address;
+      out_msg.addr := address;
       out_msg.Type := CoherenceResponseType:UNBLOCKS;
       out_msg.Sender := machineID;
       out_msg.CurOwner := tbe.CurOwner;
-      out_msg.Destination.add(map_Address_to_Directory(address));
+      out_msg.Destination.add(mapAddressToMachine(address, MachineType:Directory));  // destination looked up for this address with MachineType:Directory
       out_msg.MessageSize := MessageSizeType:Unblock_Control;
     }
   }
 
-  action(h_load_hit, "h", desc="Notify sequencer the load completed.") {
+  action(h_load_hit, "hd", desc="Notify sequencer the load completed.") {  // data-side hit; h_ifetch_hit is the L1Icache counterpart
     assert(is_valid(cache_entry));
     DPRINTF(RubySlicc, "%s\n", cache_entry.DataBlk);
-    sequencer.readCallback(address, testAndClearLocalHit(cache_entry),
-                           cache_entry.DataBlk);
+    L1Dcache.setMRU(cache_entry);  // presumably marks the entry most-recently-used in the L1 D-cache (going by the name)
+    sequencer.readCallback(address, cache_entry.DataBlk, false,
+                           testAndClearLocalHit(cache_entry));  // third argument is false here; the hx_/sx_ external-hit actions pass true
+  }
+
+  action(h_ifetch_hit, "hi", desc="Notify sequencer the ifetch completed.") {  // same steps as h_load_hit, but against L1Icache
+    assert(is_valid(cache_entry));
+    DPRINTF(RubySlicc, "%s\n", cache_entry.DataBlk);
+    L1Icache.setMRU(cache_entry);  // presumably marks the entry most-recently-used in the L1 I-cache (going by the name)
+    sequencer.readCallback(address, cache_entry.DataBlk, false,
+                           testAndClearLocalHit(cache_entry));  // completion uses readCallback, exactly as for a data load
   }
 
   action(hx_external_load_hit, "hx", desc="load required external msgs") {
@@ -836,13 +875,11 @@ machine(L1Cache, "AMD Hammer-like protocol")
     assert(is_valid(tbe));
     DPRINTF(RubySlicc, "%s\n", cache_entry.DataBlk);
     peek(responseToCache_in, ResponseMsg) {
-
-      sequencer.readCallback(address, 
-                             getNondirectHitMachType(in_msg.Address, in_msg.Sender),
-                             cache_entry.DataBlk,
-                             tbe.InitialRequestTime,
-                             tbe.ForwardRequestTime,
-                             tbe.FirstResponseTime);
+      L1Icache.setMRU(address);
+      L1Dcache.setMRU(address);
+      sequencer.readCallback(address, cache_entry.DataBlk, true,
+                 machineIDToMachineType(in_msg.Sender), tbe.InitialRequestTime,
+                 tbe.ForwardRequestTime, tbe.FirstResponseTime);
     }
   }
 
@@ -850,8 +887,9 @@ machine(L1Cache, "AMD Hammer-like protocol")
     assert(is_valid(cache_entry));
     DPRINTF(RubySlicc, "%s\n", cache_entry.DataBlk);
     peek(mandatoryQueue_in, RubyRequest) {
-      sequencer.writeCallback(address, testAndClearLocalHit(cache_entry),
-                              cache_entry.DataBlk);
+      L1Dcache.setMRU(cache_entry);
+      sequencer.writeCallback(address, cache_entry.DataBlk, false,
+                              testAndClearLocalHit(cache_entry));
 
       cache_entry.Dirty := true;
       if (in_msg.Type == RubyRequestType:ATOMIC) {
@@ -863,7 +901,7 @@ machine(L1Cache, "AMD Hammer-like protocol")
   action(hh_flush_hit, "\hf", desc="Notify sequencer that flush completed.") {
     assert(is_valid(tbe));
     DPRINTF(RubySlicc, "%s\n", tbe.DataBlk);
-    sequencer.writeCallback(address, GenericMachineType:L1Cache,tbe.DataBlk);
+    sequencer.writeCallback(address, tbe.DataBlk, false, MachineType:L1Cache);  // completes with the TBE's data block; no cache entry is referenced here
   }
 
   action(sx_external_store_hit, "sx", desc="store required external msgs.") {
@@ -871,14 +909,13 @@ machine(L1Cache, "AMD Hammer-like protocol")
     assert(is_valid(tbe));
     DPRINTF(RubySlicc, "%s\n", cache_entry.DataBlk);
     peek(responseToCache_in, ResponseMsg) {
-
-      sequencer.writeCallback(address, 
-                              getNondirectHitMachType(address, in_msg.Sender),
-                              cache_entry.DataBlk,
-                              tbe.InitialRequestTime,
-                              tbe.ForwardRequestTime,
-                              tbe.FirstResponseTime);
+      L1Icache.setMRU(address);
+      L1Dcache.setMRU(address);
+      sequencer.writeCallback(address, cache_entry.DataBlk, true,
+              machineIDToMachineType(in_msg.Sender), tbe.InitialRequestTime,
+              tbe.ForwardRequestTime, tbe.FirstResponseTime);
     }
+    DPRINTF(RubySlicc, "%s\n", cache_entry.DataBlk);
     cache_entry.Dirty := true;
   }
 
@@ -886,13 +923,11 @@ machine(L1Cache, "AMD Hammer-like protocol")
     assert(is_valid(cache_entry));
     assert(is_valid(tbe));
     DPRINTF(RubySlicc, "%s\n", cache_entry.DataBlk);
-
-    sequencer.writeCallback(address, 
-                            getNondirectHitMachType(address, tbe.LastResponder),
-                            cache_entry.DataBlk,
-                            tbe.InitialRequestTime,
-                            tbe.ForwardRequestTime,
-                            tbe.FirstResponseTime);
+    L1Icache.setMRU(address);
+    L1Dcache.setMRU(address);
+    sequencer.writeCallback(address, cache_entry.DataBlk, true,
+            machineIDToMachineType(tbe.LastResponder), tbe.InitialRequestTime,
+            tbe.ForwardRequestTime, tbe.FirstResponseTime);
 
     cache_entry.Dirty := true;
   }
@@ -916,15 +951,15 @@ machine(L1Cache, "AMD Hammer-like protocol")
   }
 
   action(j_popTriggerQueue, "j", desc="Pop trigger queue.") {
-    triggerQueue_in.dequeue();
+    triggerQueue_in.dequeue(clockEdge());  // dequeue is given clockEdge() as its time argument
   }
 
   action(k_popMandatoryQueue, "k", desc="Pop mandatory queue.") {
-    mandatoryQueue_in.dequeue();
+    mandatoryQueue_in.dequeue(clockEdge());  // dequeue is given clockEdge() as its time argument
   }
 
   action(l_popForwardQueue, "l", desc="Pop forwareded request queue.") {
-    forwardToCache_in.dequeue();
+    forwardToCache_in.dequeue(clockEdge());  // removes a message from the forwarded-request port; clockEdge() is the time argument
   }
 
   action(hp_copyFromTBEToL2, "li", desc="Copy data from TBE to L2 cache entry.") {
@@ -971,7 +1006,7 @@ machine(L1Cache, "AMD Hammer-like protocol")
         tbe.ForwardRequestTime := in_msg.ForwardRequestTime;
       }
       if (tbe.FirstResponseTime == zero_time()) {
-        tbe.FirstResponseTime := get_time();
+        tbe.FirstResponseTime := curCycle();
       }
     }
   }
@@ -983,12 +1018,12 @@ machine(L1Cache, "AMD Hammer-like protocol")
   }
 
   action(n_popResponseQueue, "n", desc="Pop response queue") {
-    responseToCache_in.dequeue();
+    responseToCache_in.dequeue(clockEdge());  // dequeue is given clockEdge() as its time argument
   }
 
   action(ll_L2toL1Transfer, "ll", desc="") {
-    enqueue(triggerQueue_out, TriggerMsg, latency=l2_cache_hit_latency) {
-      out_msg.Address := address;
+    enqueue(triggerQueue_out, TriggerMsg, l2_cache_hit_latency) {  // self-addressed trigger message, enqueued with l2_cache_hit_latency as its latency
+      out_msg.addr := address;
       out_msg.Type := TriggerType:L2_to_L1;
     }
   }
@@ -997,7 +1032,7 @@ machine(L1Cache, "AMD Hammer-like protocol")
     assert(is_valid(tbe));
     if (tbe.NumPendingMsgs == 0) {
       enqueue(triggerQueue_out, TriggerMsg) {
-        out_msg.Address := address;
+        out_msg.addr := address;
         if (tbe.Sharers) {
           out_msg.Type := TriggerType:ALL_ACKS;
         } else {
@@ -1020,9 +1055,9 @@ machine(L1Cache, "AMD Hammer-like protocol")
   action(q_sendDataFromTBEToCache, "q", desc="Send data from TBE to cache") {
     peek(forwardToCache_in, RequestMsg) {
         assert(in_msg.Requestor != machineID);
-      enqueue(responseNetwork_out, ResponseMsg, latency=cache_response_latency) {
+      enqueue(responseNetwork_out, ResponseMsg, cache_response_latency) {
         assert(is_valid(tbe));
-        out_msg.Address := address;
+        out_msg.addr := address;
         out_msg.Type := CoherenceResponseType:DATA;
         out_msg.Sender := machineID;
         out_msg.Destination.add(in_msg.Requestor);
@@ -1045,9 +1080,9 @@ machine(L1Cache, "AMD Hammer-like protocol")
   action(sq_sendSharedDataFromTBEToCache, "sq", desc="Send shared data from TBE to cache, still the owner") {
     peek(forwardToCache_in, RequestMsg) {
         assert(in_msg.Requestor != machineID);
-      enqueue(responseNetwork_out, ResponseMsg, latency=cache_response_latency) {
+      enqueue(responseNetwork_out, ResponseMsg, cache_response_latency) {
         assert(is_valid(tbe));
-        out_msg.Address := address;
+        out_msg.addr := address;
         out_msg.Type := CoherenceResponseType:DATA_SHARED;
         out_msg.Sender := machineID;
         out_msg.Destination.add(in_msg.Requestor);
@@ -1069,9 +1104,9 @@ machine(L1Cache, "AMD Hammer-like protocol")
 
   action(qm_sendDataFromTBEToCache, "qm", desc="Send data from TBE to cache, multiple sharers, still the owner") {
     peek(forwardToCache_in, RequestMsg) {
-      enqueue(responseNetwork_out, ResponseMsg, latency=cache_response_latency) {
+      enqueue(responseNetwork_out, ResponseMsg, cache_response_latency) {
         assert(is_valid(tbe));
-        out_msg.Address := address;
+        out_msg.addr := address;
         out_msg.Type := CoherenceResponseType:DATA_SHARED;
         out_msg.Sender := machineID;
         out_msg.Destination := in_msg.MergedRequestors;
@@ -1088,11 +1123,11 @@ machine(L1Cache, "AMD Hammer-like protocol")
   }
 
   action(qq_sendDataFromTBEToMemory, "\q", desc="Send data from TBE to memory") {
-    enqueue(unblockNetwork_out, ResponseMsg, latency=cache_response_latency) {
+    enqueue(unblockNetwork_out, ResponseMsg, cache_response_latency) {
       assert(is_valid(tbe));
-      out_msg.Address := address;
+      out_msg.addr := address;
       out_msg.Sender := machineID;
-      out_msg.Destination.add(map_Address_to_Directory(address));
+      out_msg.Destination.add(mapAddressToMachine(address, MachineType:Directory));
       out_msg.Dirty := tbe.Dirty;
       if (tbe.Dirty) {
         out_msg.Type := CoherenceResponseType:WB_DIRTY;
@@ -1102,7 +1137,7 @@ machine(L1Cache, "AMD Hammer-like protocol")
         out_msg.Type := CoherenceResponseType:WB_CLEAN;
         // NOTE: in a real system this would not send data.  We send
         // data here only so we can check it at the memory
-        out_msg.DataBlk := tbe.DataBlk; 
+        out_msg.DataBlk := tbe.DataBlk;
         out_msg.MessageSize := MessageSizeType:Writeback_Control;
       }
     }
@@ -1119,12 +1154,12 @@ machine(L1Cache, "AMD Hammer-like protocol")
   }
 
   action(t_sendExclusiveDataFromTBEToMemory, "t", desc="Send exclusive data from TBE to memory") {
-    enqueue(unblockNetwork_out, ResponseMsg, latency=cache_response_latency) {
+    enqueue(unblockNetwork_out, ResponseMsg, cache_response_latency) {
       assert(is_valid(tbe));
-      out_msg.Address := address;
+      out_msg.addr := address;
       out_msg.Sender := machineID;
-      out_msg.Destination.add(map_Address_to_Directory(address));
-      out_msg.DataBlk := tbe.DataBlk; 
+      out_msg.Destination.add(mapAddressToMachine(address, MachineType:Directory));
+      out_msg.DataBlk := tbe.DataBlk;
       out_msg.Dirty := tbe.Dirty;
       if (tbe.Dirty) {
         out_msg.Type := CoherenceResponseType:WB_EXCLUSIVE_DIRTY;
@@ -1177,55 +1212,80 @@ machine(L1Cache, "AMD Hammer-like protocol")
       tbe.Dirty := in_msg.Dirty || tbe.Dirty;
     }
   }
-  
+
   action(gg_deallocateL1CacheBlock, "\g", desc="Deallocate cache block.  Sets the cache to invalid, allowing a replacement in parallel with a fetch.") {
-    if (L1DcacheMemory.isTagPresent(address)) {
-      L1DcacheMemory.deallocate(address);
+    if (L1Dcache.isTagPresent(address)) {  // the D-cache is probed first
+      L1Dcache.deallocate(address);
     } else {
-      L1IcacheMemory.deallocate(address);
+      L1Icache.deallocate(address);  // no presence check on this path: the block is assumed to be in the I-cache
     }
     unset_cache_entry();
   }
-  
+
   action(ii_allocateL1DCacheBlock, "\i", desc="Set L1 D-cache tag equal to tag of block B.") {
     if (is_invalid(cache_entry)) {
-      set_cache_entry(L1DcacheMemory.allocate(address, new Entry));
+      set_cache_entry(L1Dcache.allocate(address, new Entry));  // allocates only when no cache_entry is currently valid (guard above)
     }
   }
 
   action(jj_allocateL1ICacheBlock, "\j", desc="Set L1 I-cache tag equal to tag of block B.") {
     if (is_invalid(cache_entry)) {
-      set_cache_entry(L1IcacheMemory.allocate(address, new Entry));
+      set_cache_entry(L1Icache.allocate(address, new Entry));  // allocates only when no cache_entry is currently valid (guard above)
     }
   }
 
   action(vv_allocateL2CacheBlock, "\v", desc="Set L2 cache tag equal to tag of block B.") {
-    set_cache_entry(L2cacheMemory.allocate(address, new Entry));
+    set_cache_entry(L2cache.allocate(address, new Entry));  // unconditional: there is no is_invalid(cache_entry) guard in this action
   }
 
   action(rr_deallocateL2CacheBlock, "\r", desc="Deallocate L2 cache block.  Sets the cache to not present, allowing a replacement in parallel with a fetch.") {
-    L2cacheMemory.deallocate(address);
+    L2cache.deallocate(address);  // no isTagPresent check is made before deallocating
+    unset_cache_entry();  // clear cache_entry once the block has been deallocated
+  }
+
+  action(gr_deallocateCacheBlock, "\gr", desc="Deallocate an L1 or L2 cache block.") {
+    if (L1Dcache.isTagPresent(address)) {  // probe order: L1 D-cache, then L1 I-cache, then L2
+      L1Dcache.deallocate(address);
+    }
+    else if (L1Icache.isTagPresent(address)){
+      L1Icache.deallocate(address);
+    }
+    else {
+      assert(L2cache.isTagPresent(address));  // the block has to be in one of the three caches
+      L2cache.deallocate(address);
+    }
     unset_cache_entry();
   }
 
   action(forward_eviction_to_cpu, "\cc", desc="sends eviction information to the processor") {
     if (send_evictions) {
-      DPRINTF(RubySlicc, "Sending invalidation for %s to the CPU\n", address);
+      DPRINTF(RubySlicc, "Sending invalidation for %#x to the CPU\n", address);  // trace and callback both happen only when send_evictions is set
       sequencer.evictionCallback(address);
     }
   }
 
-  action(uu_profileMiss, "\u", desc="Profile the demand miss") {
-    peek(mandatoryQueue_in, RubyRequest) {
-      if (L1IcacheMemory.isTagPresent(address)) {
-        L1IcacheMemory.profileMiss(in_msg);
-      } else if (L1DcacheMemory.isTagPresent(address)) {
-        L1DcacheMemory.profileMiss(in_msg);
-      }
-      if (L2cacheMemory.isTagPresent(address) == false) {
-        L2cacheMemory.profileMiss(in_msg);
-      }
-    }
+  action(uu_profileL1DataMiss, "\udm", desc="Profile the demand miss") {  // each uu_profile* action only bumps one counter; it does not peek the request
+      ++L1Dcache.demand_misses;  // L1 D-cache demand-miss counter
+  }
+
+  action(uu_profileL1DataHit, "\udh", desc="Profile the demand hits") {
+      ++L1Dcache.demand_hits;  // L1 D-cache demand-hit counter
+  }
+
+  action(uu_profileL1InstMiss, "\uim", desc="Profile the demand miss") {
+      ++L1Icache.demand_misses;  // L1 I-cache demand-miss counter
+  }
+
+  action(uu_profileL1InstHit, "\uih", desc="Profile the demand hits") {
+      ++L1Icache.demand_hits;  // L1 I-cache demand-hit counter
+  }
+
+  action(uu_profileL2Miss, "\um", desc="Profile the demand miss") {
+      ++L2cache.demand_misses;  // L2 demand-miss counter
+  }
+
+  action(uu_profileL2Hit, "\uh", desc="Profile the demand hits ") {
+      ++L2cache.demand_hits;  // L2 demand-hit counter
   }
 
   action(zz_stallAndWaitMandatoryQueue, "\z", desc="Send the head of the mandatory queue to the back of the queue.") {
@@ -1250,7 +1310,7 @@ machine(L1Cache, "AMD Hammer-like protocol")
   //*****************************************************
 
   // Transitions for Load/Store/L2_Replacement from transient states
-  transition({IM, IM_F, MM_WF, SM, SM_F, ISM, ISM_F, OM, OM_F, IS, SS, OI, MI, II, IT, ST, OT, MT, MMT}, {Store, L2_Replacement}) {
+  transition({IM, IM_F, MM_WF, SM, SM_F, ISM, ISM_F, OM, OM_F, IS, SS, OI, MI, II, ST, OT, MT, MMT}, {Store, L2_Replacement}) {
     zz_stallAndWaitMandatoryQueue;
   }
 
@@ -1262,11 +1322,11 @@ machine(L1Cache, "AMD Hammer-like protocol")
     zz_stallAndWaitMandatoryQueue;
   }
 
-  transition({IM, IS, OI, MI, II, IT, ST, OT, MT, MMT, MI_F, MM_F, OM_F, IM_F, ISM_F, SM_F, MM_WF}, {Load, Ifetch}) {
+  transition({IM, IS, OI, MI, II, ST, OT, MT, MMT, MI_F, MM_F, OM_F, IM_F, ISM_F, SM_F, MM_WF}, {Load, Ifetch}) {
     zz_stallAndWaitMandatoryQueue;
   }
 
-  transition({IM, SM, ISM, OM, IS, SS, MM_W, M_W, OI, MI, II, IT, ST, OT, MT, MMT, IM_F, SM_F, ISM_F, OM_F, MM_WF, MI_F, MM_F, IR, SR, OR, MR, MMR}, L1_to_L2) {
+  transition({IM, SM, ISM, OM, IS, SS, MM_W, M_W, OI, MI, II, ST, OT, MT, MMT, IM_F, SM_F, ISM_F, OM_F, MM_WF, MI_F, MM_F, IR, SR, OR, MR, MMR}, L1_to_L2) {
     zz_stallAndWaitMandatoryQueue;
   }
 
@@ -1278,7 +1338,7 @@ machine(L1Cache, "AMD Hammer-like protocol")
     zz_stallAndWaitMandatoryQueue;
   }
 
-  transition({IT, ST, OT, MT, MMT}, {Other_GETX, NC_DMA_GETS, Other_GETS, Merged_GETS, Other_GETS_No_Mig, Invalidate, Flush_line}) {
+  transition({ST, OT, MT, MMT}, {Other_GETX, NC_DMA_GETS, Other_GETS, Merged_GETS, Other_GETS_No_Mig, Invalidate, Flush_line}) {
     z_stall;
   }
 
@@ -1287,7 +1347,7 @@ machine(L1Cache, "AMD Hammer-like protocol")
   }
 
   // Transitions moving data between the L1 and L2 caches
-  transition({I, S, O, M, MM}, L1_to_L2) {
+  transition({S, O, M, MM}, L1_to_L2) {
     i_allocateTBE;
     gg_deallocateL1CacheBlock;
     vv_allocateL2CacheBlock;
@@ -1295,24 +1355,12 @@ machine(L1Cache, "AMD Hammer-like protocol")
     s_deallocateTBE;
   }
 
-  transition(I, Trigger_L2_to_L1D, IT) {
-    i_allocateTBE;
-    rr_deallocateL2CacheBlock;
-    ii_allocateL1DCacheBlock;
-    nb_copyFromTBEToL1; // Not really needed for state I
-    s_deallocateTBE;
-    uu_profileMiss;
-    zz_stallAndWaitMandatoryQueue;
-    ll_L2toL1Transfer;
-  }
-
   transition(S, Trigger_L2_to_L1D, ST) {
     i_allocateTBE;
     rr_deallocateL2CacheBlock;
     ii_allocateL1DCacheBlock;
     nb_copyFromTBEToL1;
     s_deallocateTBE;
-    uu_profileMiss;
     zz_stallAndWaitMandatoryQueue;
     ll_L2toL1Transfer;
   }
@@ -1323,7 +1371,6 @@ machine(L1Cache, "AMD Hammer-like protocol")
     ii_allocateL1DCacheBlock;
     nb_copyFromTBEToL1;
     s_deallocateTBE;
-    uu_profileMiss;
     zz_stallAndWaitMandatoryQueue;
     ll_L2toL1Transfer;
   }
@@ -1334,7 +1381,6 @@ machine(L1Cache, "AMD Hammer-like protocol")
     ii_allocateL1DCacheBlock;
     nb_copyFromTBEToL1;
     s_deallocateTBE;
-    uu_profileMiss;
     zz_stallAndWaitMandatoryQueue;
     ll_L2toL1Transfer;
   }
@@ -1345,18 +1391,6 @@ machine(L1Cache, "AMD Hammer-like protocol")
     ii_allocateL1DCacheBlock;
     nb_copyFromTBEToL1;
     s_deallocateTBE;
-    uu_profileMiss;
-    zz_stallAndWaitMandatoryQueue;
-    ll_L2toL1Transfer;
-  }
-
-  transition(I, Trigger_L2_to_L1I, IT) {
-    i_allocateTBE;
-    rr_deallocateL2CacheBlock;
-    jj_allocateL1ICacheBlock;
-    nb_copyFromTBEToL1;
-    s_deallocateTBE;
-    uu_profileMiss;
     zz_stallAndWaitMandatoryQueue;
     ll_L2toL1Transfer;
   }
@@ -1367,7 +1401,6 @@ machine(L1Cache, "AMD Hammer-like protocol")
     jj_allocateL1ICacheBlock;
     nb_copyFromTBEToL1;
     s_deallocateTBE;
-    uu_profileMiss;
     zz_stallAndWaitMandatoryQueue;
     ll_L2toL1Transfer;
   }
@@ -1378,7 +1411,6 @@ machine(L1Cache, "AMD Hammer-like protocol")
     jj_allocateL1ICacheBlock;
     nb_copyFromTBEToL1;
     s_deallocateTBE;
-    uu_profileMiss;
     zz_stallAndWaitMandatoryQueue;
     ll_L2toL1Transfer;
   }
@@ -1389,7 +1421,6 @@ machine(L1Cache, "AMD Hammer-like protocol")
     jj_allocateL1ICacheBlock;
     nb_copyFromTBEToL1;
     s_deallocateTBE;
-    uu_profileMiss;
     zz_stallAndWaitMandatoryQueue;
     ll_L2toL1Transfer;
   }
@@ -1400,16 +1431,10 @@ machine(L1Cache, "AMD Hammer-like protocol")
     jj_allocateL1ICacheBlock;
     nb_copyFromTBEToL1;
     s_deallocateTBE;
-    uu_profileMiss;
     zz_stallAndWaitMandatoryQueue;
     ll_L2toL1Transfer;
   }
 
-  transition(IT, Complete_L2_to_L1, IR) {
-    j_popTriggerQueue;
-    kd_wakeUpDependents;
-  }
-
   transition(ST, Complete_L2_to_L1, SR) {
     j_popTriggerQueue;
     kd_wakeUpDependents;
@@ -1431,69 +1456,84 @@ machine(L1Cache, "AMD Hammer-like protocol")
   }
 
   // Transitions from Idle
-  transition({I, IR}, Load, IS) {
+  transition({I,IR}, Load, IS) {
     ii_allocateL1DCacheBlock;
     i_allocateTBE;
     a_issueGETS;
-    uu_profileMiss;
+    uu_profileL1DataMiss;
+    uu_profileL2Miss;
     k_popMandatoryQueue;
   }
 
-  transition({I, IR}, Ifetch, IS) {
+  transition({I,IR}, Ifetch, IS) {
     jj_allocateL1ICacheBlock;
     i_allocateTBE;
     a_issueGETS;
-    uu_profileMiss;
+    uu_profileL1InstMiss;
+    uu_profileL2Miss;
     k_popMandatoryQueue;
   }
 
-  transition({I, IR}, Store, IM) {
+  transition({I,IR}, Store, IM) {
     ii_allocateL1DCacheBlock;
     i_allocateTBE;
     b_issueGETX;
-    uu_profileMiss;
+    uu_profileL1DataMiss;
+    uu_profileL2Miss;
     k_popMandatoryQueue;
   }
 
   transition({I, IR}, Flush_line, IM_F) {
     it_allocateTBE;
     bf_issueGETF;
-    uu_profileMiss;
     k_popMandatoryQueue;
   }
 
-  transition(I, L2_Replacement) {
-    rr_deallocateL2CacheBlock;
-    ka_wakeUpAllDependents;
-  }
-
   transition(I, {Other_GETX, NC_DMA_GETS, Other_GETS, Other_GETS_No_Mig, Invalidate}) {
     f_sendAck;
     l_popForwardQueue;
   }
 
   // Transitions from Shared
-  transition({S, SM, ISM}, {Load, Ifetch}) {
+  transition({S, SM, ISM}, Load) {
     h_load_hit;
+    uu_profileL1DataHit;
+    k_popMandatoryQueue;
+  }
+
+  transition({S, SM, ISM}, Ifetch) {
+    h_ifetch_hit;
+    uu_profileL1InstHit;
     k_popMandatoryQueue;
   }
 
-  transition(SR, {Load, Ifetch}, S) {
+  transition(SR, Load, S) {
     h_load_hit;
+    uu_profileL1DataMiss;
+    uu_profileL2Hit;
+    k_popMandatoryQueue;
+    ka_wakeUpAllDependents;
+  }
+
+  transition(SR, Ifetch, S) {
+    h_ifetch_hit;
+    uu_profileL1InstMiss;
+    uu_profileL2Hit;
     k_popMandatoryQueue;
+    ka_wakeUpAllDependents;
   }
 
-  transition({S, SR}, Store, SM) {
+  transition({S,SR}, Store, SM) {
     i_allocateTBE;
     b_issueGETX;
-    uu_profileMiss;
+    uu_profileL1DataMiss;
+    uu_profileL2Miss;
     k_popMandatoryQueue;
   }
 
   transition({S, SR}, Flush_line, SM_F) {
     i_allocateTBE;
     bf_issueGETF;
-    uu_profileMiss;
     forward_eviction_to_cpu;
     gg_deallocateL1CacheBlock;
     k_popMandatoryQueue;
@@ -1508,6 +1548,7 @@ machine(L1Cache, "AMD Hammer-like protocol")
   transition(S, {Other_GETX, Invalidate}, I) {
     f_sendAck;
     forward_eviction_to_cpu;
+    gr_deallocateCacheBlock;
     l_popForwardQueue;
   }
 
@@ -1517,28 +1558,47 @@ machine(L1Cache, "AMD Hammer-like protocol")
   }
 
   // Transitions from Owned
-  transition({O, OM, SS, MM_W, M_W}, {Load, Ifetch}) {
+  transition({O, OM, SS, MM_W, M_W}, {Load}) {
     h_load_hit;
+    uu_profileL1DataHit;
     k_popMandatoryQueue;
   }
 
-  transition(OR, {Load, Ifetch}, O) {
+  transition({O, OM, SS, MM_W, M_W}, {Ifetch}) {
+    h_ifetch_hit;
+    uu_profileL1InstHit;
+    k_popMandatoryQueue;
+  }
+
+  transition(OR, Load, O) {
     h_load_hit;
+    uu_profileL1DataMiss;
+    uu_profileL2Hit;
     k_popMandatoryQueue;
+    ka_wakeUpAllDependents;
+  }
+
+  transition(OR, Ifetch, O) {
+    h_ifetch_hit;
+    uu_profileL1InstMiss;
+    uu_profileL2Hit;
+    k_popMandatoryQueue;
+    ka_wakeUpAllDependents;
   }
 
-  transition({O, OR}, Store, OM) {
+  transition({O,OR}, Store, OM) {
     i_allocateTBE;
     b_issueGETX;
     p_decrementNumberOfMessagesByOne;
-    uu_profileMiss;
+    uu_profileL1DataMiss;
+    uu_profileL2Miss;
     k_popMandatoryQueue;
   }
+
   transition({O, OR}, Flush_line, OM_F) {
     i_allocateTBE;
     bf_issueGETF;
     p_decrementNumberOfMessagesByOne;
-    uu_profileMiss;
     forward_eviction_to_cpu;
     gg_deallocateL1CacheBlock;
     k_popMandatoryQueue;
@@ -1555,6 +1615,7 @@ machine(L1Cache, "AMD Hammer-like protocol")
   transition(O, {Other_GETX, Invalidate}, I) {
     e_sendData;
     forward_eviction_to_cpu;
+    gr_deallocateCacheBlock;
     l_popForwardQueue;
   }
 
@@ -1569,14 +1630,46 @@ machine(L1Cache, "AMD Hammer-like protocol")
   }
 
   // Transitions from Modified
-  transition({MM, MMR}, {Load, Ifetch}, MM) {
+  transition({MM, M}, {Ifetch}) {
+    h_ifetch_hit;
+    uu_profileL1InstHit;
+    k_popMandatoryQueue;
+  }
+
+  transition({MM, M}, {Load}) {
+    h_load_hit;
+    uu_profileL1DataHit;
+    k_popMandatoryQueue;
+  }
+
+  transition(MM, Store) {
+    hh_store_hit;
+    uu_profileL1DataHit;
+    k_popMandatoryQueue;
+  }
+
+  transition(MMR, Load, MM) {
     h_load_hit;
+    uu_profileL1DataMiss;
+    uu_profileL2Hit;
     k_popMandatoryQueue;
+    ka_wakeUpAllDependents;
   }
 
-  transition({MM, MMR}, Store, MM) {
+  transition(MMR, Ifetch, MM) {
+    h_ifetch_hit;
+    uu_profileL1InstMiss;
+    uu_profileL2Hit;
+    k_popMandatoryQueue;
+    ka_wakeUpAllDependents;
+  }
+
+  transition(MMR, Store, MM) {
     hh_store_hit;
+    uu_profileL1DataMiss;
+    uu_profileL2Hit;
     k_popMandatoryQueue;
+    ka_wakeUpAllDependents;
   }
 
   transition({MM, M, MMR, MR}, Flush_line, MM_F) {
@@ -1605,39 +1698,61 @@ machine(L1Cache, "AMD Hammer-like protocol")
   transition(MM, {Other_GETX, Invalidate}, I) {
     c_sendExclusiveData;
     forward_eviction_to_cpu;
+    gr_deallocateCacheBlock;
     l_popForwardQueue;
   }
 
   transition(MM, Other_GETS, I) {
     c_sendExclusiveData;
     forward_eviction_to_cpu;
+    gr_deallocateCacheBlock;
     l_popForwardQueue;
   }
-  
+
   transition(MM, NC_DMA_GETS, O) {
     ee_sendDataShared;
     l_popForwardQueue;
   }
-  
+
   transition(MM, Other_GETS_No_Mig, O) {
     ee_sendDataShared;
     l_popForwardQueue;
   }
-  
+
   transition(MM, Merged_GETS, O) {
     em_sendDataSharedMultiple;
     l_popForwardQueue;
   }
+
   // Transitions from Dirty Exclusive
-  transition({M, MR}, {Load, Ifetch}, M) {
+  transition(M, Store, MM) {
+    hh_store_hit;
+    uu_profileL1DataHit;
+    k_popMandatoryQueue;
+  }
+
+  transition(MR, Load, M) {
     h_load_hit;
+    uu_profileL1DataMiss;
+    uu_profileL2Hit;
     k_popMandatoryQueue;
+    ka_wakeUpAllDependents;
   }
 
-  transition({M, MR}, Store, MM) {
+  transition(MR, Ifetch, M) {
+    h_ifetch_hit;
+    uu_profileL1InstMiss;
+    uu_profileL2Hit;
+    k_popMandatoryQueue;
+    ka_wakeUpAllDependents;
+  }
+
+  transition(MR, Store, MM) {
     hh_store_hit;
+    uu_profileL1DataMiss;
+    uu_profileL2Hit;
     k_popMandatoryQueue;
+    ka_wakeUpAllDependents;
   }
 
   transition(M, L2_Replacement, MI) {
@@ -1651,6 +1766,7 @@ machine(L1Cache, "AMD Hammer-like protocol")
   transition(M, {Other_GETX, Invalidate}, I) {
     c_sendExclusiveData;
     forward_eviction_to_cpu;
+    gr_deallocateCacheBlock;
     l_popForwardQueue;
   }
 
@@ -1684,7 +1800,7 @@ machine(L1Cache, "AMD Hammer-like protocol")
 
   transition(IM, Data, ISM) {
     u_writeDataToCache;
-    m_decrementNumberOfMessages; 
+    m_decrementNumberOfMessages;
     o_checkForCompletion;
     n_popResponseQueue;
   }
@@ -1698,7 +1814,7 @@ machine(L1Cache, "AMD Hammer-like protocol")
 
   transition(IM, Exclusive_Data, MM_W) {
     u_writeDataToCache;
-    m_decrementNumberOfMessages; 
+    m_decrementNumberOfMessages;
     o_checkForCompletion;
     sx_external_store_hit;
     n_popResponseQueue;
@@ -1738,7 +1854,7 @@ machine(L1Cache, "AMD Hammer-like protocol")
 
   transition(SM, {Data, Exclusive_Data}, ISM) {
     v_writeDataToCacheVerify;
-    m_decrementNumberOfMessages; 
+    m_decrementNumberOfMessages;
     o_checkForCompletion;
     n_popResponseQueue;
   }
@@ -1833,13 +1949,13 @@ machine(L1Cache, "AMD Hammer-like protocol")
     l_popForwardQueue;
   }
 
-  transition(IS, Ack) {  
+  transition(IS, Ack) {
     m_decrementNumberOfMessages;
     o_checkForCompletion;
     n_popResponseQueue;
   }
 
-  transition(IS, Shared_Ack) {  
+  transition(IS, Shared_Ack) {
     m_decrementNumberOfMessages;
     r_setSharerBit;
     o_checkForCompletion;
@@ -1878,13 +1994,13 @@ machine(L1Cache, "AMD Hammer-like protocol")
 
   // Transitions from SS
 
-  transition(SS, Ack) {  
+  transition(SS, Ack) {
     m_decrementNumberOfMessages;
     o_checkForCompletion;
     n_popResponseQueue;
   }
 
-  transition(SS, Shared_Ack) {  
+  transition(SS, Shared_Ack) {
     m_decrementNumberOfMessages;
     r_setSharerBit;
     o_checkForCompletion;
@@ -1910,10 +2026,11 @@ machine(L1Cache, "AMD Hammer-like protocol")
 
   transition(MM_W, Store) {
     hh_store_hit;
+    uu_profileL1DataHit;
     k_popMandatoryQueue;
   }
 
-  transition({MM_W, MM_WF}, Ack) {  
+  transition({MM_W, MM_WF}, Ack) {
     m_decrementNumberOfMessages;
     o_checkForCompletion;
     n_popResponseQueue;
@@ -1935,10 +2052,11 @@ machine(L1Cache, "AMD Hammer-like protocol")
 
   transition(M_W, Store, MM_W) {
     hh_store_hit;
+    uu_profileL1DataHit;
     k_popMandatoryQueue;
   }
 
-  transition(M_W, Ack) {  
+  transition(M_W, Ack) {
     m_decrementNumberOfMessages;
     o_checkForCompletion;
     n_popResponseQueue;