Somayeh's MESI protocol with Polina's bug fixes
author Polina Dudnik <pdudnik@gmail.com>
Fri, 11 Sep 2009 16:04:55 +0000 (11:04 -0500)
committer Polina Dudnik <pdudnik@gmail.com>
Fri, 11 Sep 2009 16:04:55 +0000 (11:04 -0500)
src/mem/protocol/MESI_CMP_directory-L1cache.sm
src/mem/protocol/MESI_CMP_directory-L2cache.sm
src/mem/protocol/MESI_CMP_directory-mem.sm
src/mem/protocol/MESI_CMP_directory-msg.sm

index efdc58e1bc3a85285a012a061322742d9a559944..32669190f2b22a96cd313548dffe7237524acb12 100644 (file)
  */
 
 
-machine(L1Cache, "MSI Directory L1 Cache CMP") : LATENCY_L1_REQUEST_LATENCY LATENCY_L1_RESPONSE_LATENCY LATENCY_TO_L2_LATENCY {
+machine(L1Cache, "MSI Directory L1 Cache CMP")
+ : int l1_request_latency,
+   int l1_response_latency,
+   int to_l2_latency,
+   int l2_select_low_bit,
+   int l2_select_num_bits
+{
+
 
   // NODE L1 CACHE
   // From this node's L1 cache TO the network
@@ -120,7 +127,7 @@ machine(L1Cache, "MSI Directory L1 Cache CMP") : LATENCY_L1_REQUEST_LATENCY LATE
   external_type(CacheMemory) {
     bool cacheAvail(Address);
     Address cacheProbe(Address);
-    void allocate(Address);
+    void allocate(Address, Entry);
     void deallocate(Address);
     Entry lookup(Address);
     void changePermission(Address, AccessPermission);
@@ -139,9 +146,9 @@ machine(L1Cache, "MSI Directory L1 Cache CMP") : LATENCY_L1_REQUEST_LATENCY LATE
 //  CacheMemory L1IcacheMemory, template_hack="<L1Cache_Entry>", constructor_hack='L1_CACHE_NUM_SETS_BITS,L1_CACHE_ASSOC,MachineType_L1Cache,int_to_string(i)+"_L1I"', abstract_chip_ptr="true";
 //  CacheMemory L1DcacheMemory, template_hack="<L1Cache_Entry>", constructor_hack='L1_CACHE_NUM_SETS_BITS,L1_CACHE_ASSOC,MachineType_L1Cache,int_to_string(i)+"_L1D"', abstract_chip_ptr="true";
 
-  CacheMemory L1IcacheMemory, factory='RubySystem::getCache(m_cfg["L1Icache"])';
+  CacheMemory L1IcacheMemory, factory='RubySystem::getCache(m_cfg["icache"])';
 
-  CacheMemory L1DcacheMemory, factory='RubySystem::getCache(m_cfg["L1Dcache"])';
+  CacheMemory L1DcacheMemory, factory='RubySystem::getCache(m_cfg["dcache"])';
 
 
 //  MessageBuffer mandatoryQueue, ordered="false", rank="100", abstract_chip_ptr="true";
@@ -178,10 +185,10 @@ machine(L1Cache, "MSI Directory L1 Cache CMP") : LATENCY_L1_REQUEST_LATENCY LATE
   }
 
   State getState(Address addr) {
-    if((L1DcacheMemory.isTagPresent(addr) && L1IcacheMemory.isTagPresent(addr)) == true){
-      DEBUG_EXPR(id);
-      DEBUG_EXPR(addr);
-    }
+//    if((L1DcacheMemory.isTagPresent(addr) && L1IcacheMemory.isTagPresent(addr)) == true){
+//      DEBUG_EXPR(id);
+//      DEBUG_EXPR(addr);
+//    }
     assert((L1DcacheMemory.isTagPresent(addr) && L1IcacheMemory.isTagPresent(addr)) == false);
 
     if(L1_TBEs.isPresent(addr)) {
@@ -343,13 +350,14 @@ machine(L1Cache, "MSI Directory L1 Cache CMP") : LATENCY_L1_REQUEST_LATENCY LATE
   // ACTIONS
   action(a_issueGETS, "a", desc="Issue GETS") {
     peek(mandatoryQueue_in, CacheMsg) {
-      enqueue(requestIntraChipL1Network_out, RequestMsg, latency="L1_REQUEST_LATENCY") {
+      enqueue(requestIntraChipL1Network_out, RequestMsg, latency=l1_request_latency) {
         out_msg.Address := address;
         out_msg.Type := CoherenceRequestType:GETS;
         out_msg.Requestor := machineID;
-        out_msg.Destination.add(map_L1CacheMachId_to_L2Cache(address, machineID));
+        out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache,
+                                                  l2_select_low_bit, l2_select_num_bits));
         DEBUG_EXPR(address);
-        DEBUG_EXPR(out_msg.Destination);
+        //DEBUG_EXPR(out_msg.Destination);
         out_msg.MessageSize := MessageSizeType:Control;
         out_msg.Prefetch := in_msg.Prefetch;
         out_msg.AccessMode := in_msg.AccessMode;
@@ -359,13 +367,14 @@ machine(L1Cache, "MSI Directory L1 Cache CMP") : LATENCY_L1_REQUEST_LATENCY LATE
 
   action(ai_issueGETINSTR, "ai", desc="Issue GETINSTR") {
     peek(mandatoryQueue_in, CacheMsg) {
-      enqueue(requestIntraChipL1Network_out, RequestMsg, latency="L1_REQUEST_LATENCY") {
+      enqueue(requestIntraChipL1Network_out, RequestMsg, latency=l1_request_latency) {
         out_msg.Address := address;
         out_msg.Type := CoherenceRequestType:GET_INSTR;
         out_msg.Requestor := machineID;
-        out_msg.Destination.add(map_L1CacheMachId_to_L2Cache(address, machineID));
+        out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache,
+                                                  l2_select_low_bit, l2_select_num_bits));
         DEBUG_EXPR(address);
-        DEBUG_EXPR(out_msg.Destination);
+        //DEBUG_EXPR(out_msg.Destination);
         out_msg.MessageSize := MessageSizeType:Control;
         out_msg.Prefetch := in_msg.Prefetch;
         out_msg.AccessMode := in_msg.AccessMode;
@@ -376,14 +385,15 @@ machine(L1Cache, "MSI Directory L1 Cache CMP") : LATENCY_L1_REQUEST_LATENCY LATE
 
   action(b_issueGETX, "b", desc="Issue GETX") {
     peek(mandatoryQueue_in, CacheMsg) {
-      enqueue(requestIntraChipL1Network_out, RequestMsg, latency="L1_REQUEST_LATENCY") {
+      enqueue(requestIntraChipL1Network_out, RequestMsg, latency=l1_request_latency) {
         out_msg.Address := address;
         out_msg.Type := CoherenceRequestType:GETX;
         out_msg.Requestor := machineID;
-        DEBUG_EXPR(machineID);
-        out_msg.Destination.add(map_L1CacheMachId_to_L2Cache(address, machineID));
+        //DEBUG_EXPR(machineID);
+        out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache,
+                                                  l2_select_low_bit, l2_select_num_bits));
         DEBUG_EXPR(address);
-        DEBUG_EXPR(out_msg.Destination);
+        //DEBUG_EXPR(out_msg.Destination);
         out_msg.MessageSize := MessageSizeType:Control;
         out_msg.Prefetch := in_msg.Prefetch;
         out_msg.AccessMode := in_msg.AccessMode;
@@ -393,13 +403,14 @@ machine(L1Cache, "MSI Directory L1 Cache CMP") : LATENCY_L1_REQUEST_LATENCY LATE
 
   action(c_issueUPGRADE, "c", desc="Issue GETX") {
     peek(mandatoryQueue_in, CacheMsg) {
-      enqueue(requestIntraChipL1Network_out, RequestMsg, latency="L1_REQUEST_LATENCY") {
+      enqueue(requestIntraChipL1Network_out, RequestMsg, latency= l1_request_latency) {
         out_msg.Address := address;
         out_msg.Type := CoherenceRequestType:UPGRADE;
         out_msg.Requestor := machineID;
-        out_msg.Destination.add(map_L1CacheMachId_to_L2Cache(address, machineID));
+        out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache,
+                                                  l2_select_low_bit, l2_select_num_bits));
         DEBUG_EXPR(address);
-        DEBUG_EXPR(out_msg.Destination);
+        //DEBUG_EXPR(out_msg.Destination);
         out_msg.MessageSize := MessageSizeType:Control;
         out_msg.Prefetch := in_msg.Prefetch;
         out_msg.AccessMode := in_msg.AccessMode;
@@ -409,7 +420,7 @@ machine(L1Cache, "MSI Directory L1 Cache CMP") : LATENCY_L1_REQUEST_LATENCY LATE
 
   action(d_sendDataToRequestor, "d", desc="send data to requestor") {
     peek(requestIntraChipL1Network_in, RequestMsg) {
-      enqueue(responseIntraChipL1Network_out, ResponseMsg, latency="L1_RESPONSE_LATENCY") {
+      enqueue(responseIntraChipL1Network_out, ResponseMsg, latency=l1_response_latency) {
         out_msg.Address := address;
         out_msg.Type := CoherenceResponseType:DATA;
         out_msg.DataBlk := getL1CacheEntry(address).DataBlk;
@@ -422,20 +433,21 @@ machine(L1Cache, "MSI Directory L1 Cache CMP") : LATENCY_L1_REQUEST_LATENCY LATE
   }
 
   action(d2_sendDataToL2, "d2", desc="send data to the L2 cache because of M downgrade") {
-    enqueue(responseIntraChipL1Network_out, ResponseMsg, latency="L1_RESPONSE_LATENCY") {
+    enqueue(responseIntraChipL1Network_out, ResponseMsg, latency=l1_response_latency) {
       out_msg.Address := address;
       out_msg.Type := CoherenceResponseType:DATA;
       out_msg.DataBlk := getL1CacheEntry(address).DataBlk;
       out_msg.Dirty := getL1CacheEntry(address).Dirty;
       out_msg.Sender := machineID;
-      out_msg.Destination.add(map_L1CacheMachId_to_L2Cache(address, machineID));
+      out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache,
+                                                  l2_select_low_bit, l2_select_num_bits));
       out_msg.MessageSize := MessageSizeType:Response_Data;
     }
   }
 
   action(dt_sendDataToRequestor_fromTBE, "dt", desc="send data to requestor") {
     peek(requestIntraChipL1Network_in, RequestMsg) {
-      enqueue(responseIntraChipL1Network_out, ResponseMsg, latency="L1_RESPONSE_LATENCY") {
+      enqueue(responseIntraChipL1Network_out, ResponseMsg, latency=l1_response_latency) {
         out_msg.Address := address;
         out_msg.Type := CoherenceResponseType:DATA;
         out_msg.DataBlk := L1_TBEs[address].DataBlk;
@@ -448,20 +460,21 @@ machine(L1Cache, "MSI Directory L1 Cache CMP") : LATENCY_L1_REQUEST_LATENCY LATE
   }
 
   action(d2t_sendDataToL2_fromTBE, "d2t", desc="send data to the L2 cache") {
-    enqueue(responseIntraChipL1Network_out, ResponseMsg, latency="L1_RESPONSE_LATENCY") {
+    enqueue(responseIntraChipL1Network_out, ResponseMsg, latency=l1_response_latency) {
       out_msg.Address := address;
       out_msg.Type := CoherenceResponseType:DATA;
       out_msg.DataBlk := L1_TBEs[address].DataBlk;
       out_msg.Dirty := L1_TBEs[address].Dirty;
       out_msg.Sender := machineID;
-      out_msg.Destination.add(map_L1CacheMachId_to_L2Cache(address, machineID));
+      out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache,
+                                                  l2_select_low_bit, l2_select_num_bits));
       out_msg.MessageSize := MessageSizeType:Response_Data;
     }
   }
 
   action(e_sendAckToRequestor, "e", desc="send invalidate ack to requestor (could be L2 or L1)") {
     peek(requestIntraChipL1Network_in, RequestMsg) {
-      enqueue(responseIntraChipL1Network_out, ResponseMsg, latency="L1_RESPONSE_LATENCY") {
+      enqueue(responseIntraChipL1Network_out, ResponseMsg, latency=l1_response_latency) {
         out_msg.Address := address;
         out_msg.Type := CoherenceResponseType:ACK;
         out_msg.Sender := machineID;
@@ -472,32 +485,34 @@ machine(L1Cache, "MSI Directory L1 Cache CMP") : LATENCY_L1_REQUEST_LATENCY LATE
   }
 
   action(f_sendDataToL2, "f", desc="send data to the L2 cache") {
-    enqueue(responseIntraChipL1Network_out, ResponseMsg, latency="L1_RESPONSE_LATENCY") {
+    enqueue(responseIntraChipL1Network_out, ResponseMsg, latency=l1_response_latency) {
       out_msg.Address := address;
       out_msg.Type := CoherenceResponseType:DATA;
       out_msg.DataBlk := getL1CacheEntry(address).DataBlk;
       out_msg.Dirty := getL1CacheEntry(address).Dirty;
       out_msg.Sender := machineID;
-      out_msg.Destination.add(map_L1CacheMachId_to_L2Cache(address, machineID));
+      out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache,
+                                                  l2_select_low_bit, l2_select_num_bits));
       out_msg.MessageSize := MessageSizeType:Writeback_Data;
     }
   }
 
   action(ft_sendDataToL2_fromTBE, "ft", desc="send data to the L2 cache") {
-    enqueue(responseIntraChipL1Network_out, ResponseMsg, latency="L1_RESPONSE_LATENCY") {
+    enqueue(responseIntraChipL1Network_out, ResponseMsg, latency=l1_response_latency) {
       out_msg.Address := address;
       out_msg.Type := CoherenceResponseType:DATA;
       out_msg.DataBlk := L1_TBEs[address].DataBlk;
       out_msg.Dirty := L1_TBEs[address].Dirty;
       out_msg.Sender := machineID;
-      out_msg.Destination.add(map_L1CacheMachId_to_L2Cache(address, machineID));
+      out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache,
+                                                  l2_select_low_bit, l2_select_num_bits));
       out_msg.MessageSize := MessageSizeType:Writeback_Data;
     }
   }
 
   action(fi_sendInvAck, "fi", desc="send data to the L2 cache") {
     peek(requestIntraChipL1Network_in, RequestMsg) {
-      enqueue(responseIntraChipL1Network_out, ResponseMsg, latency="L1_RESPONSE_LATENCY") {
+      enqueue(responseIntraChipL1Network_out, ResponseMsg, latency=l1_response_latency) {
         out_msg.Address := address;
         out_msg.Type := CoherenceResponseType:ACK;
         out_msg.Sender := machineID;
@@ -510,13 +525,14 @@ machine(L1Cache, "MSI Directory L1 Cache CMP") : LATENCY_L1_REQUEST_LATENCY LATE
 
 
   action(g_issuePUTX, "g", desc="send data to the L2 cache") {
-    enqueue(requestIntraChipL1Network_out, RequestMsg, latency="L1_RESPONSE_LATENCY") {
+    enqueue(requestIntraChipL1Network_out, RequestMsg, latency=l1_response_latency) {
       out_msg.Address := address;
       out_msg.Type := CoherenceRequestType:PUTX;
       out_msg.DataBlk := getL1CacheEntry(address).DataBlk;
       out_msg.Dirty := getL1CacheEntry(address).Dirty;
       out_msg.Requestor:= machineID;
-      out_msg.Destination.add(map_L1CacheMachId_to_L2Cache(address, machineID));
+      out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache,
+                                                  l2_select_low_bit, l2_select_num_bits));
       if (getL1CacheEntry(address).Dirty) {
         out_msg.MessageSize := MessageSizeType:Writeback_Data;
       } else {
@@ -526,34 +542,40 @@ machine(L1Cache, "MSI Directory L1 Cache CMP") : LATENCY_L1_REQUEST_LATENCY LATE
   }
 
   action(j_sendUnblock, "j", desc="send unblock to the L2 cache") {
-    enqueue(unblockNetwork_out, ResponseMsg, latency="TO_L2_LATENCY") {
+    enqueue(unblockNetwork_out, ResponseMsg, latency=to_l2_latency) {
       out_msg.Address := address;
       out_msg.Type := CoherenceResponseType:UNBLOCK;
       out_msg.Sender := machineID;
-      out_msg.Destination.add(map_L1CacheMachId_to_L2Cache(address, machineID));
+      out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache,
+                                                  l2_select_low_bit, l2_select_num_bits));
       out_msg.MessageSize := MessageSizeType:Response_Control;
+      DEBUG_EXPR(address);
+      
     }
   }
 
   action(jj_sendExclusiveUnblock, "\j", desc="send unblock to the L2 cache") {
-    enqueue(unblockNetwork_out, ResponseMsg, latency="TO_L2_LATENCY") {
+    enqueue(unblockNetwork_out, ResponseMsg, latency=to_l2_latency) {
       out_msg.Address := address;
       out_msg.Type := CoherenceResponseType:EXCLUSIVE_UNBLOCK;
       out_msg.Sender := machineID;
-      out_msg.Destination.add(map_L1CacheMachId_to_L2Cache(address, machineID));
+      out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache,
+                                                  l2_select_low_bit, l2_select_num_bits));
       out_msg.MessageSize := MessageSizeType:Response_Control;
+      DEBUG_EXPR(address);
+
     }
   }
 
 
 
   action(h_load_hit, "h", desc="If not prefetch, notify sequencer the load completed.") {
-    DEBUG_EXPR(getL1CacheEntry(address).DataBlk);
+    //DEBUG_EXPR(getL1CacheEntry(address).DataBlk);
     sequencer.readCallback(address, getL1CacheEntry(address).DataBlk);
   }
 
   action(hh_store_hit, "\h", desc="If not prefetch, notify sequencer that store completed.") {
-    DEBUG_EXPR(getL1CacheEntry(address).DataBlk);
+    //DEBUG_EXPR(getL1CacheEntry(address).DataBlk);
     sequencer.writeCallback(address, getL1CacheEntry(address).DataBlk);
     getL1CacheEntry(address).Dirty := true;
   }
@@ -611,13 +633,13 @@ machine(L1Cache, "MSI Directory L1 Cache CMP") : LATENCY_L1_REQUEST_LATENCY LATE
 
   action(oo_allocateL1DCacheBlock, "\o", desc="Set L1 D-cache tag equal to tag of block B.") {
     if (L1DcacheMemory.isTagPresent(address) == false) {
-      L1DcacheMemory.allocate(address);
+      L1DcacheMemory.allocate(address, new Entry);
     }
   }
 
   action(pp_allocateL1ICacheBlock, "\p", desc="Set L1 I-cache tag equal to tag of block B.") {
     if (L1IcacheMemory.isTagPresent(address) == false) {
-      L1IcacheMemory.allocate(address);
+      L1IcacheMemory.allocate(address, new Entry);
     }
   }
 
index 2bd9b3ce77c9ef24a84091993c1bb2796484a76b..6439e4fb32637aa3ac0770ba372adcb1fab14b48 100644 (file)
  *
  */
 
-machine(L2Cache, "MOSI Directory L2 Cache CMP") {
+machine(L2Cache, "MESI Directory L2 Cache CMP")
+ : int l2_request_latency,  
+   int l2_response_latency,
+   int to_l1_latency
+{
 
   // L2 BANK QUEUES
   // From local bank of L2 cache TO the network
@@ -41,9 +45,10 @@ machine(L2Cache, "MOSI Directory L2 Cache CMP") {
   MessageBuffer responseFromL2Cache, network="To", virtual_network="3", ordered="false";  // this L2 bank -> a local L1 || Memory
 
   // FROM the network to this local bank of L2 cache
+  MessageBuffer unblockToL2Cache, network="From", virtual_network="4", ordered="false";  // a local L1 || Memory -> this L2 bank
   MessageBuffer L1RequestToL2Cache, network="From", virtual_network="0", ordered="false";  // a local L1 -> this L2 bank
   MessageBuffer responseToL2Cache, network="From", virtual_network="3", ordered="false";  // a local L1 || Memory -> this L2 bank
-  MessageBuffer unblockToL2Cache, network="From", virtual_network="4", ordered="false";  // a local L1 || Memory -> this L2 bank
+//  MessageBuffer unblockToL2Cache, network="From", virtual_network="4", ordered="false";  // a local L1 || Memory -> this L2 bank
 
   // STATES
   enumeration(State, desc="L2 Cache states", default="L2Cache_State_NP") {
@@ -73,7 +78,7 @@ machine(L2Cache, "MOSI Directory L2 Cache CMP") {
     MT_IIB, desc="Blocked for L1_GETS from MT, waiting for unblock and data";
     MT_IB, desc="Blocked for L1_GETS from MT, got unblock, waiting for data";
     MT_SB, desc="Blocked for L1_GETS from MT, got data,  waiting for unblock";
-
   }
 
   // EVENTS
@@ -111,6 +116,8 @@ machine(L2Cache, "MOSI Directory L2 Cache CMP") {
     Unblock_Cancel, desc="Unblock from L1 requestor (FOR XACT MEMORY)";
     Exclusive_Unblock, desc="Unblock from L1 requestor";
 
+    MEM_Inv, desc="Invalidation from directory";
+
   }
 
   // TYPES
@@ -141,7 +148,7 @@ machine(L2Cache, "MOSI Directory L2 Cache CMP") {
   external_type(CacheMemory) {
     bool cacheAvail(Address);
     Address cacheProbe(Address);
-    void allocate(Address);
+    void allocate(Address, Entry);
     void deallocate(Address);
     Entry lookup(Address);
     void changePermission(Address, AccessPermission);
@@ -156,12 +163,12 @@ machine(L2Cache, "MOSI Directory L2 Cache CMP") {
     bool isPresent(Address);
   }
 
-  TBETable L2_TBEs, template_hack="<L2Cache_TBE>", no_vector="true";
+  TBETable L2_TBEs, template_hack="<L2Cache_TBE>";
 
 //  CacheMemory L2cacheMemory, template_hack="<L2Cache_Entry>", constructor_hack='L2_CACHE_NUM_SETS_BITS,L2_CACHE_ASSOC,MachineType_L2Cache,int_to_string(i)';
 
 
-  CacheMemory L2cacheMemory, factory='RubySystem::getCache(m_cfg["cache"])', no_vector="true";
+  CacheMemory L2cacheMemory, factory='RubySystem::getCache(m_cfg["cache"])';
 
   // inclusive cache, returns L2 entries only
   Entry getL2CacheEntry(Address addr), return_by_ref="yes" {
@@ -196,10 +203,9 @@ machine(L2Cache, "MOSI Directory L2 Cache CMP") {
   }
 
   void addSharer(Address addr, MachineID requestor) {
-    DEBUG_EXPR(machineID);
-    DEBUG_EXPR(requestor);
-    DEBUG_EXPR(addr);
-    assert(map_L1CacheMachId_to_L2Cache(addr, requestor) == machineID);
+    //DEBUG_EXPR(machineID);
+    //DEBUG_EXPR(requestor);
+    //DEBUG_EXPR(addr);
     L2cacheMemory[addr].Sharers.add(requestor);
   }
 
@@ -273,6 +279,29 @@ machine(L2Cache, "MOSI Directory L2 Cache CMP") {
   out_port(responseIntraChipL2Network_out, ResponseMsg, responseFromL2Cache);
 
 
+  in_port(L1unblockNetwork_in, ResponseMsg, unblockToL2Cache) {
+    if(L1unblockNetwork_in.isReady()) {
+      peek(L1unblockNetwork_in,  ResponseMsg) {
+        DEBUG_EXPR(in_msg.Address);
+       DEBUG_EXPR(getState(in_msg.Address));
+        DEBUG_EXPR(in_msg.Sender);
+        DEBUG_EXPR(in_msg.Type);
+        DEBUG_EXPR(in_msg.Destination);
+
+        assert(in_msg.Destination.isElement(machineID));
+        if (in_msg.Type == CoherenceResponseType:EXCLUSIVE_UNBLOCK) {
+          trigger(Event:Exclusive_Unblock, in_msg.Address);
+        } else if (in_msg.Type == CoherenceResponseType:UNBLOCK) {
+          trigger(Event:Unblock, in_msg.Address);
+        } else {
+          error("unknown unblock message");
+        }
+      }
+    }
+  }
+
+
+
   // Response IntraChip L2 Network - response msg to this particular L2 bank
   in_port(responseIntraChipL2Network_in, ResponseMsg, responseToL2Cache) {
     if (responseIntraChipL2Network_in.isReady()) {
@@ -301,6 +330,8 @@ machine(L2Cache, "MOSI Directory L2 Cache CMP") {
             trigger(Event:Mem_Data, in_msg.Address);  // L2 now has data and all off-chip acks
           } else if(in_msg.Type == CoherenceResponseType:MEMORY_ACK) {
             trigger(Event:Mem_Ack, in_msg.Address);  // L2 now has data and all off-chip acks
+          } else if(in_msg.Type == CoherenceResponseType:INV) {
+            trigger(Event:MEM_Inv, in_msg.Address);  // invalidation from the directory for this block
           } else {
             error("unknown message type");
           }
@@ -314,11 +345,11 @@ machine(L2Cache, "MOSI Directory L2 Cache CMP") {
     if(L1RequestIntraChipL2Network_in.isReady()) {
       peek(L1RequestIntraChipL2Network_in,  RequestMsg) {
         DEBUG_EXPR(in_msg.Address);
-        DEBUG_EXPR(id);
+        //DEBUG_EXPR(id);
         DEBUG_EXPR(getState(in_msg.Address));
-        DEBUG_EXPR(in_msg.Requestor);
+        //DEBUG_EXPR(in_msg.Requestor);
         DEBUG_EXPR(in_msg.Type);
-        DEBUG_EXPR(in_msg.Destination);
+        //DEBUG_EXPR(in_msg.Destination);
         assert(machineIDToMachineType(in_msg.Requestor) == MachineType:L1Cache);
         assert(in_msg.Destination.isElement(machineID));
         if (L2cacheMemory.isTagPresent(in_msg.Address)) {
@@ -341,26 +372,12 @@ machine(L2Cache, "MOSI Directory L2 Cache CMP") {
     }
   }
 
-  in_port(L1unblockNetwork_in, ResponseMsg, unblockToL2Cache) {
-    if(L1unblockNetwork_in.isReady()) {
-      peek(L1unblockNetwork_in,  ResponseMsg) {
-        assert(in_msg.Destination.isElement(machineID));
-        if (in_msg.Type == CoherenceResponseType:EXCLUSIVE_UNBLOCK) {
-          trigger(Event:Exclusive_Unblock, in_msg.Address);
-        } else if (in_msg.Type == CoherenceResponseType:UNBLOCK) {
-          trigger(Event:Unblock, in_msg.Address);
-        } else {
-          error("unknown unblock message");
-        }
-      }
-    }
-  }
 
   // ACTIONS
 
   action(a_issueFetchToMemory, "a", desc="fetch data from memory") {
     peek(L1RequestIntraChipL2Network_in, RequestMsg) {
-      enqueue(DirRequestIntraChipL2Network_out, RequestMsg, latency="L2_REQUEST_LATENCY") {
+      enqueue(DirRequestIntraChipL2Network_out, RequestMsg, latency=l2_request_latency) {
         out_msg.Address := address;
         out_msg.Type := CoherenceRequestType:GETS;
         out_msg.Requestor := machineID;
@@ -372,7 +389,7 @@ machine(L2Cache, "MOSI Directory L2 Cache CMP") {
 
   action(b_forwardRequestToExclusive, "b", desc="Forward request to the exclusive L1") {
     peek(L1RequestIntraChipL2Network_in, RequestMsg) {
-      enqueue(L1RequestIntraChipL2Network_out, RequestMsg, latency="1") {
+      enqueue(L1RequestIntraChipL2Network_out, RequestMsg, latency=to_l1_latency) {
         out_msg.Address := address;
         out_msg.Type := in_msg.Type;
         out_msg.Requestor := in_msg.Requestor;
@@ -383,7 +400,7 @@ machine(L2Cache, "MOSI Directory L2 Cache CMP") {
   }
 
   action(c_exclusiveReplacement, "c", desc="Send data to memory") {
-    enqueue(responseIntraChipL2Network_out, ResponseMsg, latency="L2_RESPONSE_LATENCY") {
+    enqueue(responseIntraChipL2Network_out, ResponseMsg, latency=l2_response_latency) {
       out_msg.Address := address;
       out_msg.Type := CoherenceResponseType:MEMORY_DATA;
       out_msg.Sender := machineID;
@@ -394,8 +411,19 @@ machine(L2Cache, "MOSI Directory L2 Cache CMP") {
     }
   }
 
+  action(c_exclusiveCleanReplacement, "cc", desc="Send ack to memory for clean replacement") {
+    enqueue(responseIntraChipL2Network_out, ResponseMsg, latency=l2_response_latency) {
+      out_msg.Address := address;
+      out_msg.Type := CoherenceResponseType:ACK;
+      out_msg.Sender := machineID;
+      out_msg.Destination.add(map_Address_to_Directory(address));
+      out_msg.MessageSize := MessageSizeType:Response_Control;
+    }
+  }
+
+
   action(ct_exclusiveReplacementFromTBE, "ct", desc="Send data to memory") {
-    enqueue(responseIntraChipL2Network_out, ResponseMsg, latency="L2_RESPONSE_LATENCY") {
+    enqueue(responseIntraChipL2Network_out, ResponseMsg, latency=l2_response_latency) {
       out_msg.Address := address;
       out_msg.Type := CoherenceResponseType:MEMORY_DATA;
       out_msg.Sender := machineID;
@@ -409,7 +437,7 @@ machine(L2Cache, "MOSI Directory L2 Cache CMP") {
 
   action(d_sendDataToRequestor, "d", desc="Send data from cache to reqeustor") {
     peek(L1RequestIntraChipL2Network_in, RequestMsg) {
-      enqueue(responseIntraChipL2Network_out, ResponseMsg, latency="L2_RESPONSE_LATENCY") {
+      enqueue(responseIntraChipL2Network_out, ResponseMsg, latency=l2_response_latency) {
         out_msg.Address := address;
         out_msg.Type := CoherenceResponseType:DATA;
         out_msg.Sender := machineID;
@@ -428,7 +456,7 @@ machine(L2Cache, "MOSI Directory L2 Cache CMP") {
 
   action(dd_sendExclusiveDataToRequestor, "dd", desc="Send data from cache to reqeustor") {
     peek(L1RequestIntraChipL2Network_in, RequestMsg) {
-      enqueue(responseIntraChipL2Network_out, ResponseMsg, latency="L2_RESPONSE_LATENCY") {
+      enqueue(responseIntraChipL2Network_out, ResponseMsg, latency=l2_response_latency) {
         out_msg.Address := address;
         out_msg.Type := CoherenceResponseType:DATA_EXCLUSIVE;
         out_msg.Sender := machineID;
@@ -447,7 +475,7 @@ machine(L2Cache, "MOSI Directory L2 Cache CMP") {
 
   action(ds_sendSharedDataToRequestor, "ds", desc="Send data from cache to reqeustor") {
     peek(L1RequestIntraChipL2Network_in, RequestMsg) {
-      enqueue(responseIntraChipL2Network_out, ResponseMsg, latency="L2_RESPONSE_LATENCY") {
+      enqueue(responseIntraChipL2Network_out, ResponseMsg, latency=l2_response_latency) {
         out_msg.Address := address;
         out_msg.Type := CoherenceResponseType:DATA;
         out_msg.Sender := machineID;
@@ -462,7 +490,7 @@ machine(L2Cache, "MOSI Directory L2 Cache CMP") {
 
   action(e_sendDataToGetSRequestors, "e", desc="Send data from cache to all GetS IDs") {
     assert(L2_TBEs[address].L1_GetS_IDs.count() > 0);
-    enqueue(responseIntraChipL2Network_out, ResponseMsg, latency="1") {
+    enqueue(responseIntraChipL2Network_out, ResponseMsg, latency=to_l1_latency) {
       out_msg.Address := address;
       out_msg.Type := CoherenceResponseType:DATA;
       out_msg.Sender := machineID;
@@ -475,7 +503,7 @@ machine(L2Cache, "MOSI Directory L2 Cache CMP") {
 
   action(ex_sendExclusiveDataToGetSRequestors, "ex", desc="Send data from cache to all GetS IDs") {
     assert(L2_TBEs[address].L1_GetS_IDs.count() == 1);
-    enqueue(responseIntraChipL2Network_out, ResponseMsg, latency="1") {
+    enqueue(responseIntraChipL2Network_out, ResponseMsg, latency=to_l1_latency) {
       out_msg.Address := address;
       out_msg.Type := CoherenceResponseType:DATA_EXCLUSIVE;
       out_msg.Sender := machineID;
@@ -488,24 +516,24 @@ machine(L2Cache, "MOSI Directory L2 Cache CMP") {
 
 
   action(ee_sendDataToGetXRequestor, "ee", desc="Send data from cache to GetX ID") {
-    enqueue(responseIntraChipL2Network_out, ResponseMsg, latency="1") {
+    enqueue(responseIntraChipL2Network_out, ResponseMsg, latency=to_l1_latency) {
       out_msg.Address := address;
       out_msg.Type := CoherenceResponseType:DATA;
       out_msg.Sender := machineID;
       out_msg.Destination.add(L2_TBEs[address].L1_GetX_ID);
-      DEBUG_EXPR(out_msg.Destination);
+      //DEBUG_EXPR(out_msg.Destination);
       out_msg.DataBlk := getL2CacheEntry(address).DataBlk;
       out_msg.Dirty := getL2CacheEntry(address).Dirty;
       DEBUG_EXPR(out_msg.Address);
-      DEBUG_EXPR(out_msg.Destination);
-      DEBUG_EXPR(out_msg.DataBlk);
+      //DEBUG_EXPR(out_msg.Destination);
+      //DEBUG_EXPR(out_msg.DataBlk);
       out_msg.MessageSize := MessageSizeType:Response_Data;
     }
   }
 
 
   action(f_sendInvToSharers, "f", desc="invalidate sharers for L2 replacement") {
-    enqueue(L1RequestIntraChipL2Network_out, RequestMsg, latency="1") {
+    enqueue(L1RequestIntraChipL2Network_out, RequestMsg, latency=to_l1_latency) {
       out_msg.Address := address;
       out_msg.Type := CoherenceRequestType:INV;
       out_msg.Requestor := machineID;
@@ -516,7 +544,7 @@ machine(L2Cache, "MOSI Directory L2 Cache CMP") {
 
   action(fw_sendFwdInvToSharers, "fw", desc="invalidate sharers for request") {
     peek(L1RequestIntraChipL2Network_in, RequestMsg) {
-      enqueue(L1RequestIntraChipL2Network_out, RequestMsg, latency="1") {
+      enqueue(L1RequestIntraChipL2Network_out, RequestMsg, latency=to_l1_latency) {
         out_msg.Address := address;
         out_msg.Type := CoherenceRequestType:INV;
         out_msg.Requestor := in_msg.Requestor;
@@ -529,7 +557,7 @@ machine(L2Cache, "MOSI Directory L2 Cache CMP") {
 
   action(fwm_sendFwdInvToSharersMinusRequestor, "fwm", desc="invalidate sharers for request, requestor is sharer") {
     peek(L1RequestIntraChipL2Network_in, RequestMsg) {
-      enqueue(L1RequestIntraChipL2Network_out, RequestMsg, latency="1") {
+      enqueue(L1RequestIntraChipL2Network_out, RequestMsg, latency=to_l1_latency) {
         out_msg.Address := address;
         out_msg.Type := CoherenceRequestType:INV;
         out_msg.Requestor := in_msg.Requestor;
@@ -621,7 +649,7 @@ machine(L2Cache, "MOSI Directory L2 Cache CMP") {
 
   action(qq_allocateL2CacheBlock, "\q", desc="Set L2 cache tag equal to tag of block B.") {
     if (L2cacheMemory.isTagPresent(address) == false) {
-      L2cacheMemory.allocate(address);
+      L2cacheMemory.allocate(address, new Entry);
     }
   }
 
@@ -631,7 +659,7 @@ machine(L2Cache, "MOSI Directory L2 Cache CMP") {
 
   action(t_sendWBAck, "t", desc="Send writeback ACK") {
     peek(L1RequestIntraChipL2Network_in, RequestMsg) {
-      enqueue(responseIntraChipL2Network_out, ResponseMsg, latency="1") {
+      enqueue(responseIntraChipL2Network_out, ResponseMsg, latency=to_l1_latency) {
         out_msg.Address := address;
         out_msg.Type := CoherenceResponseType:WB_ACK;
         out_msg.Sender := machineID;
@@ -643,7 +671,7 @@ machine(L2Cache, "MOSI Directory L2 Cache CMP") {
 
   action(ts_sendInvAckToUpgrader, "ts", desc="Send ACK to upgrader") {
     peek(L1RequestIntraChipL2Network_in, RequestMsg) {
-      enqueue(responseIntraChipL2Network_out, ResponseMsg, latency="1") {
+      enqueue(responseIntraChipL2Network_out, ResponseMsg, latency=to_l1_latency) {
         out_msg.Address := address;
         out_msg.Type := CoherenceResponseType:ACK;
         out_msg.Sender := machineID;
@@ -715,6 +743,11 @@ machine(L2Cache, "MOSI Directory L2 Cache CMP") {
     L1RequestIntraChipL2Network_in.recycle();
   }
 
+  action(zn_recycleResponseNetwork, "zn", desc="recycle memory request") {
+    responseIntraChipL2Network_in.recycle();
+  }
+
+
   //*****************************************************
   // TRANSITIONS
   //*****************************************************
@@ -736,6 +769,15 @@ machine(L2Cache, "MOSI Directory L2 Cache CMP") {
     zz_recycleL1RequestQueue;
   }
 
+  transition({IM, IS, ISS, SS_MB, M_MB, MT_MB, MT_IIB, MT_IB, MT_SB}, MEM_Inv) {         
+    zn_recycleResponseNetwork;
+  }
+
+  transition({S_I, M_I, MT_I}, MEM_Inv) {         
+    o_popIncomingResponseQueue;
+  }
+
+
   transition({SS_MB, M_MB, MT_MB, MT_IIB, MT_IB, MT_SB}, {L1_GETS, L1_GET_INSTR, L1_GETX, L1_UPGRADE}) {
     zz_recycleL1RequestQueue;
   }
@@ -846,12 +888,13 @@ machine(L2Cache, "MOSI Directory L2 Cache CMP") {
     rr_deallocateL2CacheBlock;
   }
 
-  transition(SS, L2_Replacement, S_I) {
+  transition(SS, {L2_Replacement, MEM_Inv}, S_I) {  // MEM_Inv now shares the replacement path. NOTE(review): no pop action is listed; presumably the MEM_Inv is consumed later by transition({S_I, M_I, MT_I}, MEM_Inv) - confirm
     i_allocateTBE;
     f_sendInvToSharers;
     rr_deallocateL2CacheBlock;
   }
 
+
   transition(M, L1_GETX, MT_MB) {
     d_sendDataToRequestor;
     uu_profileMiss;
@@ -874,13 +917,15 @@ machine(L2Cache, "MOSI Directory L2 Cache CMP") {
     jj_popL1RequestQueue;
   }
 
-  transition(M, L2_Replacement, M_I) {
+  transition(M, {L2_Replacement, MEM_Inv}, M_I) {  // MEM_Inv now shares the replacement path. NOTE(review): no pop action is listed; presumably the MEM_Inv is consumed later by transition({S_I, M_I, MT_I}, MEM_Inv) - confirm
     i_allocateTBE;
     c_exclusiveReplacement;
     rr_deallocateL2CacheBlock;
   }
 
   transition(M, L2_Replacement_clean, M_I) {
+    i_allocateTBE;  // the clean path now allocates a TBE too, like transition(M, {L2_Replacement, MEM_Inv}, M_I)
+    c_exclusiveCleanReplacement;  // clean counterpart of c_exclusiveReplacement, run before the block is deallocated
     rr_deallocateL2CacheBlock;
   }
 
@@ -902,7 +947,7 @@ machine(L2Cache, "MOSI Directory L2 Cache CMP") {
     jj_popL1RequestQueue;
   }
 
-  transition(MT, L2_Replacement, MT_I) {
+  transition(MT, {L2_Replacement, MEM_Inv}, MT_I) {
     i_allocateTBE;
     f_sendInvToSharers;
     rr_deallocateL2CacheBlock;
@@ -977,8 +1022,8 @@ machine(L2Cache, "MOSI Directory L2 Cache CMP") {
     o_popIncomingResponseQueue;
   }
 
-  transition(I_I, Ack_all, NP) {
-    s_deallocateTBE;
+  transition(I_I, Ack_all, M_I) {  // target changed from NP to M_I; the TBE is no longer deallocated here
+    c_exclusiveCleanReplacement;
     o_popIncomingResponseQueue;
   }
 
@@ -988,8 +1033,8 @@ machine(L2Cache, "MOSI Directory L2 Cache CMP") {
     o_popIncomingResponseQueue;
   }
 
-  transition(MCT_I, WB_Data_clean, NP) {
-    s_deallocateTBE;
+  transition(MCT_I, {WB_Data_clean, Ack_all}, M_I) {  // Ack_all is folded in here (its separate MCT_I transition is removed by this change); target is M_I and the TBE is kept
+    c_exclusiveCleanReplacement;   
     o_popIncomingResponseQueue;
   }
 
@@ -999,11 +1044,6 @@ machine(L2Cache, "MOSI Directory L2 Cache CMP") {
     o_popIncomingResponseQueue;
   }
 
-  // clean data that L1 exclusive never wrote
-  transition(MCT_I, Ack_all, NP) {
-    s_deallocateTBE;
-    o_popIncomingResponseQueue;
-  }
 
   // drop this because L1 will send data again
   //  the reason we don't accept is that the request virtual network may be completely backed up
@@ -1037,3 +1077,5 @@ machine(L2Cache, "MOSI Directory L2 Cache CMP") {
   }
 }
 
+
+
index 84768c3339d947aeb000510822aebf3ca90c529f..f5a2e431b51a3f83622c03b8e4e92430bacad1ea 100644 (file)
 // Copied here by aep 12/14/07
 
 
-machine(Directory, "MESI_CMP_filter_directory protocol")  : LATENCY_MEMORY_LATENCY LATENCY_TO_MEM_CTRL_LATENCY {
+machine(Directory, "MESI_CMP_filter_directory protocol") 
+ : int to_mem_ctrl_latency,
+   int directory_latency
+{
 
   MessageBuffer requestToDir, network="From", virtual_network="2", ordered="false";
   MessageBuffer responseToDir, network="From", virtual_network="3", ordered="false";
   MessageBuffer responseFromDir, network="To", virtual_network="3", ordered="false";
 
-  MessageBuffer dmaRequestFromDir, network="To", virtual_network="4", ordered="true", no_vector="true";
-  MessageBuffer dmaRequestToDir, network="From", virtual_network="5", ordered="true", no_vector="true";
+  MessageBuffer dmaRequestFromDir, network="To", virtual_network="6", ordered="true";
+  MessageBuffer dmaRequestToDir, network="From", virtual_network="7", ordered="true";
 
 
   // STATES
@@ -50,7 +53,15 @@ machine(Directory, "MESI_CMP_filter_directory protocol")  : LATENCY_MEMORY_LATEN
     // Base states
     I, desc="Owner";
     ID, desc="Intermediate state for DMA_READ when in I";
-    ID_W, desc="Intermediate state for DMA_WRITE when in I";
+    ID_W, desc="Intermediate state for DMA_WRITE when in I";    
+
+    M, desc="Modified";
+    IM, desc="Intermediate State I>M";
+    MI, desc="Intermediate State M>I";
+    M_DRD, desc="Intermediate State when there is a dma read";
+    M_DRDI, desc="Intermediate State when there is a dma read";
+    M_DWR, desc="Intermediate State when there is a dma write";
+    M_DWRI, desc="Intermediate State when there is a dma write";
   }
 
   // Events
@@ -62,7 +73,7 @@ machine(Directory, "MESI_CMP_filter_directory protocol")  : LATENCY_MEMORY_LATEN
 //added by SS for dma
     DMA_READ, desc="A DMA Read memory request";
     DMA_WRITE, desc="A DMA Write memory request";
-
+    CleanReplacement, desc="Clean Replacement in L2 cache";
 
   }
 
@@ -70,7 +81,10 @@ machine(Directory, "MESI_CMP_filter_directory protocol")  : LATENCY_MEMORY_LATEN
 
   // DirectoryEntry
   structure(Entry, desc="...") {
+    State DirectoryState,          desc="Directory state";  // returned by getState when no TBE exists; written by setState
     DataBlock DataBlk,             desc="data for the block";
+    NetDest Sharers,                   desc="Sharers for this block";  // setState asserts this is empty when entering I or M
+    NetDest Owner,                     desc="Owner of this block"; // set by e_ownerIsRequestor, cleared by c_clearOwner; setState asserts count 0 for I and 1 for M
   }
 
   external_type(DirectoryMemory) {
@@ -83,6 +97,21 @@ machine(Directory, "MESI_CMP_filter_directory protocol")  : LATENCY_MEMORY_LATEN
 
   }
 
+  // TBE entries for DMA requests; the fields other than TBEState are filled from the DMARequestMsg by v_allocateTBE
+  structure(TBE, desc="TBE entries for outstanding DMA requests") {
+    Address PhysicalAddress, desc="physical address";  // copied from in_msg.PhysicalAddress; addressOffset() of it positions the partial write
+    State TBEState,        desc="Transient State";  // takes precedence over the directory entry's state in getState
+    DataBlock DataBlk,     desc="Data to be written (DMA write only)";
+    int Len,               desc="...";  // copied from in_msg.Len; passed as the last argument of copyPartial
+  }
+    
+  external_type(TBETable) {  // interface of the table instantiated below as TBEs; only these four operations are used by this machine
+    TBE lookup(Address);  
+    void allocate(Address); 
+    void deallocate(Address);
+    bool isPresent(Address);
+  } 
+
 
   // ** OBJECTS **
 
@@ -94,13 +123,40 @@ machine(Directory, "MESI_CMP_filter_directory protocol")  : LATENCY_MEMORY_LATEN
   MemoryControl memBuffer, factory='RubySystem::getMemoryControl(m_cfg["memory_controller_name"])';
 
 
+  TBETable TBEs, template_hack="<Directory_TBE>";
+      
   State getState(Address addr) {
-    return State:I;
-  }
+    if (TBEs.isPresent(addr)) {  // an outstanding TBE decides the state before the directory entry is consulted
+      return TBEs[addr].TBEState;   
+    } else if (directory.isPresent(addr)) {
+      return directory[addr].DirectoryState;
+    } else {
+      return State:I;  // address known to neither table (this was the unconditional result before the change)
+    }
+  }  
+   
 
   void setState(Address addr, State state) {
+
+    if (TBEs.isPresent(addr)) {  // keep the TBE's copy of the state current when a TBE exists
+      TBEs[addr].TBEState := state;
+    }
+  
+    if (directory.isPresent(addr)) {  // not an else-branch: TBE and directory entry can both be updated
+  
+      if (state == State:I)  {  // consistency checks exist only for I and M; every other state is stored unchecked
+        assert(directory[addr].Owner.count() == 0);
+        assert(directory[addr].Sharers.count() == 0);
+      } else if (state == State:M) {
+        assert(directory[addr].Owner.count() == 1);
+        assert(directory[addr].Sharers.count() == 0);
+      }
+      
+      directory[addr].DirectoryState := state;
+    }
   }
 
+
   bool isGETRequest(CoherenceRequestType type) {
     return (type == CoherenceRequestType:GETS) ||
       (type == CoherenceRequestType:GET_INSTR) ||
@@ -120,9 +176,9 @@ machine(Directory, "MESI_CMP_filter_directory protocol")  : LATENCY_MEMORY_LATEN
     if (dmaRequestQueue_in.isReady()) {
       peek(dmaRequestQueue_in, DMARequestMsg) {
         if (in_msg.Type == DMARequestType:READ) {
-          trigger(Event:DMA_READ, in_msg.PhysicalAddress);
+          trigger(Event:DMA_READ, in_msg.LineAddress);
         } else if (in_msg.Type == DMARequestType:WRITE) {
-          trigger(Event:DMA_WRITE, in_msg.PhysicalAddress);
+          trigger(Event:DMA_WRITE, in_msg.LineAddress);
         } else {
           error("Invalid message");
         }
@@ -151,6 +207,8 @@ machine(Directory, "MESI_CMP_filter_directory protocol")  : LATENCY_MEMORY_LATEN
         assert(in_msg.Destination.isElement(machineID));
         if (in_msg.Type == CoherenceResponseType:MEMORY_DATA) {
           trigger(Event:Data, in_msg.Address);
+        } else if (in_msg.Type == CoherenceResponseType:ACK) {
+          trigger(Event:CleanReplacement, in_msg.Address);
         } else {
           DEBUG_EXPR(in_msg.Type);
           error("Invalid message");
@@ -179,12 +237,12 @@ machine(Directory, "MESI_CMP_filter_directory protocol")  : LATENCY_MEMORY_LATEN
 
   // Actions
   action(a_sendAck, "a", desc="Send ack to L2") {
-    peek(memQueue_in, MemoryMsg) {
-      enqueue(responseNetwork_out, ResponseMsg, latency="TO_MEM_CTRL_LATENCY") {
+    peek(responseNetwork_in, ResponseMsg) {
+      enqueue(responseNetwork_out, ResponseMsg, latency=to_mem_ctrl_latency) {
         out_msg.Address := address;
         out_msg.Type := CoherenceResponseType:MEMORY_ACK;
         out_msg.Sender := machineID;
-        out_msg.Destination.add(in_msg.OriginalRequestorMachId);
+        out_msg.Destination.add(in_msg.Sender);
         out_msg.MessageSize := MessageSizeType:Response_Control;
       }
     }
@@ -192,7 +250,7 @@ machine(Directory, "MESI_CMP_filter_directory protocol")  : LATENCY_MEMORY_LATEN
 
   action(d_sendData, "d", desc="Send data to requestor") {
     peek(memQueue_in, MemoryMsg) {
-      enqueue(responseNetwork_out, ResponseMsg, latency="TO_MEM_CTRL_LATENCY") {
+      enqueue(responseNetwork_out, ResponseMsg, latency=to_mem_ctrl_latency) {
         out_msg.Address := address;
         out_msg.Type := CoherenceResponseType:MEMORY_DATA;
         out_msg.Sender := machineID;
@@ -204,6 +262,19 @@ machine(Directory, "MESI_CMP_filter_directory protocol")  : LATENCY_MEMORY_LATEN
     }
   }
 
+  // Like a_sendAck, but peeks memQueue_in and sends the MEMORY_ACK to the MemoryMsg's OriginalRequestorMachId
+  action(aa_sendAck, "aa", desc="Send ack to L2") {
+    peek(memQueue_in, MemoryMsg) {
+      enqueue(responseNetwork_out, ResponseMsg, latency=to_mem_ctrl_latency) {
+        out_msg.Address := address;
+        out_msg.Type := CoherenceResponseType:MEMORY_ACK;
+        out_msg.Sender := machineID;
+        out_msg.Destination.add(in_msg.OriginalRequestorMachId);
+        out_msg.MessageSize := MessageSizeType:Response_Control;
+      }
+    }
+  }
+
   action(j_popIncomingRequestQueue, "j", desc="Pop incoming request queue") {
     requestNetwork_in.dequeue();
   }
@@ -218,7 +289,7 @@ machine(Directory, "MESI_CMP_filter_directory protocol")  : LATENCY_MEMORY_LATEN
 
   action(qf_queueMemoryFetchRequest, "qf", desc="Queue off-chip fetch request") {
     peek(requestNetwork_in, RequestMsg) {
-      enqueue(memQueue_out, MemoryMsg, latency="TO_MEM_CTRL_LATENCY") {
+      enqueue(memQueue_out, MemoryMsg, latency=to_mem_ctrl_latency) {
         out_msg.Address := address;
         out_msg.Type := MemoryRequestType:MEMORY_READ;
         out_msg.Sender := machineID;
@@ -234,7 +305,7 @@ machine(Directory, "MESI_CMP_filter_directory protocol")  : LATENCY_MEMORY_LATEN
 
   action(qw_queueMemoryWBRequest, "qw", desc="Queue off-chip writeback request") {
     peek(responseNetwork_in, ResponseMsg) {
-      enqueue(memQueue_out, MemoryMsg, latency="TO_MEM_CTRL_LATENCY") {
+      enqueue(memQueue_out, MemoryMsg, latency=to_mem_ctrl_latency) {
         out_msg.Address := address;
         out_msg.Type := MemoryRequestType:MEMORY_WB;
         out_msg.Sender := machineID;
@@ -258,7 +329,7 @@ machine(Directory, "MESI_CMP_filter_directory protocol")  : LATENCY_MEMORY_LATEN
 //added by SS for dma
   action(qf_queueMemoryFetchRequestDMA, "qfd", desc="Queue off-chip fetch request") {
     peek(dmaRequestQueue_in, DMARequestMsg) {
-      enqueue(memQueue_out, MemoryMsg, latency="TO_MEM_CTRL_LATENCY") {
+      enqueue(memQueue_out, MemoryMsg, latency=to_mem_ctrl_latency) {
         out_msg.Address := address;
         out_msg.Type := MemoryRequestType:MEMORY_READ;
         out_msg.Sender := machineID;
@@ -276,7 +347,7 @@ machine(Directory, "MESI_CMP_filter_directory protocol")  : LATENCY_MEMORY_LATEN
 
   action(dr_sendDMAData, "dr", desc="Send Data to DMA controller from directory") {
     peek(memQueue_in, MemoryMsg) {
-      enqueue(dmaResponseNetwork_out, DMAResponseMsg, latency="MEMORY_LATENCY") {
+      enqueue(dmaResponseNetwork_out, DMAResponseMsg, latency=to_mem_ctrl_latency) {
         out_msg.PhysicalAddress := address;
         out_msg.Type := DMAResponseType:DATA;
         out_msg.DataBlk := in_msg.DataBlk;   // we send the entire data block and rely on the dma controller to split it up if need be
@@ -288,18 +359,22 @@ machine(Directory, "MESI_CMP_filter_directory protocol")  : LATENCY_MEMORY_LATEN
 
   action(dw_writeDMAData, "dw", desc="DMA Write data to memory") {
     peek(dmaRequestQueue_in, DMARequestMsg) {
-      directory[in_msg.PhysicalAddress].DataBlk.copyPartial(in_msg.DataBlk, in_msg.Offset, in_msg.Len);
+      // The offset is now computed with addressOffset(in_msg.PhysicalAddress) instead of being read from in_msg.Offset.
+
+      directory[in_msg.PhysicalAddress].DataBlk.copyPartial(in_msg.DataBlk, addressOffset(in_msg.PhysicalAddress), in_msg.Len);
     }
   }
 
   action(qw_queueMemoryWBRequest_partial, "qwp", desc="Queue off-chip writeback request") {
      peek(dmaRequestQueue_in, DMARequestMsg) {
-      enqueue(memQueue_out, MemoryMsg, latency="TO_MEM_CTRL_LATENCY") {
+      enqueue(memQueue_out, MemoryMsg, latency=to_mem_ctrl_latency) {
         out_msg.Address := address;
         out_msg.Type := MemoryRequestType:MEMORY_WB;
         out_msg.OriginalRequestorMachId := machineID;
         //out_msg.DataBlk := in_msg.DataBlk;
-        out_msg.DataBlk.copyPartial(in_msg.DataBlk, in_msg.Offset, in_msg.Len);
+        out_msg.DataBlk.copyPartial(in_msg.DataBlk, addressOffset(address), in_msg.Len);
+
+
         out_msg.MessageSize := in_msg.MessageSize;
         //out_msg.Prefetch := in_msg.Prefetch;
 
@@ -309,7 +384,7 @@ machine(Directory, "MESI_CMP_filter_directory protocol")  : LATENCY_MEMORY_LATEN
   }
 
   action(da_sendDMAAck, "da", desc="Send Ack to DMA controller") {
-      enqueue(dmaResponseNetwork_out, DMAResponseMsg, latency="MEMORY_LATENCY") {
+      enqueue(dmaResponseNetwork_out, DMAResponseMsg, latency=to_mem_ctrl_latency) {
         out_msg.PhysicalAddress := address;
         out_msg.Type := DMAResponseType:ACK;
         out_msg.Destination.add(map_Address_to_DMA(address));
@@ -318,33 +393,123 @@ machine(Directory, "MESI_CMP_filter_directory protocol")  : LATENCY_MEMORY_LATEN
   }
 
   action(z_recycleRequestQueue, "z", desc="recycle request queue") {
-    requestNetwork_in.dequeue();
+    requestNetwork_in.recycle();  // was dequeue(); the call now matches the action's name and desc
+  }
+
+  action(zz_recycleDMAQueue, "zz", desc="recycle DMA queue") {
+    dmaRequestQueue_in.recycle();  // used by the {DMA_WRITE, DMA_READ} transition that covers the transient directory states
+  }
+
+
+  action(e_ownerIsRequestor, "e", desc="The owner is now the requestor") {
+    peek(requestNetwork_in, RequestMsg) {
+      directory[address].Owner.clear();  // clear first so that the add below leaves the requestor as the only owner
+      directory[address].Owner.add(in_msg.Requestor);
+    }
   }
 
+
+  // Sends an INV response to the recorded owner of the block addressed by the
+  // DMA request at the head of dmaRequestQueue_in.
+  action(inv_sendCacheInvalidate, "inv", desc="Invalidate a cache block") {
+    peek(dmaRequestQueue_in, DMARequestMsg) {
+      enqueue(responseNetwork_out, ResponseMsg, latency=directory_latency) {
+        out_msg.Address := address;
+        out_msg.Type := CoherenceResponseType:INV;
+        out_msg.Sender := machineID;
+        // Read the owner through `address`, the same key used by getState,
+        // e_ownerIsRequestor and c_clearOwner, rather than through the
+        // message's PhysicalAddress.
+        out_msg.Destination := directory[address].Owner;
+        out_msg.MessageSize := MessageSizeType:Response_Control;
+      }
+    }
+  }
+
+
+  action(drp_sendDMAData, "drp", desc="Send Data to DMA controller from incoming PUTX") {  // despite "PUTX" in the desc, the data comes from the ResponseMsg at the head of responseNetwork_in
+    peek(responseNetwork_in, ResponseMsg) {
+      enqueue(dmaResponseNetwork_out, DMAResponseMsg, latency=to_mem_ctrl_latency) {
+        out_msg.PhysicalAddress := address;
+        out_msg.Type := DMAResponseType:DATA;
+        out_msg.DataBlk := in_msg.DataBlk;   // we send the entire data block and rely on the dma controller to split it up if need be
+        out_msg.Destination.add(map_Address_to_DMA(address));
+        out_msg.MessageSize := MessageSizeType:Response_Data;
+      }
+    }
+  }
+
+  action(c_clearOwner, "c", desc="Clear the owner field") {
+    directory[address].Owner.clear();  // setState asserts Owner.count() == 0 for state I, so transitions into I run this
+  }
+
+  action(v_allocateTBE, "v", desc="Allocate TBE") {  // saves the DMA request's payload; TBEState is not assigned here (NOTE(review): presumably set through setState - confirm)
+    peek(dmaRequestQueue_in, DMARequestMsg) {
+      TBEs.allocate(address);
+      TBEs[address].DataBlk := in_msg.DataBlk;
+      TBEs[address].PhysicalAddress := in_msg.PhysicalAddress;  // kept so the write offset can be recomputed with addressOffset() later
+      TBEs[address].Len := in_msg.Len; 
+    }
+  }
+
+  action(dwt_writeDMADataFromTBE, "dwt", desc="DMA Write data to memory from TBE") {
+    // The TBE structure declares no Offset field, so the offset is recomputed from the saved PhysicalAddress.
+    directory[address].DataBlk.copyPartial(TBEs[address].DataBlk, addressOffset(TBEs[address].PhysicalAddress), TBEs[address].Len);
+
+
+  }
+
+
+  action(qw_queueMemoryWBRequest_partialTBE, "qwt", desc="Queue off-chip writeback request") {
+    peek(responseNetwork_in, ResponseMsg) {
+      enqueue(memQueue_out, MemoryMsg, latency=to_mem_ctrl_latency) {
+        out_msg.Address := address;
+        out_msg.Type := MemoryRequestType:MEMORY_WB;
+        out_msg.OriginalRequestorMachId := in_msg.Sender;  // aa_sendAck sends its MEMORY_ACK to this field of the MemoryMsg it peeks
+        // The ResponseMsg's DataBlk is not copied; only the DMA data saved in the TBE goes into out_msg.DataBlk.
+        // The TBE has no Offset field, so the offset is recomputed from the TBE's PhysicalAddress.
+        out_msg.DataBlk.copyPartial(TBEs[address].DataBlk, addressOffset(TBEs[address].PhysicalAddress), TBEs[address].Len);
+        out_msg.MessageSize := in_msg.MessageSize;
+        // out_msg.Prefetch is left unset.
+   
+        DEBUG_EXPR(out_msg);
+      }
+    }
+  }
+
+  action(w_deallocateTBE, "w", desc="Deallocate TBE") {
+    TBEs.deallocate(address);  // after this, getState falls back to the directory entry's state for this address
+  }
+
+
   // TRANSITIONS
 
-  transition(I, Fetch) {
-    //d_sendData;
+
+  transition(I, Fetch, IM) {  // fetch now leaves I: wait in IM for memory, with the requestor recorded as owner
     qf_queueMemoryFetchRequest;
+    e_ownerIsRequestor;  // peeks requestNetwork_in, so it is listed before the pop below
     j_popIncomingRequestQueue;
   }
 
-  transition(I, Data) {
+  transition(IM, Memory_Data, M) {  // the memory data is forwarded by d_sendData, then the block is in M
+    d_sendData;
+    l_popMemQueue;
+  }
+//added by SS
+  transition(M, CleanReplacement, I) {  // CleanReplacement is triggered by a CoherenceResponseType:ACK; no memory writeback is queued
+    c_clearOwner;  // setState asserts Owner.count() == 0 for I
+    a_sendAck;  // peeks responseNetwork_in, so it is listed before the pop below
+    k_popIncomingResponseQueue;
+  }
+
+  transition(M, Data, MI) {  // data from the response network is queued for writeback; MI waits for the memory ack
     m_writeDataToMemory;
-    //a_sendAck;
     qw_queueMemoryWBRequest;
     k_popIncomingResponseQueue;
   }
 
-  transition(I, Memory_Data) {
-    d_sendData;
+  transition(MI, Memory_Ack, I) {  // writeback acknowledged by memory: drop the owner and ack the original requestor
+    c_clearOwner;
+    aa_sendAck;  // memQueue_in variant of the ack; peeks the queue popped below
     l_popMemQueue;
   }
 
-  transition(I, Memory_Ack) {
-    a_sendAck;
-    l_popMemQueue;
-  }
 
 //added by SS for dma support
   transition(I, DMA_READ, ID) {
@@ -368,9 +533,52 @@ machine(Directory, "MESI_CMP_filter_directory protocol")  : LATENCY_MEMORY_LATEN
     l_popMemQueue;
   }
 
-  transition({ID, ID_W}, {Fetch, Data} ) {
+  transition({ID, ID_W, M_DRDI, M_DWRI, IM, MI}, {Fetch, Data} ) {  // NOTE(review): Data is triggered alongside the other CoherenceResponseType events, yet z_recycleRequestQueue recycles requestNetwork_in - confirm Data is really deferred by this
     z_recycleRequestQueue;
   }
 
+  transition({ID, ID_W, M_DRD, M_DRDI, M_DWR, M_DWRI, IM, MI}, {DMA_WRITE, DMA_READ} ) {  // DMA requests arriving in any of these intermediate states are recycled, with no state change
+    zz_recycleDMAQueue;
+  }
+
+
+  transition(M, DMA_READ, M_DRD) {  // DMA read of an owned block: send INV to the recorded owner and wait in M_DRD
+    inv_sendCacheInvalidate;  // peeks dmaRequestQueue_in, so it is listed before the pop below
+    p_popIncomingDMARequestQueue;
+  }
+
+  transition(M_DRD, Data, M_DRDI) {  // data arrives after the INV: forward it to the DMA controller and queue the writeback
+    drp_sendDMAData;  // sends the ResponseMsg's DataBlk to the DMA controller
+    m_writeDataToMemory;
+    qw_queueMemoryWBRequest;
+    k_popIncomingResponseQueue;  // last: the actions above that peek responseNetwork_in need the message still queued
+  }
+
+  transition(M_DRDI, Memory_Ack, I) {  // memory acknowledged the writeback that followed the DMA read
+    aa_sendAck;  // MEMORY_ACK to the MemoryMsg's OriginalRequestorMachId
+    c_clearOwner;  // setState asserts Owner.count() == 0 for I
+    l_popMemQueue;
+  }
+
+  transition(M, DMA_WRITE, M_DWR) {  // DMA write to an owned block: save the request in a TBE, since it is popped here before any data comes back
+    v_allocateTBE;
+    inv_sendCacheInvalidate;
+    p_popIncomingDMARequestQueue;  // last: both actions above peek dmaRequestQueue_in
+  }
+
+  transition(M_DWR, Data, M_DWRI) {  // data arrives after the INV; the memory writeback is built from the TBE's saved DMA data
+    m_writeDataToMemory;
+    qw_queueMemoryWBRequest_partialTBE;  // peeks responseNetwork_in for Sender and MessageSize, so it is listed before the pop
+    k_popIncomingResponseQueue; 
+  }
+
+  transition(M_DWRI, Memory_Ack, I) {  // memory acknowledged the writeback: finish the DMA write and release the TBE
+    dwt_writeDMADataFromTBE;  // reads the TBE, so it is listed before w_deallocateTBE
+    aa_sendAck;  // MEMORY_ACK to the MemoryMsg's OriginalRequestorMachId
+    c_clearOwner;  // setState asserts Owner.count() == 0 for I
+    da_sendDMAAck;
+    w_deallocateTBE;
+    l_popMemQueue;
+  }
 
 }
index e726b062c5cf567ec58995280412c8c4133e1ace..15934e6b2c33441568b6f62b630b5cd7f35e8e11 100644 (file)
@@ -1,32 +1,57 @@
 
 /*
- * Copyright (c) 1999-2005 Mark D. Hill and David A. Wood
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met: redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer;
- * redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution;
- * neither the name of the copyright holders nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
+    Copyright (C) 1999-2005 by Mark D. Hill and David A. Wood for the
+    Wisconsin Multifacet Project.  Contact: gems@cs.wisc.edu
+    http://www.cs.wisc.edu/gems/
+
+    --------------------------------------------------------------------
+
+    This file is part of the SLICC (Specification Language for
+    Implementing Cache Coherence), a component of the Multifacet GEMS
+    (General Execution-driven Multiprocessor Simulator) software
+    toolset originally developed at the University of Wisconsin-Madison.
+                                                                                
+    SLICC was originally developed by Milo Martin with substantial
+    contributions from Daniel Sorin.
+
+    Substantial further development of Multifacet GEMS at the
+    University of Wisconsin was performed by Alaa Alameldeen, Brad
+    Beckmann, Jayaram Bobba, Ross Dickson, Dan Gibson, Pacia Harper,
+    Derek Hower, Milo Martin, Michael Marty, Carl Mauer, Michelle Moravan,
+    Kevin Moore, Manoj Plakal, Daniel Sorin, Haris Volos, Min Xu, and Luke Yen.
+
+    --------------------------------------------------------------------
+
+    If your use of this software contributes to a published paper, we
+    request that you (1) cite our summary paper that appears on our
+    website (http://www.cs.wisc.edu/gems/) and (2) e-mail a citation
+    for your published paper to gems@cs.wisc.edu.
+
+    If you redistribute derivatives of this software, we request that
+    you notify us and either (1) ask people to register with us at our
+    website (http://www.cs.wisc.edu/gems/) or (2) collect registration
+    information and periodically send it to us.
 
+    --------------------------------------------------------------------
+
+    Multifacet GEMS is free software; you can redistribute it and/or
+    modify it under the terms of version 2 of the GNU General Public
+    License as published by the Free Software Foundation.
+
+    Multifacet GEMS is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+    General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with the Multifacet GEMS; if not, write to the Free Software
+    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+    02111-1307, USA
+
+    The GNU General Public License is contained in the file LICENSE.
+
+### END HEADER ###
+*/
 /*
  * $Id: MSI_MOSI_CMP_directory-msg.sm 1.5 05/01/19 15:48:37-06:00 mikem@royal16.cs.wisc.edu $
  *
@@ -40,6 +65,12 @@ enumeration(CoherenceRequestType, desc="...") {
   GET_INSTR, desc="Get Instruction";
   INV,       desc="INValidate";
   PUTX,      desc="replacement message";
+
+  WB_ACK,    desc="Writeback ack";
+  WB_NACK,   desc="Writeback neg. ack";
+  FWD,      desc="Generic FWD";
+
+
 }
 
 // CoherenceResponseType
@@ -52,6 +83,7 @@ enumeration(CoherenceResponseType, desc="...") {
   WB_ACK,            desc="writeback ack";
   UNBLOCK,         desc="unblock";
   EXCLUSIVE_UNBLOCK,         desc="exclusive unblock";
+  INV, desc="Invalidate from directory";
 }
 
 // RequestMsg
@@ -94,6 +126,7 @@ enumeration(DMAResponseType, desc="...", default="DMAResponseType_NULL") {
 structure(DMARequestMsg, desc="...", interface="NetworkMessage") {
   DMARequestType Type,       desc="Request type (read/write)";
   Address PhysicalAddress,   desc="Physical address for this request";
+  Address LineAddress,       desc="Line address for this request";
   NetDest Destination,       desc="Destination";
   DataBlock DataBlk,         desc="DataBlk attached to this request";
   int Offset,                desc="The offset into the datablock";
@@ -104,6 +137,7 @@ structure(DMARequestMsg, desc="...", interface="NetworkMessage") {
 structure(DMAResponseMsg, desc="...", interface="NetworkMessage") {
   DMAResponseType Type,      desc="Response type (DATA/ACK)";
   Address PhysicalAddress,   desc="Physical address for this request";
+  Address LineAddress,       desc="Line address for this request";
   NetDest Destination,       desc="Destination";
   DataBlock DataBlk,         desc="DataBlk attached to this request";
   MessageSizeType MessageSize, desc="size category of the message";