MESI: Add queues for stalled requests

[gem5.git] / src / mem / protocol / MESI_CMP_directory-L1cache.sm
diff --git a/src/mem/protocol/MESI_CMP_directory-L1cache.sm b/src/mem/protocol/MESI_CMP_directory-L1cache.sm

index 8744a71225abaf234d29eabec7fc6adb708f8f56..91be3933f08899988cf340df5ffedc7f76cf9177 100644 (file)
--- a/src/mem/protocol/MESI_CMP_directory-L1cache.sm
+++ b/src/mem/protocol/MESI_CMP_directory-L1cache.sm
@@ -27,48 +27,48 @@
   * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
   */
  
-machine(L1Cache, "MSI Directory L1 Cache CMP")
+machine(L1Cache, "MESI Directory L1 Cache CMP")
   : Sequencer * sequencer,
     CacheMemory * L1IcacheMemory,
     CacheMemory * L1DcacheMemory,
     int l2_select_num_bits,
     int l1_request_latency = 2,
     int l1_response_latency = 2,
-   int to_l2_latency = 1
+   int to_l2_latency = 1,
+   bool send_evictions
  {
    // NODE L1 CACHE
    // From this node's L1 cache TO the network
    // a local L1 -> this L2 bank, currently ordered with directory forwarded requests
-  MessageBuffer requestFromL1Cache, network="To", virtual_network="0", ordered="false";
+  MessageBuffer requestFromL1Cache, network="To", virtual_network="0", ordered="false", vnet_type="request";
    // a local L1 -> this L2 bank
-  MessageBuffer responseFromL1Cache, network="To", virtual_network="1", ordered="false";
-  MessageBuffer unblockFromL1Cache, network="To", virtual_network="2", ordered="false";
+  MessageBuffer responseFromL1Cache, network="To", virtual_network="1", ordered="false", vnet_type="response";
+  MessageBuffer unblockFromL1Cache, network="To", virtual_network="2", ordered="false", vnet_type="unblock";
  
  
    // To this node's L1 cache FROM the network
    // a L2 bank -> this L1
-  MessageBuffer requestToL1Cache, network="From", virtual_network="0", ordered="false";
+  MessageBuffer requestToL1Cache, network="From", virtual_network="0", ordered="false", vnet_type="request";
    // a L2 bank -> this L1
-  MessageBuffer responseToL1Cache, network="From", virtual_network="1", ordered="false";
+  MessageBuffer responseToL1Cache, network="From", virtual_network="1", ordered="false", vnet_type="response";
  
    // STATES
-  enumeration(State, desc="Cache states", default="L1Cache_State_I") {
+  state_declaration(State, desc="Cache states", default="L1Cache_State_I") { // each state now declares its AccessPermission inline
      // Base states
-    NP, desc="Not present in either cache";
-    I, desc="a L1 cache entry Idle";
-    S, desc="a L1 cache entry Shared";
-    E, desc="a L1 cache entry Exclusive";
-    M, desc="a L1 cache entry Modified", format="!b";
+    NP, AccessPermission:Invalid, desc="Not present in either cache";
+    I, AccessPermission:Invalid, desc="a L1 cache entry Idle";
+    S, AccessPermission:Read_Only, desc="a L1 cache entry Shared";
+    E, AccessPermission:Read_Only, desc="a L1 cache entry Exclusive";
+    M, AccessPermission:Read_Write, desc="a L1 cache entry Modified", format="!b";
  
      // Transient States
-    IS, desc="L1 idle, issued GETS, have not seen response yet";
-    IM, desc="L1 idle, issued GETX, have not seen response yet";
-    SM, desc="L1 idle, issued GETX, have not seen response yet";
-    IS_I, desc="L1 idle, issued GETS, saw Inv before data because directory doesn't block on GETS hit";
+    IS, AccessPermission:Busy, desc="L1 idle, issued GETS, have not seen response yet";
+    IM, AccessPermission:Busy, desc="L1 idle, issued GETX, have not seen response yet";
+    SM, AccessPermission:Read_Only, desc="L1 idle, issued GETX, have not seen response yet"; // NOTE(review): desc says "L1 idle" but permission is Read_Only; desc looks copied from IM - confirm
+    IS_I, AccessPermission:Busy, desc="L1 idle, issued GETS, saw Inv before data because directory doesn't block on GETS hit";
  
-    M_I, desc="L1 replacing, waiting for ACK";
-    E_I, desc="L1 replacing, waiting for ACK";
-    SINK_WB_ACK, desc="This is to sink WB_Acks from L2";
+    M_I, AccessPermission:Busy, desc="L1 replacing, waiting for ACK";
+    SINK_WB_ACK, AccessPermission:Busy, desc="This is to sink WB_Acks from L2";
  
    }
  
@@ -119,7 +119,7 @@ machine(L1Cache, "MSI Directory L1 Cache CMP")
      int pendingAcks, default="0", desc="number of pending acks";
    }
  
-  external_type(TBETable) {
+  structure(TBETable, external="yes") {
      TBE lookup(Address);
      void allocate(Address);
      void deallocate(Address);
@@ -130,13 +130,13 @@ machine(L1Cache, "MSI Directory L1 Cache CMP")
  
    MessageBuffer mandatoryQueue, ordered="false";
  
-  int cache_state_to_int(State state);
    int l2_select_low_bit, default="RubySystem::getBlockSizeBits()";
  
    void set_cache_entry(AbstractCacheEntry a);
    void unset_cache_entry();
    void set_tbe(TBE a);
    void unset_tbe();
+  void wakeUpBuffers(Address a);
  
    // inclusive cache returns L1 entries only
    Entry getCacheEntry(Address addr), return_by_pointer="yes" {
@@ -180,29 +180,45 @@ machine(L1Cache, "MSI Directory L1 Cache CMP")
  
      if (is_valid(cache_entry)) {
        cache_entry.CacheState := state;
+    }
+  }
  
-      // Set permission
-      if (state == State:I) {
-        cache_entry.changePermission(AccessPermission:Invalid);
-      } else if (state == State:S || state == State:E) {
-        cache_entry.changePermission(AccessPermission:Read_Only);
-      } else if (state == State:M) {
-        cache_entry.changePermission(AccessPermission:Read_Write);
-      } else {
-        cache_entry.changePermission(AccessPermission:Busy);
-      }
+  AccessPermission getAccessPermission(Address addr) { // permission for addr: from the TBE state, else the cache-entry state, else NotPresent
+    TBE tbe := L1_TBEs[addr];
+    if(is_valid(tbe)) { // a valid TBE is consulted before the cache entry
+      DPRINTF(RubySlicc, "%s\n", L1Cache_State_to_permission(tbe.TBEState));
+      return L1Cache_State_to_permission(tbe.TBEState);
+    }
+
+    Entry cache_entry := getCacheEntry(addr);
+    if(is_valid(cache_entry)) { // no TBE: use the state stored in the cache entry
+      DPRINTF(RubySlicc, "%s\n", L1Cache_State_to_permission(cache_entry.CacheState));
+      return L1Cache_State_to_permission(cache_entry.CacheState);
      }
+
+    DPRINTF(RubySlicc, "%s\n", AccessPermission:NotPresent); // neither a TBE nor a cache entry exists for addr
+    return AccessPermission:NotPresent;
+  }
+
+  DataBlock getDataBlock(Address addr), return_by_ref="yes" { // returns the DataBlk of the entry found by getCacheEntry(addr)
+    return getCacheEntry(addr).DataBlk; // NOTE(review): no is_valid() check on the looked-up entry - confirm callers only pass cached addresses
    }
  
-  Event mandatory_request_type_to_event(CacheRequestType type) {
-    if (type == CacheRequestType:LD) {
+  void setAccessPermission(Entry cache_entry, Address addr, State state) { // applies the permission mapped from state to cache_entry; addr is not used in the body
+    if (is_valid(cache_entry)) { // nothing to update when there is no cache entry
+      cache_entry.changePermission(L1Cache_State_to_permission(state));
+    }
+  }
+
+  Event mandatory_request_type_to_event(RubyRequestType type) { // maps a request type to Load / Ifetch / Store; any other type hits error()
+    if (type == RubyRequestType:LD) {
        return Event:Load;
-    } else if (type == CacheRequestType:IFETCH) {
+    } else if (type == RubyRequestType:IFETCH) {
        return Event:Ifetch;
-    } else if ((type == CacheRequestType:ST) || (type == CacheRequestType:ATOMIC)) {
+    } else if ((type == RubyRequestType:ST) || (type == RubyRequestType:ATOMIC)) { // ATOMIC is handled as a Store
        return Event:Store;
      } else {
-      error("Invalid CacheRequestType");
+      error("Invalid RubyRequestType");
      }
    }
  
@@ -215,7 +231,7 @@ machine(L1Cache, "MSI Directory L1 Cache CMP")
    out_port(unblockNetwork_out, ResponseMsg, unblockFromL1Cache);
  
    // Response IntraChip L1 Network - response msg to this L1 cache
-  in_port(responseIntraChipL1Network_in, ResponseMsg, responseToL1Cache) {
+  in_port(responseIntraChipL1Network_in, ResponseMsg, responseToL1Cache, rank = 2) {
      if (responseIntraChipL1Network_in.isReady()) {
        peek(responseIntraChipL1Network_in, ResponseMsg, block_on="Address") {
          assert(in_msg.Destination.isElement(machineID));
@@ -253,7 +269,7 @@ machine(L1Cache, "MSI Directory L1 Cache CMP")
    }
  
    // Request InterChip network - request from this L1 cache to the shared L2
-  in_port(requestIntraChipL1Network_in, RequestMsg, requestToL1Cache) {
+  in_port(requestIntraChipL1Network_in, RequestMsg, requestToL1Cache, rank = 1) {
      if(requestIntraChipL1Network_in.isReady()) {
        peek(requestIntraChipL1Network_in, RequestMsg, block_on="Address") {
          assert(in_msg.Destination.isElement(machineID));
@@ -278,29 +294,30 @@ machine(L1Cache, "MSI Directory L1 Cache CMP")
    }
  
    // Mandatory Queue betweens Node's CPU and it's L1 caches
-  in_port(mandatoryQueue_in, CacheMsg, mandatoryQueue, desc="...") {
+  in_port(mandatoryQueue_in, RubyRequest, mandatoryQueue, desc="...", rank = 0) {
      if (mandatoryQueue_in.isReady()) {
-      peek(mandatoryQueue_in, CacheMsg, block_on="LineAddress") {
+      peek(mandatoryQueue_in, RubyRequest, block_on="LineAddress") {
  
          // Check for data access to blocks in I-cache and ifetchs to blocks in D-cache
  
-        if (in_msg.Type == CacheRequestType:IFETCH) {
+        if (in_msg.Type == RubyRequestType:IFETCH) {
            // ** INSTRUCTION ACCESS ***
  
-          // Check to see if it is in the OTHER L1
-          Entry L1Dcache_entry := getL1DCacheEntry(in_msg.LineAddress);
-          if (is_valid(L1Dcache_entry)) {
-            // The block is in the wrong L1, put the request on the queue to the shared L2
-            trigger(Event:L1_Replacement, in_msg.LineAddress,
-                    L1Dcache_entry, L1_TBEs[in_msg.LineAddress]);
-          }
-
            Entry L1Icache_entry := getL1ICacheEntry(in_msg.LineAddress);
            if (is_valid(L1Icache_entry)) {
              // The tag matches for the L1, so the L1 asks the L2 for it.
              trigger(mandatory_request_type_to_event(in_msg.Type), in_msg.LineAddress,
                      L1Icache_entry, L1_TBEs[in_msg.LineAddress]);
            } else {
+
+            // Check to see if it is in the OTHER L1
+            Entry L1Dcache_entry := getL1DCacheEntry(in_msg.LineAddress);
+            if (is_valid(L1Dcache_entry)) {
+              // The block is in the wrong L1, put the request on the queue to the shared L2
+              trigger(Event:L1_Replacement, in_msg.LineAddress,
+                      L1Dcache_entry, L1_TBEs[in_msg.LineAddress]);
+            }
+
              if (L1IcacheMemory.cacheAvail(in_msg.LineAddress)) {
                // L1 does't have the line, but we have space for it in the L1 so let's see if the L2 has it
                trigger(mandatory_request_type_to_event(in_msg.Type), in_msg.LineAddress,
@@ -313,21 +330,23 @@ machine(L1Cache, "MSI Directory L1 Cache CMP")
              }
            }
          } else {
-          // *** DATA ACCESS ***
-          // Check to see if it is in the OTHER L1
-          Entry L1Icache_entry := getL1ICacheEntry(in_msg.LineAddress);
-          if (is_valid(L1Icache_entry)) {
-            // The block is in the wrong L1, put the request on the queue to the shared L2
-            trigger(Event:L1_Replacement, in_msg.LineAddress,
-                    L1Icache_entry, L1_TBEs[in_msg.LineAddress]);
-          }
  
+          // *** DATA ACCESS ***
            Entry L1Dcache_entry := getL1DCacheEntry(in_msg.LineAddress);
            if (is_valid(L1Dcache_entry)) {
              // The tag matches for the L1, so the L1 ask the L2 for it
              trigger(mandatory_request_type_to_event(in_msg.Type), in_msg.LineAddress,
                      L1Dcache_entry, L1_TBEs[in_msg.LineAddress]);
            } else {
+
+            // Check to see if it is in the OTHER L1
+            Entry L1Icache_entry := getL1ICacheEntry(in_msg.LineAddress);
+            if (is_valid(L1Icache_entry)) {
+              // The block is in the wrong L1, put the request on the queue to the shared L2
+              trigger(Event:L1_Replacement, in_msg.LineAddress,
+                      L1Icache_entry, L1_TBEs[in_msg.LineAddress]);
+            }
+
              if (L1DcacheMemory.cacheAvail(in_msg.LineAddress)) {
                // L1 does't have the line, but we have space for it in the L1 let's see if the L2 has it
                trigger(mandatory_request_type_to_event(in_msg.Type), in_msg.LineAddress,
@@ -346,7 +365,7 @@ machine(L1Cache, "MSI Directory L1 Cache CMP")
  
    // ACTIONS
    action(a_issueGETS, "a", desc="Issue GETS") {
-    peek(mandatoryQueue_in, CacheMsg) {
+    peek(mandatoryQueue_in, RubyRequest) {
        enqueue(requestIntraChipL1Network_out, RequestMsg, latency=l1_request_latency) {
          out_msg.Address := address;
          out_msg.Type := CoherenceRequestType:GETS;
@@ -363,7 +382,7 @@ machine(L1Cache, "MSI Directory L1 Cache CMP")
    }
  
    action(ai_issueGETINSTR, "ai", desc="Issue GETINSTR") {
-    peek(mandatoryQueue_in, CacheMsg) {
+    peek(mandatoryQueue_in, RubyRequest) {
        enqueue(requestIntraChipL1Network_out, RequestMsg, latency=l1_request_latency) {
          out_msg.Address := address;
          out_msg.Type := CoherenceRequestType:GET_INSTR;
@@ -381,7 +400,7 @@ machine(L1Cache, "MSI Directory L1 Cache CMP")
  
  
    action(b_issueGETX, "b", desc="Issue GETX") {
-    peek(mandatoryQueue_in, CacheMsg) {
+    peek(mandatoryQueue_in, RubyRequest) {
        enqueue(requestIntraChipL1Network_out, RequestMsg, latency=l1_request_latency) {
          out_msg.Address := address;
          out_msg.Type := CoherenceRequestType:GETX;
@@ -399,7 +418,7 @@ machine(L1Cache, "MSI Directory L1 Cache CMP")
    }
  
    action(c_issueUPGRADE, "c", desc="Issue GETX") {
-    peek(mandatoryQueue_in, CacheMsg) {
+    peek(mandatoryQueue_in, RubyRequest) {
        enqueue(requestIntraChipL1Network_out, RequestMsg, latency= l1_request_latency) {
          out_msg.Address := address;
          out_msg.Type := CoherenceRequestType:UPGRADE;
@@ -526,6 +545,12 @@ machine(L1Cache, "MSI Directory L1 Cache CMP")
      }
    }
  
+  action(forward_eviction_to_cpu, "\cc", desc="sends eviction information to the processor") { // reports address to the sequencer's eviction callback
+    if (send_evictions) { // send_evictions is a machine parameter; the action does nothing when it is false
+      DPRINTF(RubySlicc, "Sending invalidation for %s to the CPU\n", address);
+      sequencer.evictionCallback(address);
+    }
+  }
  
    action(g_issuePUTX, "g", desc="send data to the L2 cache") {
      enqueue(requestIntraChipL1Network_out, RequestMsg, latency=l1_response_latency) {
@@ -629,9 +654,6 @@ machine(L1Cache, "MSI Directory L1 Cache CMP")
      }
    }
  
-  action(z_stall, "z", desc="Stall") {
-  }
-
    action(ff_deallocateL1CacheBlock, "\f", desc="Deallocate L1 cache block.  Sets the cache to not present, allowing a replacement in parallel with a fetch.") {
      if (L1DcacheMemory.isTagPresent(address)) {
        L1DcacheMemory.deallocate(address);
@@ -653,22 +675,33 @@ machine(L1Cache, "MSI Directory L1 Cache CMP")
      }
    }
  
-  action(zz_recycleRequestQueue, "zz", desc="recycle L1 request queue") {
-    requestIntraChipL1Network_in.recycle();
+  action(z_stallAndWaitMandatoryQueue, "\z", desc="stall and wait on the mandatory queue") { // parks the head mandatory-queue request instead of recycling it
+    stall_and_wait(mandatoryQueue_in, address); // presumably released later by wakeUpBuffers(address) - confirm against SLICC docs
    }
  
-  action(z_recycleMandatoryQueue, "\z", desc="recycle L1 request queue") {
-    mandatoryQueue_in.recycle();
+  action(kd_wakeUpDependents, "kd", desc="wake-up dependents") { // calls wakeUpBuffers for the current address
+    wakeUpBuffers(address); // presumably re-enables requests parked by stall_and_wait on this address - confirm
    }
  
+  action(uu_profileInstMiss, "\ui", desc="Profile the demand miss") { // records the head mandatory-queue request as a miss on the L1 I-cache
+    peek(mandatoryQueue_in, RubyRequest) { // peeks only; the request stays on the queue
+        L1IcacheMemory.profileMiss(in_msg);
+    }
+  }
+
+  action(uu_profileDataMiss, "\ud", desc="Profile the demand miss") { // records the head mandatory-queue request as a miss on the L1 D-cache
+    peek(mandatoryQueue_in, RubyRequest) { // peeks only; the request stays on the queue
+        L1DcacheMemory.profileMiss(in_msg);
+    }
+  }
  
    //*****************************************************
    // TRANSITIONS
    //*****************************************************
  
    // Transitions for Load/Store/Replacement/WriteBack from transient states
-  transition({IS, IM, IS_I, M_I, E_I, SM}, {Load, Ifetch, Store, L1_Replacement}) {
-    z_recycleMandatoryQueue;
+  transition({IS, IM, IS_I, M_I, SM, SINK_WB_ACK}, {Load, Ifetch, Store, L1_Replacement}) { // CPU-side events arriving in any transient state are stalled
+    z_stallAndWaitMandatoryQueue; // replaces the old recycle of the mandatory queue
    }
  
    // Transitions from Idle
@@ -680,6 +713,7 @@ machine(L1Cache, "MSI Directory L1 Cache CMP")
      oo_allocateL1DCacheBlock;
      i_allocateTBE;
      a_issueGETS;
+    uu_profileDataMiss;
      k_popMandatoryQueue;
    }
  
@@ -687,6 +721,7 @@ machine(L1Cache, "MSI Directory L1 Cache CMP")
      pp_allocateL1ICacheBlock;
      i_allocateTBE;
      ai_issueGETINSTR;
+    uu_profileInstMiss;
      k_popMandatoryQueue;
    }
  
@@ -694,6 +729,7 @@ machine(L1Cache, "MSI Directory L1 Cache CMP")
      oo_allocateL1DCacheBlock;
      i_allocateTBE;
      b_issueGETX;
+    uu_profileDataMiss;
      k_popMandatoryQueue;
    }
  
@@ -711,14 +747,17 @@ machine(L1Cache, "MSI Directory L1 Cache CMP")
    transition(S, Store, SM) {
      i_allocateTBE;
      c_issueUPGRADE;
+    uu_profileDataMiss; // placed before the pop: the action peeks mandatoryQueue_in
      k_popMandatoryQueue;
    }
  
    transition(S, L1_Replacement, I) {
+    forward_eviction_to_cpu; // report the address to the sequencer (if send_evictions) before the block is deallocated
      ff_deallocateL1CacheBlock;
    }
  
    transition(S, Inv, I) {
+    forward_eviction_to_cpu; // report the invalidated address to the sequencer when send_evictions is set
      fi_sendInvAck;
      l_popRequestQueue;
    }
@@ -737,6 +776,7 @@ machine(L1Cache, "MSI Directory L1 Cache CMP")
  
    transition(E, L1_Replacement, M_I) {
      // silent E replacement??
+    forward_eviction_to_cpu;
      i_allocateTBE;
      g_issuePUTX;   // send data, but hold in case forwarded request
      ff_deallocateL1CacheBlock;
@@ -744,11 +784,13 @@ machine(L1Cache, "MSI Directory L1 Cache CMP")
  
    transition(E, Inv, I) {
      // don't send data
+    forward_eviction_to_cpu; // report the invalidated address to the sequencer when send_evictions is set
      fi_sendInvAck;
      l_popRequestQueue;
    }
  
    transition(E, Fwd_GETX, I) {
+    forward_eviction_to_cpu; // the line goes to I, so the sequencer is told about the address (if send_evictions)
      d_sendDataToRequestor;
      l_popRequestQueue;
    }
@@ -771,6 +813,7 @@ machine(L1Cache, "MSI Directory L1 Cache CMP")
    }
  
    transition(M, L1_Replacement, M_I) {
+    forward_eviction_to_cpu;
      i_allocateTBE;
      g_issuePUTX;   // send data, but hold in case forwarded request
      ff_deallocateL1CacheBlock;
@@ -779,9 +822,11 @@ machine(L1Cache, "MSI Directory L1 Cache CMP")
    transition(M_I, WB_Ack, I) {
      s_deallocateTBE;
      o_popIncomingResponseQueue;
+    kd_wakeUpDependents; // calls wakeUpBuffers(address) now that the transient state has ended
    }
  
    transition(M, Inv, I) {
+    forward_eviction_to_cpu; // report the invalidated address to the sequencer when send_evictions is set
      f_sendDataToL2;
      l_popRequestQueue;
    }
@@ -792,6 +837,7 @@ machine(L1Cache, "MSI Directory L1 Cache CMP")
    }
  
    transition(M, Fwd_GETX, I) {
+    forward_eviction_to_cpu; // the line goes to I, so the sequencer is told about the address (if send_evictions)
      d_sendDataToRequestor;
      l_popRequestQueue;
    }
@@ -824,6 +870,7 @@ machine(L1Cache, "MSI Directory L1 Cache CMP")
      h_load_hit;
      s_deallocateTBE;
      o_popIncomingResponseQueue;
+    kd_wakeUpDependents;
    }
  
    transition(IS_I, Data_all_Acks, I) {
@@ -831,15 +878,16 @@ machine(L1Cache, "MSI Directory L1 Cache CMP")
      h_load_hit;
      s_deallocateTBE;
      o_popIncomingResponseQueue;
+    kd_wakeUpDependents;
    }
  
-
    transition(IS, DataS_fromL1, S) {
      u_writeDataToL1Cache;
      j_sendUnblock;
      h_load_hit;
      s_deallocateTBE;
      o_popIncomingResponseQueue;
+    kd_wakeUpDependents; // calls wakeUpBuffers(address) once the fill has completed
    }
  
    transition(IS_I, DataS_fromL1, I) {
@@ -848,6 +896,7 @@ machine(L1Cache, "MSI Directory L1 Cache CMP")
      h_load_hit;
      s_deallocateTBE;
      o_popIncomingResponseQueue;
+    kd_wakeUpDependents;
    }
  
    // directory is blocked when sending exclusive data
@@ -857,6 +906,7 @@ machine(L1Cache, "MSI Directory L1 Cache CMP")
      jj_sendExclusiveUnblock;
      s_deallocateTBE;
      o_popIncomingResponseQueue;
+    kd_wakeUpDependents;
    }
  
    transition(IS, Data_Exclusive, E) {
@@ -865,6 +915,7 @@ machine(L1Cache, "MSI Directory L1 Cache CMP")
      jj_sendExclusiveUnblock;
      s_deallocateTBE;
      o_popIncomingResponseQueue;
+    kd_wakeUpDependents;
    }
  
    // Transitions from IM
@@ -885,6 +936,7 @@ machine(L1Cache, "MSI Directory L1 Cache CMP")
      jj_sendExclusiveUnblock;
      s_deallocateTBE;
      o_popIncomingResponseQueue;
+    kd_wakeUpDependents;
    }
  
    // transitions from SM
@@ -898,11 +950,7 @@ machine(L1Cache, "MSI Directory L1 Cache CMP")
      hh_store_hit;
      s_deallocateTBE;
      o_popIncomingResponseQueue;
-  }
-
-  transition(SINK_WB_ACK, {Load, Store, Ifetch, L1_Replacement}){
-      z_recycleMandatoryQueue;
-
+    kd_wakeUpDependents;
    }
  
    transition(SINK_WB_ACK, Inv){
@@ -910,11 +958,9 @@ machine(L1Cache, "MSI Directory L1 Cache CMP")
      l_popRequestQueue;
    }
  
-  transition(SINK_WB_ACK, WB_Ack){
+  transition(SINK_WB_ACK, WB_Ack, I){ // explicit next state I added; the old header named no next state
      s_deallocateTBE;
      o_popIncomingResponseQueue;
+    kd_wakeUpDependents; // calls wakeUpBuffers(address) after the write-back ack is consumed
    }
  }
-
-
-