From 676ae5782796bd6b4f202e138b72c1048efceabd Mon Sep 17 00:00:00 2001 From: Nilay Vaish Date: Mon, 3 Aug 2015 22:44:29 -0500 Subject: [PATCH] ruby: mesi three level: multiple corrections to the protocol 1. Eliminate state NP in L0 and L1 Caches: The two states 'NP' and 'I' both mean that the cache block is not present in the cache. 'I' also means that the cache entry has been allocated. This causes problems when we do not correctly initialize the cache entry when it is re-used. Hence, this patch eliminates the state NP altogether. Every time a new block comes into the cache, a cache entry is allocated. Every time a block leaves, the corresponding entry is deallocated. 2. Separate transient state for instruction fetches: purely for accounting purposes. 3. Drop state IS_I in L1 Cache and the message type STALE_DATA: when invalidation is received for a block in IS, the block used to be moved to IS_I. This meant that the data that would arrive in future would be used but not stored since the controller lost the permissions after gaining them. This state is being dropped and now invalidation messages would not be processed till the data has arrived. This also means that STALE_DATA type is no longer required. --- src/mem/protocol/MESI_Three_Level-L0cache.sm | 70 +++++++++------ src/mem/protocol/MESI_Three_Level-L1cache.sm | 90 +++++--------------- src/mem/protocol/MESI_Three_Level-msg.sm | 5 -- 3 files changed, 66 insertions(+), 99 deletions(-) diff --git a/src/mem/protocol/MESI_Three_Level-L0cache.sm b/src/mem/protocol/MESI_Three_Level-L0cache.sm index 8ab0a92cb..5bbc83bd0 100644 --- a/src/mem/protocol/MESI_Three_Level-L0cache.sm +++ b/src/mem/protocol/MESI_Three_Level-L0cache.sm @@ -48,9 +48,6 @@ machine(L0Cache, "MESI Directory L0 Cache") // Base states // The cache entry has not been allocated. - NP, AccessPermission:Invalid, desc="Not present in either cache"; - - // The cache entry has been allocated, but is not in use. 
I, AccessPermission:Invalid; // The cache entry is in shared mode. The processor can read this entry @@ -67,6 +64,10 @@ machine(L0Cache, "MESI Directory L0 Cache") // Transient States + // The cache controller has requested an instruction. It will be stored + // in the shared state so that the processor can read it. + Inst_IS, AccessPermission:Busy; + // The cache controller has requested that this entry be fetched in // shared state so that the processor can read it. IS, AccessPermission:Busy; @@ -100,7 +101,6 @@ machine(L0Cache, "MESI Directory L0 Cache") Data, desc="Data for processor"; Data_Exclusive, desc="Data for processor"; - Data_Stale, desc="Data for processor, but not for storage"; Ack, desc="Ack for processor"; Ack_all, desc="Last ack for processor"; @@ -172,7 +172,7 @@ machine(L0Cache, "MESI Directory L0 Cache") } else if (is_valid(cache_entry)) { return cache_entry.CacheState; } - return State:NP; + return State:I; } void setState(TBE tbe, Entry cache_entry, Address addr, State state) { @@ -266,8 +266,6 @@ machine(L0Cache, "MESI Directory L0 Cache") trigger(Event:Data_Exclusive, in_msg.Addr, cache_entry, tbe); } else if(in_msg.Class == CoherenceClass:DATA) { trigger(Event:Data, in_msg.Addr, cache_entry, tbe); - } else if(in_msg.Class == CoherenceClass:STALE_DATA) { - trigger(Event:Data_Stale, in_msg.Addr, cache_entry, tbe); } else if (in_msg.Class == CoherenceClass:ACK) { trigger(Event:Ack, in_msg.Addr, cache_entry, tbe); } else if (in_msg.Class == CoherenceClass:WB_ACK) { @@ -421,6 +419,7 @@ machine(L0Cache, "MESI Directory L0 Cache") out_msg.Dest := createMachineID(MachineType:L1Cache, version); out_msg.MessageSize := MessageSizeType:Writeback_Data; } + cache_entry.Dirty := false; } action(fi_sendInvAck, "fi", desc="send data to the L2 cache") { @@ -447,13 +446,13 @@ machine(L0Cache, "MESI Directory L0 Cache") assert(is_valid(cache_entry)); out_msg.Addr := address; out_msg.Class := CoherenceClass:PUTX; - out_msg.DataBlk := cache_entry.DataBlk; 
out_msg.Dirty := cache_entry.Dirty; out_msg.Sender:= machineID; out_msg.Dest := createMachineID(MachineType:L1Cache, version); if (cache_entry.Dirty) { out_msg.MessageSize := MessageSizeType:Writeback_Data; + out_msg.DataBlk := cache_entry.DataBlk; } else { out_msg.MessageSize := MessageSizeType:Writeback_Control; } @@ -466,6 +465,12 @@ machine(L0Cache, "MESI Directory L0 Cache") sequencer.readCallback(address, cache_entry.DataBlk); } + action(hx_load_hit, "hx", desc="If not prefetch, notify sequencer the load completed.") { + assert(is_valid(cache_entry)); + DPRINTF(RubySlicc, "%s\n", cache_entry.DataBlk); + sequencer.readCallback(address, cache_entry.DataBlk, true); + } + action(hh_store_hit, "\h", desc="If not prefetch, notify sequencer that store completed.") { assert(is_valid(cache_entry)); DPRINTF(RubySlicc, "%s\n", cache_entry.DataBlk); @@ -473,6 +478,13 @@ machine(L0Cache, "MESI Directory L0 Cache") cache_entry.Dirty := true; } + action(hhx_store_hit, "\hx", desc="If not prefetch, notify sequencer that store completed.") { + assert(is_valid(cache_entry)); + DPRINTF(RubySlicc, "%s\n", cache_entry.DataBlk); + sequencer.writeCallback(address, cache_entry.DataBlk, true); + cache_entry.Dirty := true; + } + action(i_allocateTBE, "i", desc="Allocate TBE (number of invalidates=0)") { check_allocate(TBEs); assert(is_valid(cache_entry)); @@ -505,7 +517,13 @@ machine(L0Cache, "MESI Directory L0 Cache") peek(messgeBuffer_in, CoherenceMsg) { assert(is_valid(cache_entry)); cache_entry.DataBlk := in_msg.DataBlk; - cache_entry.Dirty := in_msg.Dirty; + } + } + + action(u_writeInstToCache, "ui", desc="Write data to cache") { + peek(messgeBuffer_in, CoherenceMsg) { + assert(is_valid(cache_entry)); + cache_entry.DataBlk := in_msg.DataBlk; } } @@ -560,16 +578,12 @@ machine(L0Cache, "MESI Directory L0 Cache") //***************************************************** // Transitions for Load/Store/Replacement/WriteBack from transient states - transition({IS, IM, SM}, {Load, Ifetch, 
Store, L0_Replacement}) { + transition({Inst_IS, IS, IM, SM}, {Load, Ifetch, Store, L0_Replacement}) { z_stallAndWaitMandatoryQueue; } // Transitions from Idle - transition({NP,I}, L0_Replacement) { - ff_deallocateCacheBlock; - } - - transition({NP,I}, Load, IS) { + transition(I, Load, IS) { oo_allocateDCacheBlock; i_allocateTBE; a_issueGETS; @@ -577,7 +591,7 @@ machine(L0Cache, "MESI Directory L0 Cache") k_popMandatoryQueue; } - transition({NP,I}, Ifetch, IS) { + transition(I, Ifetch, Inst_IS) { pp_allocateICacheBlock; i_allocateTBE; a_issueGETS; @@ -585,7 +599,7 @@ machine(L0Cache, "MESI Directory L0 Cache") k_popMandatoryQueue; } - transition({NP,I}, Store, IM) { + transition(I, Store, IM) { oo_allocateDCacheBlock; i_allocateTBE; b_issueGETX; @@ -593,7 +607,7 @@ machine(L0Cache, "MESI Directory L0 Cache") k_popMandatoryQueue; } - transition({NP, I, IS, IM}, Inv) { + transition({I, IS, IM, Inst_IS}, Inv) { fi_sendInvAck; l_popRequestQueue; } @@ -682,7 +696,7 @@ machine(L0Cache, "MESI Directory L0 Cache") transition(IS, Data, S) { u_writeDataToCache; - h_load_hit; + hx_load_hit; s_deallocateTBE; o_popIncomingResponseQueue; kd_wakeUpDependents; @@ -690,15 +704,23 @@ machine(L0Cache, "MESI Directory L0 Cache") transition(IS, Data_Exclusive, E) { u_writeDataToCache; - h_load_hit; + hx_load_hit; s_deallocateTBE; o_popIncomingResponseQueue; kd_wakeUpDependents; } - transition(IS, Data_Stale, I) { - u_writeDataToCache; - h_load_hit; + transition(Inst_IS, Data, S) { + u_writeInstToCache; + hx_load_hit; + s_deallocateTBE; + o_popIncomingResponseQueue; + kd_wakeUpDependents; + } + + transition(Inst_IS, Data_Exclusive, E) { + u_writeInstToCache; + hx_load_hit; s_deallocateTBE; o_popIncomingResponseQueue; kd_wakeUpDependents; @@ -706,7 +728,7 @@ machine(L0Cache, "MESI Directory L0 Cache") transition({IM,SM}, Data_Exclusive, M) { u_writeDataToCache; - hh_store_hit; + hhx_store_hit; s_deallocateTBE; o_popIncomingResponseQueue; kd_wakeUpDependents; diff --git 
a/src/mem/protocol/MESI_Three_Level-L1cache.sm b/src/mem/protocol/MESI_Three_Level-L1cache.sm index 7db3daede..737430765 100644 --- a/src/mem/protocol/MESI_Three_Level-L1cache.sm +++ b/src/mem/protocol/MESI_Three_Level-L1cache.sm @@ -59,7 +59,6 @@ machine(L1Cache, "MESI Directory L1 Cache CMP") // STATES state_declaration(State, desc="Cache states", default="L1Cache_State_I") { // Base states - NP, AccessPermission:Invalid, desc="Not present in either cache"; I, AccessPermission:Invalid, desc="a L1 cache entry Idle"; S, AccessPermission:Read_Only, desc="a L1 cache entry Shared"; SS, AccessPermission:Read_Only, desc="a L1 cache entry Shared"; @@ -72,8 +71,6 @@ machine(L1Cache, "MESI Directory L1 Cache CMP") IS, AccessPermission:Busy, desc="L1 idle, issued GETS, have not seen response yet"; IM, AccessPermission:Busy, desc="L1 idle, issued GETX, have not seen response yet"; SM, AccessPermission:Read_Only, desc="L1 idle, issued GETX, have not seen response yet"; - IS_I, AccessPermission:Busy, desc="L1 idle, issued GETS, saw Inv before data because directory doesn't block on GETS hit"; - M_I, AccessPermission:Busy, desc="L1 replacing, waiting for ACK"; SINK_WB_ACK, AccessPermission:Busy, desc="This is to sink WB_Acks from L2"; @@ -174,7 +171,7 @@ machine(L1Cache, "MESI Directory L1 Cache CMP") } else if (is_valid(cache_entry)) { return cache_entry.CacheState; } - return State:NP; + return State:I; } void setState(TBE tbe, Entry cache_entry, Address addr, State state) { @@ -279,8 +276,7 @@ machine(L1Cache, "MESI Directory L1 Cache CMP") if(in_msg.Type == CoherenceResponseType:DATA_EXCLUSIVE) { trigger(Event:Data_Exclusive, in_msg.Addr, cache_entry, tbe); } else if(in_msg.Type == CoherenceResponseType:DATA) { - if ((getState(tbe, cache_entry, in_msg.Addr) == State:IS || - getState(tbe, cache_entry, in_msg.Addr) == State:IS_I) && + if (getState(tbe, cache_entry, in_msg.Addr) == State:IS && machineIDToMachineType(in_msg.Sender) == MachineType:L1Cache) { 
trigger(Event:DataS_fromL1, in_msg.Addr, cache_entry, tbe); @@ -561,13 +557,13 @@ machine(L1Cache, "MESI Directory L1 Cache CMP") assert(is_valid(cache_entry)); out_msg.Addr := address; out_msg.Type := CoherenceRequestType:PUTX; - out_msg.DataBlk := cache_entry.DataBlk; out_msg.Dirty := cache_entry.Dirty; out_msg.Requestor:= machineID; out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache, l2_select_low_bit, l2_select_num_bits, clusterID)); if (cache_entry.Dirty) { out_msg.MessageSize := MessageSizeType:Writeback_Data; + out_msg.DataBlk := cache_entry.DataBlk; } else { out_msg.MessageSize := MessageSizeType:Writeback_Control; } @@ -608,21 +604,6 @@ machine(L1Cache, "MESI Directory L1 Cache CMP") out_msg.Sender := machineID; out_msg.Dest := createMachineID(MachineType:L0Cache, version); out_msg.DataBlk := cache_entry.DataBlk; - out_msg.Dirty := cache_entry.Dirty; - out_msg.MessageSize := MessageSizeType:Response_Data; - } - } - - action(h_stale_data_to_l0, "hs", desc="If not prefetch, send data to the L0 cache.") { - enqueue(bufferToL0_out, CoherenceMsg, l1_response_latency) { - assert(is_valid(cache_entry)); - - out_msg.Addr := address; - out_msg.Class := CoherenceClass:STALE_DATA; - out_msg.Sender := machineID; - out_msg.Dest := createMachineID(MachineType:L0Cache, version); - out_msg.DataBlk := cache_entry.DataBlk; - out_msg.Dirty := cache_entry.Dirty; out_msg.MessageSize := MessageSizeType:Response_Data; } } @@ -639,7 +620,7 @@ machine(L1Cache, "MESI Directory L1 Cache CMP") out_msg.Dirty := cache_entry.Dirty; out_msg.MessageSize := MessageSizeType:Response_Data; - cache_entry.Dirty := true; + //cache_entry.Dirty := true; } } @@ -674,8 +655,10 @@ machine(L1Cache, "MESI Directory L1 Cache CMP") action(u_writeDataFromL0Request, "ureql0", desc="Write data to cache") { peek(messageBufferFromL0_in, CoherenceMsg) { assert(is_valid(cache_entry)); - cache_entry.DataBlk := in_msg.DataBlk; - cache_entry.Dirty := in_msg.Dirty; + if (in_msg.Dirty) { + 
cache_entry.DataBlk := in_msg.DataBlk; + cache_entry.Dirty := in_msg.Dirty; + } } } @@ -683,15 +666,16 @@ machine(L1Cache, "MESI Directory L1 Cache CMP") peek(responseNetwork_in, ResponseMsg) { assert(is_valid(cache_entry)); cache_entry.DataBlk := in_msg.DataBlk; - cache_entry.Dirty := in_msg.Dirty; } } action(u_writeDataFromL0Response, "uresl0", desc="Write data to cache") { peek(messageBufferFromL0_in, CoherenceMsg) { assert(is_valid(cache_entry)); - cache_entry.DataBlk := in_msg.DataBlk; - cache_entry.Dirty := in_msg.Dirty; + if (in_msg.Dirty) { + cache_entry.DataBlk := in_msg.DataBlk; + cache_entry.Dirty := in_msg.Dirty; + } } } @@ -745,17 +729,12 @@ machine(L1Cache, "MESI Directory L1 Cache CMP") //***************************************************** // Transitions for Load/Store/Replacement/WriteBack from transient states - transition({IS, IM, IS_I, M_I, SM, SINK_WB_ACK, S_IL0, M_IL0, E_IL0, MM_IL0}, + transition({IS, IM, M_I, SM, SINK_WB_ACK, S_IL0, M_IL0, E_IL0, MM_IL0}, {Load, Store, L1_Replacement}) { z0_stallAndWaitL0Queue; } - // Transitions from Idle - transition({NP,I}, L1_Replacement) { - ff_deallocateCacheBlock; - } - - transition({NP,I}, Load, IS) { + transition(I, Load, IS) { oo_allocateCacheBlock; i_allocateTBE; a_issueGETS; @@ -763,7 +742,7 @@ machine(L1Cache, "MESI Directory L1 Cache CMP") k_popL0RequestQueue; } - transition({NP,I}, Store, IM) { + transition(I, Store, IM) { oo_allocateCacheBlock; i_allocateTBE; b_issueGETX; @@ -771,7 +750,7 @@ machine(L1Cache, "MESI Directory L1 Cache CMP") k_popL0RequestQueue; } - transition({NP, I}, Inv) { + transition(I, Inv) { fi_sendInvAck; l_popL2RequestQueue; } @@ -869,6 +848,7 @@ machine(L1Cache, "MESI Directory L1 Cache CMP") transition(M_I, WB_Ack, I) { s_deallocateTBE; o_popL2ResponseQueue; + ff_deallocateCacheBlock; kd_wakeUpDependents; } @@ -885,6 +865,7 @@ machine(L1Cache, "MESI Directory L1 Cache CMP") transition(MM, Fwd_GETX, I) { d_sendDataToRequestor; + ff_deallocateCacheBlock; 
l_popL2RequestQueue; } @@ -910,11 +891,6 @@ machine(L1Cache, "MESI Directory L1 Cache CMP") } // Transitions from IS - transition({IS,IS_I}, Inv, IS_I) { - fi_sendInvAck; - l_popL2RequestQueue; - } - transition(IS, Data_all_Acks, S) { u_writeDataFromL2Response; h_data_to_l0; @@ -923,14 +899,6 @@ machine(L1Cache, "MESI Directory L1 Cache CMP") kd_wakeUpDependents; } - transition(IS_I, Data_all_Acks, I) { - u_writeDataFromL2Response; - h_stale_data_to_l0; - s_deallocateTBE; - o_popL2ResponseQueue; - kd_wakeUpDependents; - } - transition(IS, DataS_fromL1, S) { u_writeDataFromL2Response; j_sendUnblock; @@ -940,15 +908,6 @@ machine(L1Cache, "MESI Directory L1 Cache CMP") kd_wakeUpDependents; } - transition(IS_I, DataS_fromL1, I) { - u_writeDataFromL2Response; - j_sendUnblock; - h_stale_data_to_l0; - s_deallocateTBE; - o_popL2ResponseQueue; - kd_wakeUpDependents; - } - // directory is blocked when sending exclusive data transition(IS, Data_Exclusive, E) { u_writeDataFromL2Response; @@ -959,16 +918,6 @@ machine(L1Cache, "MESI Directory L1 Cache CMP") kd_wakeUpDependents; } - // directory is blocked when sending exclusive data - transition(IS_I, Data_Exclusive, E) { - u_writeDataFromL2Response; - hh_xdata_to_l0; - jj_sendExclusiveUnblock; - s_deallocateTBE; - o_popL2ResponseQueue; - kd_wakeUpDependents; - } - // Transitions from IM transition({IM,SM}, Inv, IM) { fi_sendInvAck; @@ -1015,6 +964,7 @@ machine(L1Cache, "MESI Directory L1 Cache CMP") transition(SINK_WB_ACK, WB_Ack, I){ s_deallocateTBE; o_popL2ResponseQueue; + ff_deallocateCacheBlock; kd_wakeUpDependents; } @@ -1058,7 +1008,7 @@ machine(L1Cache, "MESI Directory L1 Cache CMP") z2_stallAndWaitL2Queue; } - transition({S_IL0, M_IL0, E_IL0, MM_IL0}, {Inv, Fwd_GETX, Fwd_GETS}) { + transition({IS, S_IL0, M_IL0, E_IL0, MM_IL0}, {Inv, Fwd_GETX, Fwd_GETS}) { z2_stallAndWaitL2Queue; } } diff --git a/src/mem/protocol/MESI_Three_Level-msg.sm b/src/mem/protocol/MESI_Three_Level-msg.sm index 968d26964..1769995a0 100644 --- 
a/src/mem/protocol/MESI_Three_Level-msg.sm +++ b/src/mem/protocol/MESI_Three_Level-msg.sm @@ -46,11 +46,6 @@ enumeration(CoherenceClass, desc="...") { DATA, desc="Data block for L1 cache in S state"; DATA_EXCLUSIVE, desc="Data block for L1 cache in M/E state"; ACK, desc="Generic invalidate ack"; - - // This is a special case in which the L1 cache lost permissions to the - // shared block before it got the data. So the L0 cache can use the data - // but not store it. - STALE_DATA; } // Class for messages sent between the L0 and the L1 controllers. -- 2.30.2