From 3225379cc0ab38e559dd05a8b161d80f16e67d78 Mon Sep 17 00:00:00 2001 From: Lena Olson Date: Wed, 16 Sep 2015 20:18:40 -0500 Subject: [PATCH] ruby: Add missing block deallocations in MOESI_hammer Some blocks in MOESI hammer were not getting deallocated when they were set to an idle state (e.g. by invalidate or other_getx/s messages). While functionally correct, this caused some bad effects on performance, such as blocks in I in the L1s getting sent to the L2 upon eviction, in turn evicting valid blocks. Also, if a valid block was in LRU, that block could be evicted rather than a block in I. This patch adds in the missing deallocations. Committed by: Nilay Vaish --- src/mem/protocol/MOESI_hammer-cache.sm | 60 +++++++++++--------------- 1 file changed, 24 insertions(+), 36 deletions(-) diff --git a/src/mem/protocol/MOESI_hammer-cache.sm b/src/mem/protocol/MOESI_hammer-cache.sm index 5d2383541..cfb2526a5 100644 --- a/src/mem/protocol/MOESI_hammer-cache.sm +++ b/src/mem/protocol/MOESI_hammer-cache.sm @@ -89,7 +89,6 @@ machine({L1Cache, L2Cache}, "AMD Hammer-like protocol") OI, AccessPermission:Busy, "OI", desc="Issued PutO, waiting for ack"; MI, AccessPermission:Busy, "MI", desc="Issued PutX, waiting for ack"; II, AccessPermission:Busy, "II", desc="Issued PutX/O, saw Other_GETS or Other_GETX, waiting for ack"; - IT, AccessPermission:Busy, "IT", desc="Invalid block transferring to L1"; ST, AccessPermission:Busy, "ST", desc="S block transferring to L1"; OT, AccessPermission:Busy, "OT", desc="O block transferring to L1"; MT, AccessPermission:Busy, "MT", desc="M block transferring to L1"; @@ -1243,6 +1242,20 @@ machine({L1Cache, L2Cache}, "AMD Hammer-like protocol") unset_cache_entry(); } + action(gr_deallocateCacheBlock, "\gr", desc="Deallocate an L1 or L2 cache block.") { + if (L1Dcache.isTagPresent(address)) { + L1Dcache.deallocate(address); + } + else if (L1Icache.isTagPresent(address)){ + L1Icache.deallocate(address); + } + else { + assert(L2cache.isTagPresent(address)); + L2cache.deallocate(address); + } + unset_cache_entry(); + } + action(forward_eviction_to_cpu, "\cc", desc="sends eviction information to the processor") { if (send_evictions) { DPRINTF(RubySlicc, "Sending invalidation for %s to the CPU\n", address); @@ -1296,7 +1309,7 @@ machine({L1Cache, L2Cache}, "AMD Hammer-like protocol") //***************************************************** // Transitions for Load/Store/L2_Replacement from transient states - transition({IM, IM_F, MM_WF, SM, SM_F, ISM, ISM_F, OM, OM_F, IS, SS, OI, MI, II, IT, ST, OT, MT, MMT}, {Store, L2_Replacement}) { + transition({IM, IM_F, MM_WF, SM, SM_F, ISM, ISM_F, OM, OM_F, IS, SS, OI, MI, II, ST, OT, MT, MMT}, {Store, L2_Replacement}) { zz_stallAndWaitMandatoryQueue; } @@ -1308,11 +1321,11 @@ machine({L1Cache, L2Cache}, "AMD Hammer-like protocol") zz_stallAndWaitMandatoryQueue; } - transition({IM, IS, OI, MI, II, IT, ST, OT, MT, MMT, MI_F, MM_F, OM_F, IM_F, ISM_F, SM_F, MM_WF}, {Load, Ifetch}) { + transition({IM, IS, OI, MI, II, ST, OT, MT, MMT, MI_F, MM_F, OM_F, IM_F, ISM_F, SM_F, MM_WF}, {Load, Ifetch}) { zz_stallAndWaitMandatoryQueue; } - transition({IM, SM, ISM, OM, IS, SS, MM_W, M_W, OI, MI, II, IT, ST, OT, MT, MMT, IM_F, SM_F, ISM_F, OM_F, MM_WF, MI_F, MM_F, IR, SR, OR, MR, MMR}, L1_to_L2) { + transition({IM, SM, ISM, OM, IS, SS, MM_W, M_W, OI, MI, II, ST, OT, MT, MMT, IM_F, SM_F, ISM_F, OM_F, MM_WF, MI_F, MM_F, IR, SR, OR, MR, MMR}, L1_to_L2) { zz_stallAndWaitMandatoryQueue; } @@ -1324,7 +1337,7 @@ machine({L1Cache, L2Cache}, "AMD Hammer-like protocol") zz_stallAndWaitMandatoryQueue; } - transition({IT, ST, OT, MT, MMT}, {Other_GETX, NC_DMA_GETS, Other_GETS, Merged_GETS, Other_GETS_No_Mig, Invalidate, Flush_line}) { + transition({ST, OT, MT, MMT}, {Other_GETX, NC_DMA_GETS, Other_GETS, Merged_GETS, Other_GETS_No_Mig, Invalidate, Flush_line}) { z_stall; } @@ -1333,7 +1346,7 @@ machine({L1Cache, L2Cache}, "AMD Hammer-like protocol") } // Transitions moving data between the L1 and L2 caches - transition({I, S, O, M, MM}, L1_to_L2) { + transition({S, O, M, MM}, L1_to_L2) { i_allocateTBE; gg_deallocateL1CacheBlock; vv_allocateL2CacheBlock; @@ -1341,16 +1354,6 @@ machine({L1Cache, L2Cache}, "AMD Hammer-like protocol") s_deallocateTBE; } - transition(I, Trigger_L2_to_L1D, IT) { - i_allocateTBE; - rr_deallocateL2CacheBlock; - ii_allocateL1DCacheBlock; - nb_copyFromTBEToL1; // Not really needed for state I - s_deallocateTBE; - zz_stallAndWaitMandatoryQueue; - ll_L2toL1Transfer; - } - transition(S, Trigger_L2_to_L1D, ST) { i_allocateTBE; rr_deallocateL2CacheBlock; @@ -1391,16 +1394,6 @@ machine({L1Cache, L2Cache}, "AMD Hammer-like protocol") ll_L2toL1Transfer; } - transition(I, Trigger_L2_to_L1I, IT) { - i_allocateTBE; - rr_deallocateL2CacheBlock; - jj_allocateL1ICacheBlock; - nb_copyFromTBEToL1; - s_deallocateTBE; - zz_stallAndWaitMandatoryQueue; - ll_L2toL1Transfer; - } - transition(S, Trigger_L2_to_L1I, ST) { i_allocateTBE; rr_deallocateL2CacheBlock; @@ -1441,11 +1434,6 @@ machine({L1Cache, L2Cache}, "AMD Hammer-like protocol") ll_L2toL1Transfer; } - transition(IT, Complete_L2_to_L1, IR) { - j_popTriggerQueue; - kd_wakeUpDependents; - } - transition(ST, Complete_L2_to_L1, SR) { j_popTriggerQueue; kd_wakeUpDependents; @@ -1500,11 +1488,6 @@ machine({L1Cache, L2Cache}, "AMD Hammer-like protocol") k_popMandatoryQueue; } - transition(I, L2_Replacement) { - rr_deallocateL2CacheBlock; - ka_wakeUpAllDependents; - } - transition(I, {Other_GETX, NC_DMA_GETS, Other_GETS, Other_GETS_No_Mig, Invalidate}) { f_sendAck; l_popForwardQueue; @@ -1564,6 +1547,7 @@ machine({L1Cache, L2Cache}, "AMD Hammer-like protocol") transition(S, {Other_GETX, Invalidate}, I) { f_sendAck; forward_eviction_to_cpu; + gr_deallocateCacheBlock; l_popForwardQueue; } @@ -1630,6 +1614,7 @@ machine({L1Cache, L2Cache}, "AMD Hammer-like protocol") transition(O, {Other_GETX, Invalidate}, I) { e_sendData; forward_eviction_to_cpu; + gr_deallocateCacheBlock; l_popForwardQueue; } @@ -1712,12 +1697,14 @@ machine({L1Cache, L2Cache}, "AMD Hammer-like protocol") transition(MM, {Other_GETX, Invalidate}, I) { c_sendExclusiveData; forward_eviction_to_cpu; + gr_deallocateCacheBlock; l_popForwardQueue; } transition(MM, Other_GETS, I) { c_sendExclusiveData; forward_eviction_to_cpu; + gr_deallocateCacheBlock; l_popForwardQueue; } @@ -1778,6 +1765,7 @@ machine({L1Cache, L2Cache}, "AMD Hammer-like protocol") transition(M, {Other_GETX, Invalidate}, I) { c_sendExclusiveData; forward_eviction_to_cpu; + gr_deallocateCacheBlock; l_popForwardQueue; } -- 2.30.2