From 8ec2abb98a2482e41ec7287832cd2cd4b2009d26 Mon Sep 17 00:00:00 2001 From: Tiago Muck Date: Thu, 2 May 2019 18:41:13 -0500 Subject: [PATCH] mem-ruby: fix MOESI_CMP_directory functional reads MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit This patch properly sets the access permissions in all controllers. 'Busy' was used for all transient states, which is incorrect in lots of cases when we still hold a valid copy of the line and are able to handle a functional read. In the L2 controller these states were split to differentiate the access permissions: IFGXX -> IFGXX, IFGXXD IGMO -> IGMO, IGMOU IGMIOF -> IGMIOF, IGMIOFD Same for the dir. controller: IS -> IS, IS_M MM -> MM, MM_M The dir. controller also has the states WBI/WBS for lines that have been queued for a writeback. In these states we hold the data in the TBE for replying to functional reads until the memory acks the write and we move to I or S. Other minor changes include updated debug messages and asserts. 
Change-Id: Ie4f6eac3b4d2641ec91ac6b168a0a017f61c0d6f Signed-off-by: Tiago Mück Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/21927 Maintainer: Jason Lowe-Power Reviewed-by: Pouya Fotouhi Tested-by: kokoro --- configs/ruby/MOESI_CMP_directory.py | 1 + .../protocol/MOESI_CMP_directory-L1cache.sm | 23 +- .../protocol/MOESI_CMP_directory-L2cache.sm | 94 ++++---- .../ruby/protocol/MOESI_CMP_directory-dir.sm | 203 +++++++++++++----- .../ruby/protocol/MOESI_CMP_directory-dma.sm | 1 + .../ruby/protocol/MOESI_CMP_directory-msg.sm | 4 +- 6 files changed, 220 insertions(+), 106 deletions(-) diff --git a/configs/ruby/MOESI_CMP_directory.py b/configs/ruby/MOESI_CMP_directory.py index 2b7770a94..315d62be1 100644 --- a/configs/ruby/MOESI_CMP_directory.py +++ b/configs/ruby/MOESI_CMP_directory.py @@ -211,6 +211,7 @@ def create_system(options, full_system, system, dma_ports, bootmem, dir_cntrl.forwardFromDir.master = ruby_system.network.slave dir_cntrl.requestToMemory = MessageBuffer() dir_cntrl.responseFromMemory = MessageBuffer() + dir_cntrl.triggerQueue = MessageBuffer(ordered = True) for i, dma_port in enumerate(dma_ports): diff --git a/src/mem/ruby/protocol/MOESI_CMP_directory-L1cache.sm b/src/mem/ruby/protocol/MOESI_CMP_directory-L1cache.sm index d7b175c7e..a29fb5c4f 100644 --- a/src/mem/ruby/protocol/MOESI_CMP_directory-L1cache.sm +++ b/src/mem/ruby/protocol/MOESI_CMP_directory-L1cache.sm @@ -74,19 +74,20 @@ machine(MachineType:L1Cache, "L1 cache protocol") I, AccessPermission:Invalid, desc="Idle"; S, AccessPermission:Read_Only, desc="Shared"; O, AccessPermission:Read_Only, desc="Owned"; - M, AccessPermission:Read_Only, desc="Modified (dirty)"; - M_W, AccessPermission:Read_Only, desc="Modified (dirty)"; + M, AccessPermission:Read_Write, desc="Modified (dirty)"; + M_W, AccessPermission:Read_Write, desc="Modified (dirty)"; MM, AccessPermission:Read_Write, desc="Modified (dirty and locally modified)"; MM_W, AccessPermission:Read_Write, desc="Modified (dirty 
and locally modified)"; // Transient States + // Notice we still have a valid copy of the block in most states IM, AccessPermission:Busy, "IM", desc="Issued GetX"; + IS, AccessPermission:Busy, "IS", desc="Issued GetS"; SM, AccessPermission:Read_Only, "SM", desc="Issued GetX, we still have an old copy of the line"; OM, AccessPermission:Read_Only, "SM", desc="Issued GetX, received data"; - IS, AccessPermission:Busy, "IS", desc="Issued GetS"; - SI, AccessPermission:Busy, "OI", desc="Issued PutS, waiting for ack"; - OI, AccessPermission:Busy, "OI", desc="Issued PutO, waiting for ack"; - MI, AccessPermission:Busy, "MI", desc="Issued PutX, waiting for ack"; + SI, AccessPermission:Read_Only, "OI", desc="Issued PutS, waiting for ack"; + OI, AccessPermission:Read_Only, "OI", desc="Issued PutO, waiting for ack"; + MI, AccessPermission:Read_Write, "MI", desc="Issued PutX, waiting for ack"; II, AccessPermission:Busy, "II", desc="Issued PutX/O, saw Fwd_GETS or Fwd_GETX, waiting for ack"; } @@ -225,13 +226,13 @@ machine(MachineType:L1Cache, "L1 cache protocol") AccessPermission getAccessPermission(Addr addr) { TBE tbe := TBEs[addr]; if(is_valid(tbe)) { - DPRINTF(RubySlicc, "%s\n", L1Cache_State_to_permission(tbe.TBEState)); + DPRINTF(RubySlicc, "%s,%s\n", tbe.TBEState, L1Cache_State_to_permission(tbe.TBEState)); return L1Cache_State_to_permission(tbe.TBEState); } Entry cache_entry := getCacheEntry(addr); if(is_valid(cache_entry)) { - DPRINTF(RubySlicc, "%s\n", L1Cache_State_to_permission(cache_entry.CacheState)); + DPRINTF(RubySlicc, "%s,%s\n", cache_entry.CacheState, L1Cache_State_to_permission(cache_entry.CacheState)); return L1Cache_State_to_permission(cache_entry.CacheState); } @@ -270,8 +271,10 @@ machine(MachineType:L1Cache, "L1 cache protocol") } TBE tbe := TBEs[addr]; - num_functional_writes := num_functional_writes + - testAndWrite(addr, tbe.DataBlk, pkt); + if (is_valid(tbe)){ + num_functional_writes := num_functional_writes + + testAndWrite(addr, tbe.DataBlk, pkt); + 
} return num_functional_writes; } diff --git a/src/mem/ruby/protocol/MOESI_CMP_directory-L2cache.sm b/src/mem/ruby/protocol/MOESI_CMP_directory-L2cache.sm index 0faa03ff2..3c7763f64 100644 --- a/src/mem/ruby/protocol/MOESI_CMP_directory-L2cache.sm +++ b/src/mem/ruby/protocol/MOESI_CMP_directory-L2cache.sm @@ -85,23 +85,25 @@ machine(MachineType:L2Cache, "Token protocol") IFGX, AccessPermission:Busy, desc="Blocked, forwarded global GETX to local owner/exclusive. No other on-chip invs needed"; IFGS, AccessPermission:Busy, desc="Blocked, forwarded global GETS to local owner"; ISFGS, AccessPermission:Busy, desc="Blocked, forwarded global GETS to local owner, local sharers exist"; - IFGXX, AccessPermission:Busy, desc="Blocked, forwarded global GETX to local owner but may need acks from other sharers"; - OLSF, AccessPermission:Busy, desc="Blocked, got Fwd_GETX with local sharers, waiting for local inv acks"; + IFGXX, AccessPermission:Busy, desc="Blocked, forwarded global GETX to local owner, waiting for data and acks from other sharers"; + IFGXXD, AccessPermission:Read_Only, desc="Blocked, was IFGXX and received data, still waiting for acks"; + OLSF, AccessPermission:Read_Only, desc="Blocked, got Fwd_GETX with local sharers, waiting for local inv acks"; - // writebacks + // Writebacks + // Notice we still have a valid copy of the block in some states ILOW, AccessPermission:Busy, desc="local WB request, was ILO"; ILOXW, AccessPermission:Busy, desc="local WB request, was ILOX"; ILOSW, AccessPermission:Busy, desc="local WB request, was ILOS"; ILOSXW, AccessPermission:Busy, desc="local WB request, was ILOSX"; - SLSW, AccessPermission:Busy, desc="local WB request, was SLS"; - OLSW, AccessPermission:Busy, desc="local WB request, was OLS"; ILSW, AccessPermission:Busy, desc="local WB request, was ILS"; IW, AccessPermission:Busy, desc="local WB request from only sharer, was ILS"; - OW, AccessPermission:Busy, desc="local WB request from only sharer, was OLS"; - SW, 
AccessPermission:Busy, desc="local WB request from only sharer, was SLS"; - OXW, AccessPermission:Busy, desc="local WB request from only sharer, was OLSX"; - OLSXW, AccessPermission:Busy, desc="local WB request from sharer, was OLSX"; ILXW, AccessPermission:Busy, desc="local WB request, was ILX"; + SLSW, AccessPermission:Read_Only, desc="local WB request, was SLS"; + OLSW, AccessPermission:Read_Only, desc="local WB request, was OLS"; + OW, AccessPermission:Read_Only, desc="local WB request from only sharer, was OLS"; + SW, AccessPermission:Read_Only, desc="local WB request from only sharer, was SLS"; + OXW, AccessPermission:Read_Only, desc="local WB request from only sharer, was OLSX"; + OLSXW, AccessPermission:Read_Only, desc="local WB request from sharer, was OLSX"; IFLS, AccessPermission:Busy, desc="Blocked, forwarded local GETS to _some_ local sharer"; IFLO, AccessPermission:Busy, desc="Blocked, forwarded local GETS to local owner"; @@ -110,29 +112,34 @@ machine(MachineType:L2Cache, "Token protocol") IFLOSX, AccessPermission:Busy, desc="Blocked, forwarded local GETS to local owner w/ other sharers, chip is exclusive"; IFLXO, AccessPermission:Busy, desc="Blocked, forwarded local GETX to local owner with other sharers, chip is exclusive"; + // Some states hold valid data while waiting for acks IGS, AccessPermission:Busy, desc="Semi-blocked, issued local GETS to directory"; IGM, AccessPermission:Busy, desc="Blocked, issued local GETX to directory. 
Need global acks and data"; IGMLS, AccessPermission:Busy, desc="Blocked, issued local GETX to directory but may need to INV local sharers"; - IGMO, AccessPermission:Busy, desc="Blocked, have data for local GETX but need all acks"; + IGMO, AccessPermission:Read_Only, desc="Blocked, have data for local GETX but need all acks"; + IGMOU, AccessPermission:Busy, desc="Blocked, responded to GETX, waiting unblock"; IGMIO, AccessPermission:Busy, desc="Blocked, issued local GETX, local owner with possible local sharer, may need to INV"; OGMIO, AccessPermission:Busy, desc="Blocked, issued local GETX, was owner, may need to INV"; - IGMIOF, AccessPermission:Busy, desc="Blocked, issued local GETX, local owner, waiting for global acks, got Fwd_GETX"; + IGMIOF, AccessPermission:Busy, desc="Blocked, issued local GETX, local owner, waiting for global acks, got Fwd_GETX"; + IGMIOFD, AccessPermission:Read_Only, desc="Blocked, was IGMIOF but received data, still waiting acks"; IGMIOFS, AccessPermission:Busy, desc="Blocked, issued local GETX, local owner, waiting for global acks, got Fwd_GETS"; OGMIOF, AccessPermission:Busy, desc="Blocked, issued local GETX, was owner, waiting for global acks, got Fwd_GETX"; + // Have valid data in some of these transient states II, AccessPermission:Busy, desc="Blocked, handling invalidations"; MM, AccessPermission:Busy, desc="Blocked, was M satisfying local GETX"; SS, AccessPermission:Busy, desc="Blocked, was S satisfying local GETS"; OO, AccessPermission:Busy, desc="Blocked, was O satisfying local GETS"; - OLSS, AccessPermission:Busy, desc="Blocked, satisfying local GETS"; - OLSXS, AccessPermission:Busy, desc="Blocked, satisfying local GETS"; - SLSS, AccessPermission:Busy, desc="Blocked, satisfying local GETS"; + OLSS, AccessPermission:Read_Only, desc="Blocked, satisfying local GETS"; + OLSXS, AccessPermission:Read_Only, desc="Blocked, satisfying local GETS"; + SLSS, AccessPermission:Read_Only, desc="Blocked, satisfying local GETS"; - OI, 
AccessPermission:Busy, desc="Blocked, doing writeback, was O"; - MI, AccessPermission:Busy, desc="Blocked, doing writeback, was M"; + // Have valid data in most of this states + OI, AccessPermission:Read_Only, desc="Blocked, doing writeback, was O"; + MI, AccessPermission:Read_Write, desc="Blocked, doing writeback, was M"; MII, AccessPermission:Busy, desc="Blocked, doing writeback, was M, got Fwd_GETX"; - OLSI, AccessPermission:Busy, desc="Blocked, doing writeback, was OLS"; - ILSI, AccessPermission:Busy, desc="Blocked, doing writeback, was OLS got Fwd_GETX"; + OLSI, AccessPermission:Read_Only, desc="Blocked, doing writeback, was OLS"; + ILSI, AccessPermission:Read_Only, desc="Blocked, doing writeback, was OLS got Fwd_GETX"; // DMA blocking states ILOSD, AccessPermission:Busy, desc="Blocked, waiting for DMA ack"; @@ -548,13 +555,13 @@ machine(MachineType:L2Cache, "Token protocol") AccessPermission getAccessPermission(Addr addr) { TBE tbe := TBEs[addr]; if(is_valid(tbe)) { - DPRINTF(RubySlicc, "%s\n", L2Cache_State_to_permission(tbe.TBEState)); + DPRINTF(RubySlicc, "%s,%s\n", tbe.TBEState, L2Cache_State_to_permission(tbe.TBEState)); return L2Cache_State_to_permission(tbe.TBEState); } Entry cache_entry := getCacheEntry(addr); if(is_valid(cache_entry)) { - DPRINTF(RubySlicc, "%s\n", L2Cache_State_to_permission(cache_entry.CacheState)); + DPRINTF(RubySlicc, "%s,%s\n", cache_entry.CacheState, L2Cache_State_to_permission(cache_entry.CacheState)); return L2Cache_State_to_permission(cache_entry.CacheState); } @@ -570,10 +577,13 @@ machine(MachineType:L2Cache, "Token protocol") void functionalRead(Addr addr, Packet *pkt) { TBE tbe := TBEs[addr]; + Entry cache_entry := getCacheEntry(addr); if(is_valid(tbe)) { testAndRead(addr, tbe.DataBlk, pkt); + } else if (is_valid(cache_entry)) { + testAndRead(addr, cache_entry.DataBlk, pkt); } else { - testAndRead(addr, getCacheEntry(addr).DataBlk, pkt); + error("Block not present!"); } } @@ -584,11 +594,14 @@ 
machine(MachineType:L2Cache, "Token protocol") if(is_valid(tbe)) { num_functional_writes := num_functional_writes + testAndWrite(addr, tbe.DataBlk, pkt); - return num_functional_writes; } - num_functional_writes := num_functional_writes + - testAndWrite(addr, getCacheEntry(addr).DataBlk, pkt); + Entry cache_entry := getCacheEntry(addr); + if (is_valid(cache_entry)) { + num_functional_writes := num_functional_writes + + testAndWrite(addr, cache_entry.DataBlk, pkt); + } + return num_functional_writes; } @@ -1612,27 +1625,27 @@ machine(MachineType:L2Cache, "Token protocol") // TRANSITIONS //***************************************************** - transition({II, IFGX, IFGS, ISFGS, IFGXX, IFLXO, ILOW, ILOXW, ILOSW, ILOSXW, SLSW, OLSW, ILSW, IW, OW, SW, OXW, OLSXW, ILXW, IFLS, IFLO, IFLOX, IFLOXX, IFLOSX, OLSXS, IGS, IGM, IGMLS, IGMO, IGMIO, OGMIO, IGMIOF, OGMIOF, MM, SS, OO, OI, MI, MII, OLSI, ILSI, SLSS, OLSS, OLSF, IGMIOFS, ILOSD, ILOSXD, ILOD, ILXD, ILOXD}, {L1_PUTO, L1_PUTS, L1_PUTS_only, L1_PUTX}) { + transition({II, IFGX, IFGS, ISFGS, IFGXX, IFGXXD, IFLXO, ILOW, ILOXW, ILOSW, ILOSXW, SLSW, OLSW, ILSW, IW, OW, SW, OXW, OLSXW, ILXW, IFLS, IFLO, IFLOX, IFLOXX, IFLOSX, OLSXS, IGS, IGM, IGMLS, IGMO, IGMOU, IGMIO, OGMIO, IGMIOF, IGMIOFD, OGMIOF, MM, SS, OO, OI, MI, MII, OLSI, ILSI, SLSS, OLSS, OLSF, IGMIOFS, ILOSD, ILOSXD, ILOD, ILXD, ILOXD}, {L1_PUTO, L1_PUTS, L1_PUTS_only, L1_PUTX}) { st_stallAndWaitL1RequestQueue; } - transition({II, IFGX, IFGS, ISFGS, IFGXX, IFLXO, ILOW, ILOXW, ILOSW, ILOSXW, SLSW, OLSW, ILSW, IW, OW, SW, OXW, OLSXW, ILXW, IFLS, IFLO, IFLOX, IFLOXX, IFLOSX, OLSXS, IGS, IGM, IGMLS, IGMO, IGMIO, OGMIO, IGMIOF, OGMIOF, MM, SS, OO, OI, MI, MII, OLSI, ILSI, SLSS, OLSS, OLSF, IGMIOFS, ILOSD, ILOSXD, ILOD, ILXD, ILOXD}, {L1_GETX, L1_GETS}) { + transition({II, IFGX, IFGS, ISFGS, IFGXX, IFGXXD, IFLXO, ILOW, ILOXW, ILOSW, ILOSXW, SLSW, OLSW, ILSW, IW, OW, SW, OXW, OLSXW, ILXW, IFLS, IFLO, IFLOX, IFLOXX, IFLOSX, OLSXS, IGS, IGM, IGMLS, IGMO, IGMOU, IGMIO, 
OGMIO, IGMIOF, IGMIOFD, OGMIOF, MM, SS, OO, OI, MI, MII, OLSI, ILSI, SLSS, OLSS, OLSF, IGMIOFS, ILOSD, ILOSXD, ILOD, ILXD, ILOXD}, {L1_GETX, L1_GETS}) { st_stallAndWaitL1RequestQueue; } - transition({IFGX, IFGS, ISFGS, IFGXX, IFLXO, ILOW, ILOXW, ILOSW, ILOSXW, SLSW, OLSW, ILSW, IW, ILXW, OW, SW, OXW, OLSXW, IFLS, IFLO, IFLOX, IFLOXX, IFLOSX,OLSXS, IGS, IGM, IGMLS, IGMO, MM, SS, OO, OI, MI, MII, OLSI, ILSI, SLSS, OLSS, OLSF, IGMIOFS, ILOSD, ILOSXD, ILOD, ILXD, ILOXD}, L2_Replacement) { + transition({IFGX, IFGS, ISFGS, IFGXX, IFGXXD, IFLXO, ILOW, ILOXW, ILOSW, ILOSXW, SLSW, OLSW, ILSW, IW, ILXW, OW, SW, OXW, OLSXW, IFLS, IFLO, IFLOX, IFLOXX, IFLOSX,OLSXS, IGS, IGM, IGMLS, IGMO, IGMOU, MM, SS, OO, OI, MI, MII, OLSI, ILSI, SLSS, OLSS, OLSF, IGMIOFS, ILOSD, ILOSXD, ILOD, ILXD, ILOXD}, L2_Replacement) { zz_recycleL1RequestQueue; } - transition({IFGX, IFGS, ISFGS, IFGXX, IFLXO, ILOW, ILOXW, ILOSW, ILOSXW, SLSW, OLSW, ILSW, IW, OW, SW, OXW, OLSXW, ILXW, IFLS, IFLO, IFLOX, IFLOXX, IFLOSX,OLSXS, IGS, IGM, MM, SS, OO, SLSS, OLSS, OLSF, IGMIOFS, ILOSD, ILOSXD, ILOD, ILXD, ILOXD}, {Fwd_GETX, Fwd_GETS, Fwd_DMA}) { + transition({IFGX, IFGS, ISFGS, IFGXX, IFGXXD, IFLXO, ILOW, ILOXW, ILOSW, ILOSXW, SLSW, OLSW, ILSW, IW, OW, SW, OXW, OLSXW, ILXW, IFLS, IFLO, IFLOX, IFLOXX, IFLOSX,OLSXS, IGS, IGM, MM, SS, OO, SLSS, OLSS, OLSF, IGMIOFS, ILOSD, ILOSXD, ILOD, ILXD, ILOXD}, {Fwd_GETX, Fwd_GETS, Fwd_DMA}) { zz_recycleGlobalRequestQueue; } - transition({OGMIO, IGMIO, IGMO}, Fwd_DMA) { + transition({OGMIO, IGMIO, IGMO, IGMOU}, Fwd_DMA) { zz_recycleGlobalRequestQueue; } - transition({IFGX, IFGS, ISFGS, IFGXX, IFLXO, ILOW, ILOXW, ILOSW, ILOSXW, SLSW, OLSW, ILSW, IW, OW, SW, OXW, OLSXW, ILXW, IFLS, IFLO, IFLOX, IFLOXX, IFLOSX,OLSXS, MM, SS, OO, SLSS, OLSS, OLSF, IGMIOFS, ILOSD, ILOSXD, ILOD, ILXD, ILOXD}, {Inv}) { + transition({IFGX, IFGS, ISFGS, IFGXX, IFGXXD, IFLXO, ILOW, ILOXW, ILOSW, ILOSXW, SLSW, OLSW, ILSW, IW, OW, SW, OXW, OLSXW, ILXW, IFLS, IFLO, IFLOX, IFLOXX, IFLOSX,OLSXS, MM, SS, 
OO, SLSS, OLSS, OLSF, IGMIOFS, ILOSD, ILOSXD, ILOD, ILXD, ILOXD}, {Inv}) { zz_recycleGlobalRequestQueue; } @@ -1821,20 +1834,20 @@ machine(MachineType:L2Cache, "Token protocol") } - transition(IFGXX, IntAck) { + transition({IFGXX, IFGXXD}, IntAck) { m_decrementNumberOfMessagesInt; o_checkForIntCompletion; n_popResponseQueue; } - transition(IFGXX, Data_Exclusive) { + transition(IFGXX, Data_Exclusive, IFGXXD) { i_copyDataToTBE; m_decrementNumberOfMessagesInt; o_checkForIntCompletion; n_popResponseQueue; } - transition(IFGXX, All_Acks, I) { + transition(IFGXXD, All_Acks, I) { c_sendDataFromTBEToFwdGETX; gg_clearLocalSharers; removeFromDir; @@ -2178,7 +2191,6 @@ machine(MachineType:L2Cache, "Token protocol") n_popTriggerQueue; } - // transition(IGMLS, ExtAck, IGMO) { transition(IGMLS, ExtAck) { m_decrementNumberOfMessagesExt; o_checkForExtCompletion; @@ -2285,26 +2297,26 @@ machine(MachineType:L2Cache, "Token protocol") n_popTriggerQueue; } - transition(IGMIOF, IntAck) { + transition({IGMIOF, IGMIOFD}, IntAck) { m_decrementNumberOfMessagesInt; o_checkForIntCompletion; n_popResponseQueue; } - transition(IGMIOF, Data_Exclusive) { + transition(IGMIOF, Data_Exclusive, IGMIOFD) { i_copyDataToTBE; m_decrementNumberOfMessagesInt; o_checkForIntCompletion; n_popResponseQueue; } - transition(IGMIOF, All_Acks, IGM) { + transition(IGMIOFD, All_Acks, IGM) { gg_clearLocalSharers; c_sendDataFromTBEToFwdGETX; n_popTriggerQueue; } - transition(IGMIO, All_Acks, IGMO) { + transition(IGMIO, All_Acks, IGMOU) { hh_countLocalSharersExceptL1GETXRequestorInTBE; ee_issueLocalInvExceptL1RequestorInTBE; k_forwardLocalGETXToLocalOwner; @@ -2312,7 +2324,7 @@ machine(MachineType:L2Cache, "Token protocol") n_popTriggerQueue; } - transition(OGMIO, All_Acks, IGMO) { + transition(OGMIO, All_Acks, IGMOU) { ee_issueLocalInvExceptL1RequestorInTBE; c_sendDataFromTBEToL1GETX; n_popTriggerQueue; @@ -2374,12 +2386,12 @@ machine(MachineType:L2Cache, "Token protocol") wa_wakeUpDependents; } - transition(IGMO, 
All_Acks) { + transition(IGMO, All_Acks, IGMOU) { c_sendDataFromTBEToL1GETX; n_popTriggerQueue; } - transition(IGMO, Exclusive_Unblock, ILX) { + transition(IGMOU, Exclusive_Unblock, ILX) { g_recordLocalExclusive; f_sendExclusiveUnblock; s_deallocateTBE; diff --git a/src/mem/ruby/protocol/MOESI_CMP_directory-dir.sm b/src/mem/ruby/protocol/MOESI_CMP_directory-dir.sm index 0dfbdb83c..ff1f91b37 100644 --- a/src/mem/ruby/protocol/MOESI_CMP_directory-dir.sm +++ b/src/mem/ruby/protocol/MOESI_CMP_directory-dir.sm @@ -56,31 +56,40 @@ machine(MachineType:Directory, "Directory protocol") MessageBuffer * requestToMemory; MessageBuffer * responseFromMemory; + + MessageBuffer * triggerQueue; { // STATES state_declaration(State, desc="Directory states", default="Directory_State_I") { // Base states I, AccessPermission:Read_Write, desc="Invalid"; - S, AccessPermission:Read_Only, desc="Shared"; + S, AccessPermission:Read_Write, desc="Shared"; O, AccessPermission:Maybe_Stale, desc="Owner"; M, AccessPermission:Maybe_Stale, desc="Modified"; - IS, AccessPermission:Busy, desc="Blocked, was in idle"; + // Transient states + // The memory has valid data in some of these + IS_M, AccessPermission:Read_Write, desc="Blocked, was in I, waiting for mem"; + IS, AccessPermission:Read_Write, desc="Blocked, was in I, data forwarded"; SS, AccessPermission:Read_Only, desc="Blocked, was in shared"; OO, AccessPermission:Busy, desc="Blocked, was in owned"; MO, AccessPermission:Busy, desc="Blocked, going to owner or maybe modified"; - MM, AccessPermission:Busy, desc="Blocked, going to modified"; + MM_M, AccessPermission:Read_Only, desc="Blocked, fetching from memory, going to MM"; + MM, AccessPermission:Busy, desc="Blocked, req or mem data forwarded, going to modified"; MI, AccessPermission:Busy, desc="Blocked on a writeback"; MIS, AccessPermission:Busy, desc="Blocked on a writeback, but don't remove from sharers when received"; OS, AccessPermission:Busy, desc="Blocked on a writeback"; OSS, 
AccessPermission:Busy, desc="Blocked on a writeback, but don't remove from sharers when received"; - XI_M, AccessPermission:Busy, desc="Blocked, going to I, waiting for the memory controller"; - XI_M_U, AccessPermission:Busy, desc="Blocked, going to XI_U, waiting for the memory controller"; - XI_U, AccessPermission:Busy, desc="Blocked, going to I, waiting for an unblock"; - OI_D, AccessPermission:Busy, desc="In O, going to I, waiting for data"; + // We have valid data in a TBE + WBI, AccessPermission:Read_Only, desc="Sent writeback, waiting for memory; will be I"; + WBS, AccessPermission:Read_Only, desc="Sent writeback, waiting for memory; will be S"; + XI_M, AccessPermission:Read_Only, desc="Blocked, going to I, waiting for the memory controller"; + XI_M_U, AccessPermission:Read_Only, desc="Blocked, going to XI_U, waiting for the memory controller"; + XI_U, AccessPermission:Read_Only, desc="Blocked, going to I, waiting for an unblock"; + OI_D, AccessPermission:Busy, desc="In O, going to I, waiting for data"; OD, AccessPermission:Busy, desc="In O, waiting for dma ack from L2"; MD, AccessPermission:Busy, desc="In M, waiting for dma ack from L2"; } @@ -105,6 +114,7 @@ machine(MachineType:Directory, "Directory protocol") DMA_WRITE_PARTIAL, desc="DMA Write partial line"; DMA_ACK, desc="DMA Ack"; Data, desc="Data to directory"; + All_Acks, desk="All pending acks, unblocks, etc have been received"; } // TYPES @@ -122,6 +132,8 @@ machine(MachineType:Directory, "Directory protocol") int Len, desc="Length of request"; DataBlock DataBlk, desc="DataBlk"; MachineID Requestor, desc="original requestor"; + bool WaitingWBAck, desc="DataBlk WB request sent, but no ack from mem yet"; + bool WaitingDMAAck, desc="DMA ack sent, waiting for unblock"; } structure(TBETable, external = "yes") { @@ -212,10 +224,10 @@ machine(MachineType:Directory, "Directory protocol") if (directory.isPresent(addr)) { Entry dir_entry := static_cast(Entry, "pointer", directory[addr]); if 
(is_valid(dir_entry)) { - DPRINTF(RubySlicc, "%s\n", Directory_State_to_permission(dir_entry.DirectoryState)); + DPRINTF(RubySlicc, "%s,%s\n", dir_entry.DirectoryState, Directory_State_to_permission(dir_entry.DirectoryState)); return Directory_State_to_permission(dir_entry.DirectoryState); } else { - DPRINTF(RubySlicc, "%s\n", Directory_State_to_permission(State:I)); + DPRINTF(RubySlicc, "%s,%s\n", State:I, Directory_State_to_permission(State:I)); return Directory_State_to_permission(State:I); } } @@ -235,11 +247,21 @@ machine(MachineType:Directory, "Directory protocol") } void functionalRead(Addr addr, Packet *pkt) { - functionalMemoryRead(pkt); + TBE tbe := TBEs[addr]; + if (is_valid(tbe) && tbe.WaitingWBAck) { + testAndRead(addr, tbe.DataBlk, pkt); + } else { + functionalMemoryRead(pkt); + } } int functionalWrite(Addr addr, Packet *pkt) { int num_functional_writes := 0; + TBE tbe := TBEs[addr]; + if (is_valid(tbe)) { + num_functional_writes := num_functional_writes + + testAndWrite(addr, tbe.DataBlk, pkt); + } num_functional_writes := num_functional_writes + functionalMemoryWrite(pkt); return num_functional_writes; } @@ -272,8 +294,23 @@ machine(MachineType:Directory, "Directory protocol") // For inserting internal unblocks only out_port(unblockNetwork_out_internal, ResponseMsg, responseToDir); + out_port(triggerQueue_out, TriggerMsg, triggerQueue); + // ** IN_PORTS ** + // Trigger Queue + in_port(triggerQueue_in, TriggerMsg, triggerQueue, rank=3) { + if (triggerQueue_in.isReady(clockEdge())) { + peek(triggerQueue_in, TriggerMsg) { + if (in_msg.Type == TriggerType:ALL_ACKS) { + trigger(Event:All_Acks, in_msg.addr, TBEs[in_msg.addr]); + } else { + error("Unexpected message"); + } + } + } + } + in_port(unblockNetwork_in, ResponseMsg, responseToDir, rank=2) { if (unblockNetwork_in.isReady(clockEdge())) { peek(unblockNetwork_in, ResponseMsg) { @@ -395,6 +432,18 @@ machine(MachineType:Directory, "Directory protocol") } } + action(clearDMA, "cD", desc="Clear DMA flag 
in TBE") { + assert(is_valid(tbe)); + assert(tbe.WaitingDMAAck); + tbe.WaitingDMAAck := false; + } + + action(clearWBAck, "cWb", desc="Clear WB ack flag in TBE") { + assert(is_valid(tbe)); + assert(tbe.WaitingWBAck); + tbe.WaitingWBAck := false; + } + action(c_clearOwner, "c", desc="Clear the owner field") { getDirectoryEntry(address).Owner.clear(); } @@ -509,6 +558,21 @@ machine(MachineType:Directory, "Directory protocol") unblockNetwork_in.dequeue(clockEdge()); } + action(popTriggerQueue, "pt", desc="Pop trigger queue.") { + triggerQueue_in.dequeue(clockEdge()); + } + + action(checkForCompletion, "\o", desc="Check if we have received all the messages required for completion") { + assert(is_valid(tbe)); + if ((tbe.WaitingDMAAck == false) && + (tbe.WaitingWBAck == false)) { + enqueue(triggerQueue_out, TriggerMsg) { + out_msg.addr := address; + out_msg.Type := TriggerType:ALL_ACKS; + } + } + } + action(m_addUnlockerToSharers, "m", desc="Add the unlocker to the sharer list") { peek(unblockNetwork_in, ResponseMsg) { if (in_msg.SenderMachine == MachineType:L2Cache) { @@ -544,25 +608,17 @@ machine(MachineType:Directory, "Directory protocol") action(qw_queueMemoryWBFromCacheRequest, "qw", desc="Queue off-chip writeback request") { peek(requestQueue_in, RequestMsg) { - if (is_valid(tbe)) { - enqueue(memQueue_out, MemoryMsg, to_memory_controller_latency) { - out_msg.addr := address; - out_msg.Type := MemoryRequestType:MEMORY_WB; - out_msg.Sender := tbe.Requestor; - out_msg.MessageSize := MessageSizeType:Writeback_Data; - out_msg.DataBlk := in_msg.DataBlk; - out_msg.Len := 0; - } - } else { - enqueue(memQueue_out, MemoryMsg, to_memory_controller_latency) { - out_msg.addr := address; - out_msg.Type := MemoryRequestType:MEMORY_WB; - out_msg.Sender := in_msg.Requestor; - out_msg.MessageSize := MessageSizeType:Writeback_Data; - out_msg.DataBlk := in_msg.DataBlk; - out_msg.Len := 0; - } + assert(is_valid(tbe)); + enqueue(memQueue_out, MemoryMsg, to_memory_controller_latency) { + 
out_msg.addr := address; + out_msg.Type := MemoryRequestType:MEMORY_WB; + out_msg.Sender := in_msg.Requestor; + out_msg.MessageSize := MessageSizeType:Writeback_Data; + out_msg.DataBlk := in_msg.DataBlk; + out_msg.Len := 0; } + tbe.DataBlk := in_msg.DataBlk; + tbe.WaitingWBAck := true; } } @@ -581,6 +637,8 @@ machine(MachineType:Directory, "Directory protocol") out_msg.DataBlk := DataBlk; out_msg.Len := 0; } + tbe.DataBlk := DataBlk; + tbe.WaitingWBAck := true; } } @@ -599,11 +657,14 @@ machine(MachineType:Directory, "Directory protocol") out_msg.DataBlk := DataBlk; out_msg.Len := 0; } + tbe.DataBlk := DataBlk; + tbe.WaitingWBAck := true; } } action(qw_queueMemoryWBFromDMARequest, "/qw", desc="Queue off-chip writeback request") { peek(requestQueue_in, RequestMsg) { + assert(is_valid(tbe)); enqueue(memQueue_out, MemoryMsg, to_memory_controller_latency) { out_msg.addr := address; out_msg.Type := MemoryRequestType:MEMORY_WB; @@ -612,6 +673,8 @@ machine(MachineType:Directory, "Directory protocol") out_msg.DataBlk := in_msg.DataBlk; out_msg.Len := 0; } + tbe.DataBlk := in_msg.DataBlk; + tbe.WaitingWBAck := true; } } @@ -622,6 +685,7 @@ machine(MachineType:Directory, "Directory protocol") action(a_sendDMAAckFromReq, "\a", desc="Send DMA Ack that write completed, along with Inv Ack count") { peek(requestQueue_in, RequestMsg) { enqueue(responseNetwork_out, ResponseMsg, 1) { + assert(is_valid(tbe)); out_msg.addr := address; out_msg.Sender := machineID; out_msg.SenderMachine := MachineType:Directory; @@ -629,6 +693,7 @@ machine(MachineType:Directory, "Directory protocol") out_msg.Acks := getDirectoryEntry(address).Sharers.count(); // for dma requests out_msg.Type := CoherenceResponseType:DMA_ACK; out_msg.MessageSize := MessageSizeType:Writeback_Control; + tbe.WaitingDMAAck := true; } } } @@ -643,28 +708,35 @@ machine(MachineType:Directory, "Directory protocol") out_msg.Acks := getDirectoryEntry(address).Sharers.count(); // for dma requests out_msg.Type := 
CoherenceResponseType:DMA_ACK; out_msg.MessageSize := MessageSizeType:Writeback_Control; + tbe.WaitingDMAAck := true; } } action(v_allocateTBE, "v", desc="Allocate TBE entry") { peek (requestQueue_in, RequestMsg) { + assert(is_valid(tbe) == false); TBEs.allocate(address); set_tbe(TBEs[address]); tbe.PhysicalAddress := in_msg.addr; tbe.Len := in_msg.Len; tbe.DataBlk := in_msg.DataBlk; tbe.Requestor := in_msg.Requestor; + tbe.WaitingWBAck := false; + tbe.WaitingDMAAck := false; } } action(w_deallocateTBE, "w", desc="Deallocate TBE entry") { + assert(is_valid(tbe)); + assert(tbe.WaitingWBAck == false); + assert(tbe.WaitingDMAAck == false); TBEs.deallocate(address); unset_tbe(); } // TRANSITIONS - transition(I, GETX, MM) { + transition(I, GETX, MM_M) { allocDirEntry; qf_queueMemoryFetchRequest; i_popIncomingRequestQueue; @@ -678,6 +750,7 @@ machine(MachineType:Directory, "Directory protocol") transition(I, DMA_WRITE_LINE, XI_U) { allocDirEntry; + v_allocateTBE; qw_queueMemoryWBFromDMARequest; a_sendDMAAckFromReq; // ack count may be zero i_popIncomingRequestQueue; @@ -693,7 +766,6 @@ machine(MachineType:Directory, "Directory protocol") transition(XI_M_U, Memory_Data_DMA, XI_U) { qw_queueMemoryWBFromMemResp; a_sendDMAAckFromTBE; // ack count may be zero - w_deallocateTBE; q_popMemQueue; } @@ -703,20 +775,34 @@ machine(MachineType:Directory, "Directory protocol") q_popMemQueue; } - transition(XI_U, Exclusive_Unblock, I) { + transition(XI_U, Exclusive_Unblock, XI_U) { cc_clearSharers; c_clearOwner; - deallocDirEntry; + clearDMA; + checkForCompletion; j_popIncomingUnblockQueue; } - transition(S, GETX, MM) { + transition(XI_U, Memory_Ack, XI_U) { + clearWBAck; + checkForCompletion; + q_popMemQueue; + } + + transition(XI_U, All_Acks, I) { + deallocDirEntry; + w_deallocateTBE; + popTriggerQueue; + } + + transition(S, GETX, MM_M) { qf_queueMemoryFetchRequest; g_sendInvalidations; i_popIncomingRequestQueue; } transition(S, DMA_WRITE_LINE, XI_U) { + v_allocateTBE; 
qw_queueMemoryWBFromDMARequest; a_sendDMAAckFromReq; // ack count may be zero g_sendInvalidations; // the DMA will collect invalidations @@ -730,7 +816,7 @@ machine(MachineType:Directory, "Directory protocol") i_popIncomingRequestQueue; } - transition(I, GETS, IS) { + transition(I, GETS, IS_M) { allocDirEntry; qf_queueMemoryFetchRequest; i_popIncomingRequestQueue; @@ -777,7 +863,6 @@ machine(MachineType:Directory, "Directory protocol") transition(OI_D, Data, XI_U) { qw_queueMemoryWBFromCacheResp; a_sendDMAAckFromTBE; // ack count may be zero - w_deallocateTBE; j_popIncomingUnblockQueue; } @@ -834,7 +919,7 @@ machine(MachineType:Directory, "Directory protocol") } - transition({MM, MO, MI, MIS, OS, OSS, XI_M, XI_M_U, XI_U, OI_D, OD, MD}, {GETS, GETX, PUTO, PUTO_SHARERS, PUTX, DMA_READ, DMA_WRITE_LINE, DMA_WRITE_PARTIAL}) { + transition({MM_M, MM, MO, MI, MIS, OS, OSS, WBI, WBS, XI_M, XI_M_U, XI_U, OI_D, OD, MD}, {GETS, GETX, PUTO, PUTO_SHARERS, PUTX, DMA_READ, DMA_WRITE_LINE, DMA_WRITE_PARTIAL}) { zz_recycleRequest; } @@ -849,11 +934,11 @@ machine(MachineType:Directory, "Directory protocol") j_popIncomingUnblockQueue; } - transition({IS, SS, OO}, {GETX, PUTO, PUTO_SHARERS, PUTX, DMA_WRITE_LINE,DMA_WRITE_PARTIAL}) { + transition({IS, IS_M, SS, OO}, {GETX, PUTO, PUTO_SHARERS, PUTX, DMA_WRITE_LINE,DMA_WRITE_PARTIAL}) { zz_recycleRequest; } - transition(IS, {GETS, DMA_READ}) { + transition({IS, IS_M}, {GETS, DMA_READ}) { zz_recycleRequest; } @@ -892,16 +977,24 @@ machine(MachineType:Directory, "Directory protocol") j_popIncomingUnblockQueue; } - transition(MI, Dirty_Writeback, I) { + transition(MI, Dirty_Writeback, WBI) { c_clearOwner; cc_clearSharers; + v_allocateTBE; qw_queueMemoryWBFromCacheRequest; - deallocDirEntry; i_popIncomingRequestQueue; } - transition(MIS, Dirty_Writeback, S) { + transition(WBI, Memory_Ack, I) { + clearWBAck; + w_deallocateTBE; + deallocDirEntry; + q_popMemQueue; + } + + transition(MIS, Dirty_Writeback, WBS) { c_moveOwnerToSharer; + 
v_allocateTBE; qw_queueMemoryWBFromCacheRequest; i_popIncomingRequestQueue; } @@ -911,18 +1004,26 @@ machine(MachineType:Directory, "Directory protocol") i_popIncomingRequestQueue; } - transition(OS, Dirty_Writeback, S) { + transition(OS, Dirty_Writeback, WBS) { c_clearOwner; + v_allocateTBE; qw_queueMemoryWBFromCacheRequest; i_popIncomingRequestQueue; } - transition(OSS, Dirty_Writeback, S) { + transition(OSS, Dirty_Writeback, WBS) { c_moveOwnerToSharer; + v_allocateTBE; qw_queueMemoryWBFromCacheRequest; i_popIncomingRequestQueue; } + transition(WBS, Memory_Ack, S) { + clearWBAck; + w_deallocateTBE; + q_popMemQueue; + } + transition(OSS, Clean_Writeback, S) { c_moveOwnerToSharer; i_popIncomingRequestQueue; @@ -940,15 +1041,17 @@ machine(MachineType:Directory, "Directory protocol") i_popIncomingRequestQueue; } - transition({MI, MIS}, Unblock, M) { - j_popIncomingUnblockQueue; + transition({S, O, M, SS, OO}, Memory_Data_Cache) { + d_sendDataMsg; + q_popMemQueue; } - transition({OS, OSS}, Unblock, O) { - j_popIncomingUnblockQueue; + transition(IS_M, Memory_Data_Cache, IS) { + d_sendDataMsg; + q_popMemQueue; } - transition({S, O, M, IS, SS, OO, MO, MM, MI, MIS, OS, OSS}, Memory_Data_Cache) { + transition(MM_M, Memory_Data_Cache, MM) { d_sendDataMsg; q_popMemQueue; } @@ -959,8 +1062,4 @@ machine(MachineType:Directory, "Directory protocol") q_popMemQueue; } - transition({I, S, O, M, IS, SS, OO, MO, MM, MI, MIS, OS, OSS, XI_U, XI_M, XI_M_U}, Memory_Ack) { - q_popMemQueue; - } - } diff --git a/src/mem/ruby/protocol/MOESI_CMP_directory-dma.sm b/src/mem/ruby/protocol/MOESI_CMP_directory-dma.sm index 1dc0c58be..5a52b60c3 100644 --- a/src/mem/ruby/protocol/MOESI_CMP_directory-dma.sm +++ b/src/mem/ruby/protocol/MOESI_CMP_directory-dma.sm @@ -100,6 +100,7 @@ machine(MachineType:DMA, "DMA Controller") } AccessPermission getAccessPermission(Addr addr) { + DPRINTF(RubySlicc, "AccessPermission_NotPresent\n"); return AccessPermission:NotPresent; } diff --git 
a/src/mem/ruby/protocol/MOESI_CMP_directory-msg.sm b/src/mem/ruby/protocol/MOESI_CMP_directory-msg.sm index 7dc582215..2dd34e4d9 100644 --- a/src/mem/ruby/protocol/MOESI_CMP_directory-msg.sm +++ b/src/mem/ruby/protocol/MOESI_CMP_directory-msg.sm @@ -109,9 +109,7 @@ structure(RequestMsg, desc="...", interface="Message") { bool functionalRead(Packet *pkt) { // Read only those messages that contain the data - if (Type == CoherenceRequestType:DMA_READ || - Type == CoherenceRequestType:DMA_WRITE || - Type == CoherenceRequestType:WRITEBACK_CLEAN_DATA || + if (Type == CoherenceRequestType:WRITEBACK_CLEAN_DATA || Type == CoherenceRequestType:WRITEBACK_DIRTY_DATA) { return testAndRead(addr, DataBlk, pkt); } -- 2.30.2