From: Tiago Muck Date: Thu, 2 May 2019 23:41:13 +0000 (-0500) Subject: mem-ruby: fix MOESI_CMP_directory functional reads X-Git-Tag: v20.1.0.0~657 X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=8ec2abb98a2482e41ec7287832cd2cd4b2009d26;p=gem5.git mem-ruby: fix MOESI_CMP_directory functional reads This patch properly sets the access permissions in all controllers. 'Busy' was used for all transient states, which is incorrect in lots of cases when we still hold a valid copy of the line and are able to handle a functional read. In the L2 controller these states were split to differentiate the access permissions: IFGXX -> IFGXX, IFGXXD IGMO -> IGMO, IGMOU IGMIOF -> IGMIOF, IGMIOFD Same for the dir. controller: IS -> IS, IS_M MM -> MM, MM_M The dir. controller also has the states WBI/WBS for lines that have been queued for a writeback. In these states we hold the data in the TBE for replying to functional reads until the memory acks the write and we move to I or S. Other minor changes include updated debug messages and asserts. 
Change-Id: Ie4f6eac3b4d2641ec91ac6b168a0a017f61c0d6f Signed-off-by: Tiago Mück Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/21927 Maintainer: Jason Lowe-Power Reviewed-by: Pouya Fotouhi Tested-by: kokoro --- diff --git a/configs/ruby/MOESI_CMP_directory.py b/configs/ruby/MOESI_CMP_directory.py index 2b7770a94..315d62be1 100644 --- a/configs/ruby/MOESI_CMP_directory.py +++ b/configs/ruby/MOESI_CMP_directory.py @@ -211,6 +211,7 @@ def create_system(options, full_system, system, dma_ports, bootmem, dir_cntrl.forwardFromDir.master = ruby_system.network.slave dir_cntrl.requestToMemory = MessageBuffer() dir_cntrl.responseFromMemory = MessageBuffer() + dir_cntrl.triggerQueue = MessageBuffer(ordered = True) for i, dma_port in enumerate(dma_ports): diff --git a/src/mem/ruby/protocol/MOESI_CMP_directory-L1cache.sm b/src/mem/ruby/protocol/MOESI_CMP_directory-L1cache.sm index d7b175c7e..a29fb5c4f 100644 --- a/src/mem/ruby/protocol/MOESI_CMP_directory-L1cache.sm +++ b/src/mem/ruby/protocol/MOESI_CMP_directory-L1cache.sm @@ -74,19 +74,20 @@ machine(MachineType:L1Cache, "L1 cache protocol") I, AccessPermission:Invalid, desc="Idle"; S, AccessPermission:Read_Only, desc="Shared"; O, AccessPermission:Read_Only, desc="Owned"; - M, AccessPermission:Read_Only, desc="Modified (dirty)"; - M_W, AccessPermission:Read_Only, desc="Modified (dirty)"; + M, AccessPermission:Read_Write, desc="Modified (dirty)"; + M_W, AccessPermission:Read_Write, desc="Modified (dirty)"; MM, AccessPermission:Read_Write, desc="Modified (dirty and locally modified)"; MM_W, AccessPermission:Read_Write, desc="Modified (dirty and locally modified)"; // Transient States + // Notice we still have a valid copy of the block in most states IM, AccessPermission:Busy, "IM", desc="Issued GetX"; + IS, AccessPermission:Busy, "IS", desc="Issued GetS"; SM, AccessPermission:Read_Only, "SM", desc="Issued GetX, we still have an old copy of the line"; OM, AccessPermission:Read_Only, "SM", desc="Issued GetX, 
received data"; - IS, AccessPermission:Busy, "IS", desc="Issued GetS"; - SI, AccessPermission:Busy, "OI", desc="Issued PutS, waiting for ack"; - OI, AccessPermission:Busy, "OI", desc="Issued PutO, waiting for ack"; - MI, AccessPermission:Busy, "MI", desc="Issued PutX, waiting for ack"; + SI, AccessPermission:Read_Only, "OI", desc="Issued PutS, waiting for ack"; + OI, AccessPermission:Read_Only, "OI", desc="Issued PutO, waiting for ack"; + MI, AccessPermission:Read_Write, "MI", desc="Issued PutX, waiting for ack"; II, AccessPermission:Busy, "II", desc="Issued PutX/O, saw Fwd_GETS or Fwd_GETX, waiting for ack"; } @@ -225,13 +226,13 @@ machine(MachineType:L1Cache, "L1 cache protocol") AccessPermission getAccessPermission(Addr addr) { TBE tbe := TBEs[addr]; if(is_valid(tbe)) { - DPRINTF(RubySlicc, "%s\n", L1Cache_State_to_permission(tbe.TBEState)); + DPRINTF(RubySlicc, "%s,%s\n", tbe.TBEState, L1Cache_State_to_permission(tbe.TBEState)); return L1Cache_State_to_permission(tbe.TBEState); } Entry cache_entry := getCacheEntry(addr); if(is_valid(cache_entry)) { - DPRINTF(RubySlicc, "%s\n", L1Cache_State_to_permission(cache_entry.CacheState)); + DPRINTF(RubySlicc, "%s,%s\n", cache_entry.CacheState, L1Cache_State_to_permission(cache_entry.CacheState)); return L1Cache_State_to_permission(cache_entry.CacheState); } @@ -270,8 +271,10 @@ machine(MachineType:L1Cache, "L1 cache protocol") } TBE tbe := TBEs[addr]; - num_functional_writes := num_functional_writes + - testAndWrite(addr, tbe.DataBlk, pkt); + if (is_valid(tbe)){ + num_functional_writes := num_functional_writes + + testAndWrite(addr, tbe.DataBlk, pkt); + } return num_functional_writes; } diff --git a/src/mem/ruby/protocol/MOESI_CMP_directory-L2cache.sm b/src/mem/ruby/protocol/MOESI_CMP_directory-L2cache.sm index 0faa03ff2..3c7763f64 100644 --- a/src/mem/ruby/protocol/MOESI_CMP_directory-L2cache.sm +++ b/src/mem/ruby/protocol/MOESI_CMP_directory-L2cache.sm @@ -85,23 +85,25 @@ machine(MachineType:L2Cache, "Token protocol") 
IFGX, AccessPermission:Busy, desc="Blocked, forwarded global GETX to local owner/exclusive. No other on-chip invs needed"; IFGS, AccessPermission:Busy, desc="Blocked, forwarded global GETS to local owner"; ISFGS, AccessPermission:Busy, desc="Blocked, forwarded global GETS to local owner, local sharers exist"; - IFGXX, AccessPermission:Busy, desc="Blocked, forwarded global GETX to local owner but may need acks from other sharers"; - OLSF, AccessPermission:Busy, desc="Blocked, got Fwd_GETX with local sharers, waiting for local inv acks"; + IFGXX, AccessPermission:Busy, desc="Blocked, forwarded global GETX to local owner, waiting for data and acks from other sharers"; + IFGXXD, AccessPermission:Read_Only, desc="Blocked, was IFGXX and received data, still waiting for acks"; + OLSF, AccessPermission:Read_Only, desc="Blocked, got Fwd_GETX with local sharers, waiting for local inv acks"; - // writebacks + // Writebacks + // Notice we still have a valid copy of the block in some states ILOW, AccessPermission:Busy, desc="local WB request, was ILO"; ILOXW, AccessPermission:Busy, desc="local WB request, was ILOX"; ILOSW, AccessPermission:Busy, desc="local WB request, was ILOS"; ILOSXW, AccessPermission:Busy, desc="local WB request, was ILOSX"; - SLSW, AccessPermission:Busy, desc="local WB request, was SLS"; - OLSW, AccessPermission:Busy, desc="local WB request, was OLS"; ILSW, AccessPermission:Busy, desc="local WB request, was ILS"; IW, AccessPermission:Busy, desc="local WB request from only sharer, was ILS"; - OW, AccessPermission:Busy, desc="local WB request from only sharer, was OLS"; - SW, AccessPermission:Busy, desc="local WB request from only sharer, was SLS"; - OXW, AccessPermission:Busy, desc="local WB request from only sharer, was OLSX"; - OLSXW, AccessPermission:Busy, desc="local WB request from sharer, was OLSX"; ILXW, AccessPermission:Busy, desc="local WB request, was ILX"; + SLSW, AccessPermission:Read_Only, desc="local WB request, was SLS"; + OLSW, 
AccessPermission:Read_Only, desc="local WB request, was OLS"; + OW, AccessPermission:Read_Only, desc="local WB request from only sharer, was OLS"; + SW, AccessPermission:Read_Only, desc="local WB request from only sharer, was SLS"; + OXW, AccessPermission:Read_Only, desc="local WB request from only sharer, was OLSX"; + OLSXW, AccessPermission:Read_Only, desc="local WB request from sharer, was OLSX"; IFLS, AccessPermission:Busy, desc="Blocked, forwarded local GETS to _some_ local sharer"; IFLO, AccessPermission:Busy, desc="Blocked, forwarded local GETS to local owner"; @@ -110,29 +112,34 @@ machine(MachineType:L2Cache, "Token protocol") IFLOSX, AccessPermission:Busy, desc="Blocked, forwarded local GETS to local owner w/ other sharers, chip is exclusive"; IFLXO, AccessPermission:Busy, desc="Blocked, forwarded local GETX to local owner with other sharers, chip is exclusive"; + // Some states hold valid data while waiting for acks IGS, AccessPermission:Busy, desc="Semi-blocked, issued local GETS to directory"; IGM, AccessPermission:Busy, desc="Blocked, issued local GETX to directory. 
Need global acks and data"; IGMLS, AccessPermission:Busy, desc="Blocked, issued local GETX to directory but may need to INV local sharers"; - IGMO, AccessPermission:Busy, desc="Blocked, have data for local GETX but need all acks"; + IGMO, AccessPermission:Read_Only, desc="Blocked, have data for local GETX but need all acks"; + IGMOU, AccessPermission:Busy, desc="Blocked, responded to GETX, waiting unblock"; IGMIO, AccessPermission:Busy, desc="Blocked, issued local GETX, local owner with possible local sharer, may need to INV"; OGMIO, AccessPermission:Busy, desc="Blocked, issued local GETX, was owner, may need to INV"; - IGMIOF, AccessPermission:Busy, desc="Blocked, issued local GETX, local owner, waiting for global acks, got Fwd_GETX"; + IGMIOF, AccessPermission:Busy, desc="Blocked, issued local GETX, local owner, waiting for global acks, got Fwd_GETX"; + IGMIOFD, AccessPermission:Read_Only, desc="Blocked, was IGMIOF but received data, still waiting acks"; IGMIOFS, AccessPermission:Busy, desc="Blocked, issued local GETX, local owner, waiting for global acks, got Fwd_GETS"; OGMIOF, AccessPermission:Busy, desc="Blocked, issued local GETX, was owner, waiting for global acks, got Fwd_GETX"; + // Have valid data in some of these transient states II, AccessPermission:Busy, desc="Blocked, handling invalidations"; MM, AccessPermission:Busy, desc="Blocked, was M satisfying local GETX"; SS, AccessPermission:Busy, desc="Blocked, was S satisfying local GETS"; OO, AccessPermission:Busy, desc="Blocked, was O satisfying local GETS"; - OLSS, AccessPermission:Busy, desc="Blocked, satisfying local GETS"; - OLSXS, AccessPermission:Busy, desc="Blocked, satisfying local GETS"; - SLSS, AccessPermission:Busy, desc="Blocked, satisfying local GETS"; + OLSS, AccessPermission:Read_Only, desc="Blocked, satisfying local GETS"; + OLSXS, AccessPermission:Read_Only, desc="Blocked, satisfying local GETS"; + SLSS, AccessPermission:Read_Only, desc="Blocked, satisfying local GETS"; - OI, 
AccessPermission:Busy, desc="Blocked, doing writeback, was O"; - MI, AccessPermission:Busy, desc="Blocked, doing writeback, was M"; + // Have valid data in most of this states + OI, AccessPermission:Read_Only, desc="Blocked, doing writeback, was O"; + MI, AccessPermission:Read_Write, desc="Blocked, doing writeback, was M"; MII, AccessPermission:Busy, desc="Blocked, doing writeback, was M, got Fwd_GETX"; - OLSI, AccessPermission:Busy, desc="Blocked, doing writeback, was OLS"; - ILSI, AccessPermission:Busy, desc="Blocked, doing writeback, was OLS got Fwd_GETX"; + OLSI, AccessPermission:Read_Only, desc="Blocked, doing writeback, was OLS"; + ILSI, AccessPermission:Read_Only, desc="Blocked, doing writeback, was OLS got Fwd_GETX"; // DMA blocking states ILOSD, AccessPermission:Busy, desc="Blocked, waiting for DMA ack"; @@ -548,13 +555,13 @@ machine(MachineType:L2Cache, "Token protocol") AccessPermission getAccessPermission(Addr addr) { TBE tbe := TBEs[addr]; if(is_valid(tbe)) { - DPRINTF(RubySlicc, "%s\n", L2Cache_State_to_permission(tbe.TBEState)); + DPRINTF(RubySlicc, "%s,%s\n", tbe.TBEState, L2Cache_State_to_permission(tbe.TBEState)); return L2Cache_State_to_permission(tbe.TBEState); } Entry cache_entry := getCacheEntry(addr); if(is_valid(cache_entry)) { - DPRINTF(RubySlicc, "%s\n", L2Cache_State_to_permission(cache_entry.CacheState)); + DPRINTF(RubySlicc, "%s,%s\n", cache_entry.CacheState, L2Cache_State_to_permission(cache_entry.CacheState)); return L2Cache_State_to_permission(cache_entry.CacheState); } @@ -570,10 +577,13 @@ machine(MachineType:L2Cache, "Token protocol") void functionalRead(Addr addr, Packet *pkt) { TBE tbe := TBEs[addr]; + Entry cache_entry := getCacheEntry(addr); if(is_valid(tbe)) { testAndRead(addr, tbe.DataBlk, pkt); + } else if (is_valid(cache_entry)) { + testAndRead(addr, cache_entry.DataBlk, pkt); } else { - testAndRead(addr, getCacheEntry(addr).DataBlk, pkt); + error("Block not present!"); } } @@ -584,11 +594,14 @@ 
machine(MachineType:L2Cache, "Token protocol") if(is_valid(tbe)) { num_functional_writes := num_functional_writes + testAndWrite(addr, tbe.DataBlk, pkt); - return num_functional_writes; } - num_functional_writes := num_functional_writes + - testAndWrite(addr, getCacheEntry(addr).DataBlk, pkt); + Entry cache_entry := getCacheEntry(addr); + if (is_valid(cache_entry)) { + num_functional_writes := num_functional_writes + + testAndWrite(addr, cache_entry.DataBlk, pkt); + } + return num_functional_writes; } @@ -1612,27 +1625,27 @@ machine(MachineType:L2Cache, "Token protocol") // TRANSITIONS //***************************************************** - transition({II, IFGX, IFGS, ISFGS, IFGXX, IFLXO, ILOW, ILOXW, ILOSW, ILOSXW, SLSW, OLSW, ILSW, IW, OW, SW, OXW, OLSXW, ILXW, IFLS, IFLO, IFLOX, IFLOXX, IFLOSX, OLSXS, IGS, IGM, IGMLS, IGMO, IGMIO, OGMIO, IGMIOF, OGMIOF, MM, SS, OO, OI, MI, MII, OLSI, ILSI, SLSS, OLSS, OLSF, IGMIOFS, ILOSD, ILOSXD, ILOD, ILXD, ILOXD}, {L1_PUTO, L1_PUTS, L1_PUTS_only, L1_PUTX}) { + transition({II, IFGX, IFGS, ISFGS, IFGXX, IFGXXD, IFLXO, ILOW, ILOXW, ILOSW, ILOSXW, SLSW, OLSW, ILSW, IW, OW, SW, OXW, OLSXW, ILXW, IFLS, IFLO, IFLOX, IFLOXX, IFLOSX, OLSXS, IGS, IGM, IGMLS, IGMO, IGMOU, IGMIO, OGMIO, IGMIOF, IGMIOFD, OGMIOF, MM, SS, OO, OI, MI, MII, OLSI, ILSI, SLSS, OLSS, OLSF, IGMIOFS, ILOSD, ILOSXD, ILOD, ILXD, ILOXD}, {L1_PUTO, L1_PUTS, L1_PUTS_only, L1_PUTX}) { st_stallAndWaitL1RequestQueue; } - transition({II, IFGX, IFGS, ISFGS, IFGXX, IFLXO, ILOW, ILOXW, ILOSW, ILOSXW, SLSW, OLSW, ILSW, IW, OW, SW, OXW, OLSXW, ILXW, IFLS, IFLO, IFLOX, IFLOXX, IFLOSX, OLSXS, IGS, IGM, IGMLS, IGMO, IGMIO, OGMIO, IGMIOF, OGMIOF, MM, SS, OO, OI, MI, MII, OLSI, ILSI, SLSS, OLSS, OLSF, IGMIOFS, ILOSD, ILOSXD, ILOD, ILXD, ILOXD}, {L1_GETX, L1_GETS}) { + transition({II, IFGX, IFGS, ISFGS, IFGXX, IFGXXD, IFLXO, ILOW, ILOXW, ILOSW, ILOSXW, SLSW, OLSW, ILSW, IW, OW, SW, OXW, OLSXW, ILXW, IFLS, IFLO, IFLOX, IFLOXX, IFLOSX, OLSXS, IGS, IGM, IGMLS, IGMO, IGMOU, IGMIO, 
OGMIO, IGMIOF, IGMIOFD, OGMIOF, MM, SS, OO, OI, MI, MII, OLSI, ILSI, SLSS, OLSS, OLSF, IGMIOFS, ILOSD, ILOSXD, ILOD, ILXD, ILOXD}, {L1_GETX, L1_GETS}) { st_stallAndWaitL1RequestQueue; } - transition({IFGX, IFGS, ISFGS, IFGXX, IFLXO, ILOW, ILOXW, ILOSW, ILOSXW, SLSW, OLSW, ILSW, IW, ILXW, OW, SW, OXW, OLSXW, IFLS, IFLO, IFLOX, IFLOXX, IFLOSX,OLSXS, IGS, IGM, IGMLS, IGMO, MM, SS, OO, OI, MI, MII, OLSI, ILSI, SLSS, OLSS, OLSF, IGMIOFS, ILOSD, ILOSXD, ILOD, ILXD, ILOXD}, L2_Replacement) { + transition({IFGX, IFGS, ISFGS, IFGXX, IFGXXD, IFLXO, ILOW, ILOXW, ILOSW, ILOSXW, SLSW, OLSW, ILSW, IW, ILXW, OW, SW, OXW, OLSXW, IFLS, IFLO, IFLOX, IFLOXX, IFLOSX,OLSXS, IGS, IGM, IGMLS, IGMO, IGMOU, MM, SS, OO, OI, MI, MII, OLSI, ILSI, SLSS, OLSS, OLSF, IGMIOFS, ILOSD, ILOSXD, ILOD, ILXD, ILOXD}, L2_Replacement) { zz_recycleL1RequestQueue; } - transition({IFGX, IFGS, ISFGS, IFGXX, IFLXO, ILOW, ILOXW, ILOSW, ILOSXW, SLSW, OLSW, ILSW, IW, OW, SW, OXW, OLSXW, ILXW, IFLS, IFLO, IFLOX, IFLOXX, IFLOSX,OLSXS, IGS, IGM, MM, SS, OO, SLSS, OLSS, OLSF, IGMIOFS, ILOSD, ILOSXD, ILOD, ILXD, ILOXD}, {Fwd_GETX, Fwd_GETS, Fwd_DMA}) { + transition({IFGX, IFGS, ISFGS, IFGXX, IFGXXD, IFLXO, ILOW, ILOXW, ILOSW, ILOSXW, SLSW, OLSW, ILSW, IW, OW, SW, OXW, OLSXW, ILXW, IFLS, IFLO, IFLOX, IFLOXX, IFLOSX,OLSXS, IGS, IGM, MM, SS, OO, SLSS, OLSS, OLSF, IGMIOFS, ILOSD, ILOSXD, ILOD, ILXD, ILOXD}, {Fwd_GETX, Fwd_GETS, Fwd_DMA}) { zz_recycleGlobalRequestQueue; } - transition({OGMIO, IGMIO, IGMO}, Fwd_DMA) { + transition({OGMIO, IGMIO, IGMO, IGMOU}, Fwd_DMA) { zz_recycleGlobalRequestQueue; } - transition({IFGX, IFGS, ISFGS, IFGXX, IFLXO, ILOW, ILOXW, ILOSW, ILOSXW, SLSW, OLSW, ILSW, IW, OW, SW, OXW, OLSXW, ILXW, IFLS, IFLO, IFLOX, IFLOXX, IFLOSX,OLSXS, MM, SS, OO, SLSS, OLSS, OLSF, IGMIOFS, ILOSD, ILOSXD, ILOD, ILXD, ILOXD}, {Inv}) { + transition({IFGX, IFGS, ISFGS, IFGXX, IFGXXD, IFLXO, ILOW, ILOXW, ILOSW, ILOSXW, SLSW, OLSW, ILSW, IW, OW, SW, OXW, OLSXW, ILXW, IFLS, IFLO, IFLOX, IFLOXX, IFLOSX,OLSXS, MM, SS, 
OO, SLSS, OLSS, OLSF, IGMIOFS, ILOSD, ILOSXD, ILOD, ILXD, ILOXD}, {Inv}) { zz_recycleGlobalRequestQueue; } @@ -1821,20 +1834,20 @@ machine(MachineType:L2Cache, "Token protocol") } - transition(IFGXX, IntAck) { + transition({IFGXX, IFGXXD}, IntAck) { m_decrementNumberOfMessagesInt; o_checkForIntCompletion; n_popResponseQueue; } - transition(IFGXX, Data_Exclusive) { + transition(IFGXX, Data_Exclusive, IFGXXD) { i_copyDataToTBE; m_decrementNumberOfMessagesInt; o_checkForIntCompletion; n_popResponseQueue; } - transition(IFGXX, All_Acks, I) { + transition(IFGXXD, All_Acks, I) { c_sendDataFromTBEToFwdGETX; gg_clearLocalSharers; removeFromDir; @@ -2178,7 +2191,6 @@ machine(MachineType:L2Cache, "Token protocol") n_popTriggerQueue; } - // transition(IGMLS, ExtAck, IGMO) { transition(IGMLS, ExtAck) { m_decrementNumberOfMessagesExt; o_checkForExtCompletion; @@ -2285,26 +2297,26 @@ machine(MachineType:L2Cache, "Token protocol") n_popTriggerQueue; } - transition(IGMIOF, IntAck) { + transition({IGMIOF, IGMIOFD}, IntAck) { m_decrementNumberOfMessagesInt; o_checkForIntCompletion; n_popResponseQueue; } - transition(IGMIOF, Data_Exclusive) { + transition(IGMIOF, Data_Exclusive, IGMIOFD) { i_copyDataToTBE; m_decrementNumberOfMessagesInt; o_checkForIntCompletion; n_popResponseQueue; } - transition(IGMIOF, All_Acks, IGM) { + transition(IGMIOFD, All_Acks, IGM) { gg_clearLocalSharers; c_sendDataFromTBEToFwdGETX; n_popTriggerQueue; } - transition(IGMIO, All_Acks, IGMO) { + transition(IGMIO, All_Acks, IGMOU) { hh_countLocalSharersExceptL1GETXRequestorInTBE; ee_issueLocalInvExceptL1RequestorInTBE; k_forwardLocalGETXToLocalOwner; @@ -2312,7 +2324,7 @@ machine(MachineType:L2Cache, "Token protocol") n_popTriggerQueue; } - transition(OGMIO, All_Acks, IGMO) { + transition(OGMIO, All_Acks, IGMOU) { ee_issueLocalInvExceptL1RequestorInTBE; c_sendDataFromTBEToL1GETX; n_popTriggerQueue; @@ -2374,12 +2386,12 @@ machine(MachineType:L2Cache, "Token protocol") wa_wakeUpDependents; } - transition(IGMO, 
All_Acks) { + transition(IGMO, All_Acks, IGMOU) { c_sendDataFromTBEToL1GETX; n_popTriggerQueue; } - transition(IGMO, Exclusive_Unblock, ILX) { + transition(IGMOU, Exclusive_Unblock, ILX) { g_recordLocalExclusive; f_sendExclusiveUnblock; s_deallocateTBE; diff --git a/src/mem/ruby/protocol/MOESI_CMP_directory-dir.sm b/src/mem/ruby/protocol/MOESI_CMP_directory-dir.sm index 0dfbdb83c..ff1f91b37 100644 --- a/src/mem/ruby/protocol/MOESI_CMP_directory-dir.sm +++ b/src/mem/ruby/protocol/MOESI_CMP_directory-dir.sm @@ -56,31 +56,40 @@ machine(MachineType:Directory, "Directory protocol") MessageBuffer * requestToMemory; MessageBuffer * responseFromMemory; + + MessageBuffer * triggerQueue; { // STATES state_declaration(State, desc="Directory states", default="Directory_State_I") { // Base states I, AccessPermission:Read_Write, desc="Invalid"; - S, AccessPermission:Read_Only, desc="Shared"; + S, AccessPermission:Read_Write, desc="Shared"; O, AccessPermission:Maybe_Stale, desc="Owner"; M, AccessPermission:Maybe_Stale, desc="Modified"; - IS, AccessPermission:Busy, desc="Blocked, was in idle"; + // Transient states + // The memory has valid data in some of these + IS_M, AccessPermission:Read_Write, desc="Blocked, was in I, waiting for mem"; + IS, AccessPermission:Read_Write, desc="Blocked, was in I, data forwarded"; SS, AccessPermission:Read_Only, desc="Blocked, was in shared"; OO, AccessPermission:Busy, desc="Blocked, was in owned"; MO, AccessPermission:Busy, desc="Blocked, going to owner or maybe modified"; - MM, AccessPermission:Busy, desc="Blocked, going to modified"; + MM_M, AccessPermission:Read_Only, desc="Blocked, fetching from memory, going to MM"; + MM, AccessPermission:Busy, desc="Blocked, req or mem data forwarded, going to modified"; MI, AccessPermission:Busy, desc="Blocked on a writeback"; MIS, AccessPermission:Busy, desc="Blocked on a writeback, but don't remove from sharers when received"; OS, AccessPermission:Busy, desc="Blocked on a writeback"; OSS, 
AccessPermission:Busy, desc="Blocked on a writeback, but don't remove from sharers when received"; - XI_M, AccessPermission:Busy, desc="Blocked, going to I, waiting for the memory controller"; - XI_M_U, AccessPermission:Busy, desc="Blocked, going to XI_U, waiting for the memory controller"; - XI_U, AccessPermission:Busy, desc="Blocked, going to I, waiting for an unblock"; - OI_D, AccessPermission:Busy, desc="In O, going to I, waiting for data"; + // We have valid data in a TBE + WBI, AccessPermission:Read_Only, desc="Sent writeback, waiting for memory; will be I"; + WBS, AccessPermission:Read_Only, desc="Sent writeback, waiting for memory; will be S"; + XI_M, AccessPermission:Read_Only, desc="Blocked, going to I, waiting for the memory controller"; + XI_M_U, AccessPermission:Read_Only, desc="Blocked, going to XI_U, waiting for the memory controller"; + XI_U, AccessPermission:Read_Only, desc="Blocked, going to I, waiting for an unblock"; + OI_D, AccessPermission:Busy, desc="In O, going to I, waiting for data"; OD, AccessPermission:Busy, desc="In O, waiting for dma ack from L2"; MD, AccessPermission:Busy, desc="In M, waiting for dma ack from L2"; } @@ -105,6 +114,7 @@ machine(MachineType:Directory, "Directory protocol") DMA_WRITE_PARTIAL, desc="DMA Write partial line"; DMA_ACK, desc="DMA Ack"; Data, desc="Data to directory"; + All_Acks, desk="All pending acks, unblocks, etc have been received"; } // TYPES @@ -122,6 +132,8 @@ machine(MachineType:Directory, "Directory protocol") int Len, desc="Length of request"; DataBlock DataBlk, desc="DataBlk"; MachineID Requestor, desc="original requestor"; + bool WaitingWBAck, desc="DataBlk WB request sent, but no ack from mem yet"; + bool WaitingDMAAck, desc="DMA ack sent, waiting for unblock"; } structure(TBETable, external = "yes") { @@ -212,10 +224,10 @@ machine(MachineType:Directory, "Directory protocol") if (directory.isPresent(addr)) { Entry dir_entry := static_cast(Entry, "pointer", directory[addr]); if 
(is_valid(dir_entry)) { - DPRINTF(RubySlicc, "%s\n", Directory_State_to_permission(dir_entry.DirectoryState)); + DPRINTF(RubySlicc, "%s,%s\n", dir_entry.DirectoryState, Directory_State_to_permission(dir_entry.DirectoryState)); return Directory_State_to_permission(dir_entry.DirectoryState); } else { - DPRINTF(RubySlicc, "%s\n", Directory_State_to_permission(State:I)); + DPRINTF(RubySlicc, "%s,%s\n", State:I, Directory_State_to_permission(State:I)); return Directory_State_to_permission(State:I); } } @@ -235,11 +247,21 @@ machine(MachineType:Directory, "Directory protocol") } void functionalRead(Addr addr, Packet *pkt) { - functionalMemoryRead(pkt); + TBE tbe := TBEs[addr]; + if (is_valid(tbe) && tbe.WaitingWBAck) { + testAndRead(addr, tbe.DataBlk, pkt); + } else { + functionalMemoryRead(pkt); + } } int functionalWrite(Addr addr, Packet *pkt) { int num_functional_writes := 0; + TBE tbe := TBEs[addr]; + if (is_valid(tbe)) { + num_functional_writes := num_functional_writes + + testAndWrite(addr, tbe.DataBlk, pkt); + } num_functional_writes := num_functional_writes + functionalMemoryWrite(pkt); return num_functional_writes; } @@ -272,8 +294,23 @@ machine(MachineType:Directory, "Directory protocol") // For inserting internal unblocks only out_port(unblockNetwork_out_internal, ResponseMsg, responseToDir); + out_port(triggerQueue_out, TriggerMsg, triggerQueue); + // ** IN_PORTS ** + // Trigger Queue + in_port(triggerQueue_in, TriggerMsg, triggerQueue, rank=3) { + if (triggerQueue_in.isReady(clockEdge())) { + peek(triggerQueue_in, TriggerMsg) { + if (in_msg.Type == TriggerType:ALL_ACKS) { + trigger(Event:All_Acks, in_msg.addr, TBEs[in_msg.addr]); + } else { + error("Unexpected message"); + } + } + } + } + in_port(unblockNetwork_in, ResponseMsg, responseToDir, rank=2) { if (unblockNetwork_in.isReady(clockEdge())) { peek(unblockNetwork_in, ResponseMsg) { @@ -395,6 +432,18 @@ machine(MachineType:Directory, "Directory protocol") } } + action(clearDMA, "cD", desc="Clear DMA flag 
in TBE") { + assert(is_valid(tbe)); + assert(tbe.WaitingDMAAck); + tbe.WaitingDMAAck := false; + } + + action(clearWBAck, "cWb", desc="Clear WB ack flag in TBE") { + assert(is_valid(tbe)); + assert(tbe.WaitingWBAck); + tbe.WaitingWBAck := false; + } + action(c_clearOwner, "c", desc="Clear the owner field") { getDirectoryEntry(address).Owner.clear(); } @@ -509,6 +558,21 @@ machine(MachineType:Directory, "Directory protocol") unblockNetwork_in.dequeue(clockEdge()); } + action(popTriggerQueue, "pt", desc="Pop trigger queue.") { + triggerQueue_in.dequeue(clockEdge()); + } + + action(checkForCompletion, "\o", desc="Check if we have received all the messages required for completion") { + assert(is_valid(tbe)); + if ((tbe.WaitingDMAAck == false) && + (tbe.WaitingWBAck == false)) { + enqueue(triggerQueue_out, TriggerMsg) { + out_msg.addr := address; + out_msg.Type := TriggerType:ALL_ACKS; + } + } + } + action(m_addUnlockerToSharers, "m", desc="Add the unlocker to the sharer list") { peek(unblockNetwork_in, ResponseMsg) { if (in_msg.SenderMachine == MachineType:L2Cache) { @@ -544,25 +608,17 @@ machine(MachineType:Directory, "Directory protocol") action(qw_queueMemoryWBFromCacheRequest, "qw", desc="Queue off-chip writeback request") { peek(requestQueue_in, RequestMsg) { - if (is_valid(tbe)) { - enqueue(memQueue_out, MemoryMsg, to_memory_controller_latency) { - out_msg.addr := address; - out_msg.Type := MemoryRequestType:MEMORY_WB; - out_msg.Sender := tbe.Requestor; - out_msg.MessageSize := MessageSizeType:Writeback_Data; - out_msg.DataBlk := in_msg.DataBlk; - out_msg.Len := 0; - } - } else { - enqueue(memQueue_out, MemoryMsg, to_memory_controller_latency) { - out_msg.addr := address; - out_msg.Type := MemoryRequestType:MEMORY_WB; - out_msg.Sender := in_msg.Requestor; - out_msg.MessageSize := MessageSizeType:Writeback_Data; - out_msg.DataBlk := in_msg.DataBlk; - out_msg.Len := 0; - } + assert(is_valid(tbe)); + enqueue(memQueue_out, MemoryMsg, to_memory_controller_latency) { + 
out_msg.addr := address; + out_msg.Type := MemoryRequestType:MEMORY_WB; + out_msg.Sender := in_msg.Requestor; + out_msg.MessageSize := MessageSizeType:Writeback_Data; + out_msg.DataBlk := in_msg.DataBlk; + out_msg.Len := 0; } + tbe.DataBlk := in_msg.DataBlk; + tbe.WaitingWBAck := true; } } @@ -581,6 +637,8 @@ machine(MachineType:Directory, "Directory protocol") out_msg.DataBlk := DataBlk; out_msg.Len := 0; } + tbe.DataBlk := DataBlk; + tbe.WaitingWBAck := true; } } @@ -599,11 +657,14 @@ machine(MachineType:Directory, "Directory protocol") out_msg.DataBlk := DataBlk; out_msg.Len := 0; } + tbe.DataBlk := DataBlk; + tbe.WaitingWBAck := true; } } action(qw_queueMemoryWBFromDMARequest, "/qw", desc="Queue off-chip writeback request") { peek(requestQueue_in, RequestMsg) { + assert(is_valid(tbe)); enqueue(memQueue_out, MemoryMsg, to_memory_controller_latency) { out_msg.addr := address; out_msg.Type := MemoryRequestType:MEMORY_WB; @@ -612,6 +673,8 @@ machine(MachineType:Directory, "Directory protocol") out_msg.DataBlk := in_msg.DataBlk; out_msg.Len := 0; } + tbe.DataBlk := in_msg.DataBlk; + tbe.WaitingWBAck := true; } } @@ -622,6 +685,7 @@ machine(MachineType:Directory, "Directory protocol") action(a_sendDMAAckFromReq, "\a", desc="Send DMA Ack that write completed, along with Inv Ack count") { peek(requestQueue_in, RequestMsg) { enqueue(responseNetwork_out, ResponseMsg, 1) { + assert(is_valid(tbe)); out_msg.addr := address; out_msg.Sender := machineID; out_msg.SenderMachine := MachineType:Directory; @@ -629,6 +693,7 @@ machine(MachineType:Directory, "Directory protocol") out_msg.Acks := getDirectoryEntry(address).Sharers.count(); // for dma requests out_msg.Type := CoherenceResponseType:DMA_ACK; out_msg.MessageSize := MessageSizeType:Writeback_Control; + tbe.WaitingDMAAck := true; } } } @@ -643,28 +708,35 @@ machine(MachineType:Directory, "Directory protocol") out_msg.Acks := getDirectoryEntry(address).Sharers.count(); // for dma requests out_msg.Type := 
CoherenceResponseType:DMA_ACK; out_msg.MessageSize := MessageSizeType:Writeback_Control; + tbe.WaitingDMAAck := true; } } action(v_allocateTBE, "v", desc="Allocate TBE entry") { peek (requestQueue_in, RequestMsg) { + assert(is_valid(tbe) == false); TBEs.allocate(address); set_tbe(TBEs[address]); tbe.PhysicalAddress := in_msg.addr; tbe.Len := in_msg.Len; tbe.DataBlk := in_msg.DataBlk; tbe.Requestor := in_msg.Requestor; + tbe.WaitingWBAck := false; + tbe.WaitingDMAAck := false; } } action(w_deallocateTBE, "w", desc="Deallocate TBE entry") { + assert(is_valid(tbe)); + assert(tbe.WaitingWBAck == false); + assert(tbe.WaitingDMAAck == false); TBEs.deallocate(address); unset_tbe(); } // TRANSITIONS - transition(I, GETX, MM) { + transition(I, GETX, MM_M) { allocDirEntry; qf_queueMemoryFetchRequest; i_popIncomingRequestQueue; @@ -678,6 +750,7 @@ machine(MachineType:Directory, "Directory protocol") transition(I, DMA_WRITE_LINE, XI_U) { allocDirEntry; + v_allocateTBE; qw_queueMemoryWBFromDMARequest; a_sendDMAAckFromReq; // ack count may be zero i_popIncomingRequestQueue; @@ -693,7 +766,6 @@ machine(MachineType:Directory, "Directory protocol") transition(XI_M_U, Memory_Data_DMA, XI_U) { qw_queueMemoryWBFromMemResp; a_sendDMAAckFromTBE; // ack count may be zero - w_deallocateTBE; q_popMemQueue; } @@ -703,20 +775,34 @@ machine(MachineType:Directory, "Directory protocol") q_popMemQueue; } - transition(XI_U, Exclusive_Unblock, I) { + transition(XI_U, Exclusive_Unblock, XI_U) { cc_clearSharers; c_clearOwner; - deallocDirEntry; + clearDMA; + checkForCompletion; j_popIncomingUnblockQueue; } - transition(S, GETX, MM) { + transition(XI_U, Memory_Ack, XI_U) { + clearWBAck; + checkForCompletion; + q_popMemQueue; + } + + transition(XI_U, All_Acks, I) { + deallocDirEntry; + w_deallocateTBE; + popTriggerQueue; + } + + transition(S, GETX, MM_M) { qf_queueMemoryFetchRequest; g_sendInvalidations; i_popIncomingRequestQueue; } transition(S, DMA_WRITE_LINE, XI_U) { + v_allocateTBE; 
qw_queueMemoryWBFromDMARequest; a_sendDMAAckFromReq; // ack count may be zero g_sendInvalidations; // the DMA will collect invalidations @@ -730,7 +816,7 @@ machine(MachineType:Directory, "Directory protocol") i_popIncomingRequestQueue; } - transition(I, GETS, IS) { + transition(I, GETS, IS_M) { allocDirEntry; qf_queueMemoryFetchRequest; i_popIncomingRequestQueue; @@ -777,7 +863,6 @@ machine(MachineType:Directory, "Directory protocol") transition(OI_D, Data, XI_U) { qw_queueMemoryWBFromCacheResp; a_sendDMAAckFromTBE; // ack count may be zero - w_deallocateTBE; j_popIncomingUnblockQueue; } @@ -834,7 +919,7 @@ machine(MachineType:Directory, "Directory protocol") } - transition({MM, MO, MI, MIS, OS, OSS, XI_M, XI_M_U, XI_U, OI_D, OD, MD}, {GETS, GETX, PUTO, PUTO_SHARERS, PUTX, DMA_READ, DMA_WRITE_LINE, DMA_WRITE_PARTIAL}) { + transition({MM_M, MM, MO, MI, MIS, OS, OSS, WBI, WBS, XI_M, XI_M_U, XI_U, OI_D, OD, MD}, {GETS, GETX, PUTO, PUTO_SHARERS, PUTX, DMA_READ, DMA_WRITE_LINE, DMA_WRITE_PARTIAL}) { zz_recycleRequest; } @@ -849,11 +934,11 @@ machine(MachineType:Directory, "Directory protocol") j_popIncomingUnblockQueue; } - transition({IS, SS, OO}, {GETX, PUTO, PUTO_SHARERS, PUTX, DMA_WRITE_LINE,DMA_WRITE_PARTIAL}) { + transition({IS, IS_M, SS, OO}, {GETX, PUTO, PUTO_SHARERS, PUTX, DMA_WRITE_LINE,DMA_WRITE_PARTIAL}) { zz_recycleRequest; } - transition(IS, {GETS, DMA_READ}) { + transition({IS, IS_M}, {GETS, DMA_READ}) { zz_recycleRequest; } @@ -892,16 +977,24 @@ machine(MachineType:Directory, "Directory protocol") j_popIncomingUnblockQueue; } - transition(MI, Dirty_Writeback, I) { + transition(MI, Dirty_Writeback, WBI) { c_clearOwner; cc_clearSharers; + v_allocateTBE; qw_queueMemoryWBFromCacheRequest; - deallocDirEntry; i_popIncomingRequestQueue; } - transition(MIS, Dirty_Writeback, S) { + transition(WBI, Memory_Ack, I) { + clearWBAck; + w_deallocateTBE; + deallocDirEntry; + q_popMemQueue; + } + + transition(MIS, Dirty_Writeback, WBS) { c_moveOwnerToSharer; + 
v_allocateTBE; qw_queueMemoryWBFromCacheRequest; i_popIncomingRequestQueue; } @@ -911,18 +1004,26 @@ machine(MachineType:Directory, "Directory protocol") i_popIncomingRequestQueue; } - transition(OS, Dirty_Writeback, S) { + transition(OS, Dirty_Writeback, WBS) { c_clearOwner; + v_allocateTBE; qw_queueMemoryWBFromCacheRequest; i_popIncomingRequestQueue; } - transition(OSS, Dirty_Writeback, S) { + transition(OSS, Dirty_Writeback, WBS) { c_moveOwnerToSharer; + v_allocateTBE; qw_queueMemoryWBFromCacheRequest; i_popIncomingRequestQueue; } + transition(WBS, Memory_Ack, S) { + clearWBAck; + w_deallocateTBE; + q_popMemQueue; + } + transition(OSS, Clean_Writeback, S) { c_moveOwnerToSharer; i_popIncomingRequestQueue; @@ -940,15 +1041,17 @@ machine(MachineType:Directory, "Directory protocol") i_popIncomingRequestQueue; } - transition({MI, MIS}, Unblock, M) { - j_popIncomingUnblockQueue; + transition({S, O, M, SS, OO}, Memory_Data_Cache) { + d_sendDataMsg; + q_popMemQueue; } - transition({OS, OSS}, Unblock, O) { - j_popIncomingUnblockQueue; + transition(IS_M, Memory_Data_Cache, IS) { + d_sendDataMsg; + q_popMemQueue; } - transition({S, O, M, IS, SS, OO, MO, MM, MI, MIS, OS, OSS}, Memory_Data_Cache) { + transition(MM_M, Memory_Data_Cache, MM) { d_sendDataMsg; q_popMemQueue; } @@ -959,8 +1062,4 @@ machine(MachineType:Directory, "Directory protocol") q_popMemQueue; } - transition({I, S, O, M, IS, SS, OO, MO, MM, MI, MIS, OS, OSS, XI_U, XI_M, XI_M_U}, Memory_Ack) { - q_popMemQueue; - } - } diff --git a/src/mem/ruby/protocol/MOESI_CMP_directory-dma.sm b/src/mem/ruby/protocol/MOESI_CMP_directory-dma.sm index 1dc0c58be..5a52b60c3 100644 --- a/src/mem/ruby/protocol/MOESI_CMP_directory-dma.sm +++ b/src/mem/ruby/protocol/MOESI_CMP_directory-dma.sm @@ -100,6 +100,7 @@ machine(MachineType:DMA, "DMA Controller") } AccessPermission getAccessPermission(Addr addr) { + DPRINTF(RubySlicc, "AccessPermission_NotPresent\n"); return AccessPermission:NotPresent; } diff --git 
a/src/mem/ruby/protocol/MOESI_CMP_directory-msg.sm b/src/mem/ruby/protocol/MOESI_CMP_directory-msg.sm index 7dc582215..2dd34e4d9 100644 --- a/src/mem/ruby/protocol/MOESI_CMP_directory-msg.sm +++ b/src/mem/ruby/protocol/MOESI_CMP_directory-msg.sm @@ -109,9 +109,7 @@ structure(RequestMsg, desc="...", interface="Message") { bool functionalRead(Packet *pkt) { // Read only those messages that contain the data - if (Type == CoherenceRequestType:DMA_READ || - Type == CoherenceRequestType:DMA_WRITE || - Type == CoherenceRequestType:WRITEBACK_CLEAN_DATA || + if (Type == CoherenceRequestType:WRITEBACK_CLEAN_DATA || Type == CoherenceRequestType:WRITEBACK_DIRTY_DATA) { return testAndRead(addr, DataBlk, pkt); }