From 7246f70bfb7697b71d643334b7015b7274810b5f Mon Sep 17 00:00:00 2001 From: Matthew Poremba Date: Thu, 14 Jan 2021 19:03:00 -0600 Subject: [PATCH] mem-ruby: Fix race related to atomics in VIPER There is a race condition in VIPER where an atomic issued to the same address can occur resulting in multiple trigger messages signalling the compleition of the atomic operation. The first message was deallocating the TBE causing the second message to dereference a nullptr when looking up the TBE. A counter is added to track the number of in flight AtomicDone trigger messages. The AtomicDone is not called until the last in flight message arrives at the trigger queue. The remaining messages call AtomicNotDone which simply pops the message from the queue and keeps the TBE allocated. Change-Id: Ie1de0436861a7c393ad6d2fb2faceb83c18d4cc3 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/39175 Reviewed-by: Matt Sinclair Reviewed-by: Jason Lowe-Power Maintainer: Matt Sinclair Tested-by: kokoro --- src/mem/ruby/protocol/GPU_VIPER-TCC.sm | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/src/mem/ruby/protocol/GPU_VIPER-TCC.sm b/src/mem/ruby/protocol/GPU_VIPER-TCC.sm index 5edd7dbba..e21ba99fb 100644 --- a/src/mem/ruby/protocol/GPU_VIPER-TCC.sm +++ b/src/mem/ruby/protocol/GPU_VIPER-TCC.sm @@ -108,6 +108,7 @@ machine(MachineType:TCC, "TCC Cache") MachineID From, desc="Waiting for writeback from..."; NetDest Destination, desc="Data destination"; int numAtomics, desc="number remaining atomics"; + int atomicDoneCnt, desc="number AtomicDones triggered"; } structure(TBETable, external="yes") { @@ -256,9 +257,17 @@ machine(MachineType:TCC, "TCC Cache") peek(triggerQueue_in, TriggerMsg) { TBE tbe := TBEs.lookup(in_msg.addr); Entry cache_entry := getCacheEntry(in_msg.addr); - if (tbe.numAtomics == 0) { + + // There is a possible race where multiple AtomicDone triggers can be + // sent if another Atomic to the same address is issued after the + // AtomicDone is triggered but before the message arrives here. For + // that case we count the number of AtomicDones in flight for this + // address and only call AtomicDone to deallocate the TBE when it is + // the last in flight message. + if (tbe.numAtomics == 0 && tbe.atomicDoneCnt == 1) { trigger(Event:AtomicDone, in_msg.addr, cache_entry, tbe); } else { + tbe.atomicDoneCnt := tbe.atomicDoneCnt - 1; trigger(Event:AtomicNotDone, in_msg.addr, cache_entry, tbe); } } @@ -453,6 +462,7 @@ machine(MachineType:TCC, "TCC Cache") set_tbe(TBEs.lookup(address)); tbe.Destination.clear(); tbe.numAtomics := 0; + tbe.atomicDoneCnt := 0; } if (coreRequestNetwork_in.isReady(clockEdge())) { peek(coreRequestNetwork_in, CPURequestMsg) { @@ -573,6 +583,7 @@ machine(MachineType:TCC, "TCC Cache") tbe.numAtomics := tbe.numAtomics - 1; if (tbe.numAtomics==0) { enqueue(triggerQueue_out, TriggerMsg, 1) { + tbe.atomicDoneCnt := tbe.atomicDoneCnt + 1; out_msg.addr := address; out_msg.Type := TriggerType:AtomicDone; } -- 2.30.2