2 * Copyright (c) 2015-2016 ARM Limited
5 * The license below extends only to copyright in the software and shall
6 * not be construed as granting a license to any other intellectual
7 * property including but not limited to intellectual property relating
8 * to a hardware implementation of the functionality of the software
9 * licensed hereunder. You may use the software subject to the license
10 * terms below provided that you ensure that this notice is replicated
11 * unmodified and in its entirety in all distributions of the software,
12 * modified or unmodified, in source code or in binary form.
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions are
16 * met: redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer;
18 * redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution;
21 * neither the name of the copyright holders nor the names of its
22 * contributors may be used to endorse or promote products derived from
23 * this software without specific prior written permission.
25 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
26 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
27 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
28 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
29 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
30 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
31 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
32 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
33 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
34 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
35 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
39 * The interface class for dist-gem5 simulations.
42 #include "dev/net/dist_iface.hh"
47 #include "base/random.hh"
48 #include "base/trace.hh"
49 #include "cpu/thread_context.hh"
50 #include "debug/DistEthernet.hh"
51 #include "debug/DistEthernetPkt.hh"
52 #include "dev/net/etherpkt.hh"
53 #include "sim/sim_exit.hh"
54 #include "sim/sim_object.hh"
55 #include "sim/system.hh"
58 DistIface::Sync
*DistIface::sync
= nullptr;
59 System
*DistIface::sys
= nullptr;
60 DistIface::SyncEvent
*DistIface::syncEvent
= nullptr;
61 unsigned DistIface::distIfaceNum
= 0;
62 unsigned DistIface::recvThreadsNum
= 0;
63 DistIface
*DistIface::primary
= nullptr;
64 bool DistIface::isSwitch
= false;
// Fold a (start tick, repeat interval) request from one DistIface into the
// process-wide minimum: keep the earliest start and the smallest interval.
// NOTE(review): this extract is missing lines (return type, braces, the
// 'nextAt' assignment and the zero-interval guard are elided); the tokens
// below are kept verbatim — compare with upstream gem5 dist_iface.cc.
DistIface::Sync::init(Tick start_tick, Tick repeat_tick)
    // An earlier start tick wins.
    if (start_tick < nextAt) {
        // NOTE(review): assignment to nextAt appears elided here
        inform("Next dist synchronisation tick is changed to %lu.\n", nextAt);
    // NOTE(review): the guarding 'if (repeat_tick == 0)'-style check for
    // this panic appears elided — TODO confirm against upstream.
        panic("Dist synchronisation interval must be greater than zero");
    // A smaller repeat interval wins.
    if (repeat_tick < nextRepeat) {
        nextRepeat = repeat_tick;
        inform("Dist synchronisation interval is changed to %lu.\n",
// Abort an in-progress global synchronisation (called when the peer
// connection goes away) — takes the sync lock first.
// NOTE(review): the body after the lock acquisition (presumably setting the
// abort flag and waking waiters) is elided in this extract.
DistIface::Sync::abort()
    std::unique_lock<std::mutex> sync_lock(lock);
// Switch-side sync object: tracks sync requests from 'num_nodes' peers.
// NOTE(review): member-init list and most of the body are elided in this
// extract; only the sentinel initialisation of the next sync point remains.
DistIface::SyncSwitch::SyncSwitch(int num_nodes)
    // No sync scheduled yet: use max() as "not set" sentinels.
    nextAt = std::numeric_limits<Tick>::max();
    nextRepeat = std::numeric_limits<Tick>::max();
// Node-side sync object: starts with no pending exit/checkpoint/stop-sync
// requests and no sync point scheduled.
// NOTE(review): some lines of this constructor are elided in this extract.
DistIface::SyncNode::SyncNode()
    // No requests outstanding at construction.
    needExit = ReqType::none;
    needCkpt = ReqType::none;
    needStopSync = ReqType::none;
    // No sync scheduled yet: use max() as "not set" sentinels.
    nextAt = std::numeric_limits<Tick>::max();
    nextRepeat = std::numeric_limits<Tick>::max();
// Run one global synchronisation round from a node: send a cmdSyncReq to
// the switch carrying our pending requests, then block until all local
// receiver threads report that the sync completed (or was aborted).
// NOTE(review): return type, braces and the return statement are elided in
// this extract; tokens below are kept verbatim.
DistIface::SyncNode::run(bool same_tick)
    std::unique_lock<std::mutex> sync_lock(lock);
    assert(waitNum == 0);
    // Each receiver thread must check in before the sync is complete.
    waitNum = DistIface::recvThreadsNum;
    // initiate the global synchronisation
    header.msgType = MsgType::cmdSyncReq;
    header.sendTick = curTick();
    header.syncRepeat = nextRepeat;
    header.needCkpt = needCkpt;
    header.needStopSync = needStopSync;
    // Mark each outstanding request as in flight.
    if (needCkpt != ReqType::none)
        needCkpt = ReqType::pending;
    header.needExit = needExit;
    if (needExit != ReqType::none)
        needExit = ReqType::pending;
    if (needStopSync != ReqType::none)
        needStopSync = ReqType::pending;
    DistIface::primary->sendCmd(header);
    // now wait until all receiver threads complete the synchronisation
    auto lf = [this]{ return waitNum == 0; };
    cv.wait(sync_lock, lf);
    // global synchronisation is done.
    // Unless aborted, a same-tick sync must complete at the agreed tick.
    assert(isAbort || !same_tick || (nextAt == curTick()));
// Switch side of a global sync round: wait until every node's request has
// arrived, then broadcast a cmdSyncAck that resolves the collective
// checkpoint/exit/stop-sync decisions.
// NOTE(review): return type, braces, 'else' keywords and per-branch
// bookkeeping lines are elided in this extract; the paired
// immediate/none assignments below belong to if/else branches upstream.
DistIface::SyncSwitch::run(bool same_tick)
    std::unique_lock<std::mutex> sync_lock(lock);
    // Wait for the sync requests from the nodes
    auto lf = [this]{ return waitNum == 0; };
    cv.wait(sync_lock, lf);
    assert(waitNum == 0);
    if (isAbort) // sync aborted
    assert(!same_tick || (nextAt == curTick()));
    // Complete the global synchronisation
    header.msgType = MsgType::cmdSyncAck;
    header.sendTick = nextAt;
    header.syncRepeat = nextRepeat;
    // Checkpoint is taken if forced, or if every node asked for one.
    if (doCkpt || numCkptReq == numNodes) {
        header.needCkpt = ReqType::immediate;
        header.needCkpt = ReqType::none;
    // Same collective-or-immediate rule for exit...
    if (doExit || numExitReq == numNodes) {
        header.needExit = ReqType::immediate;
        header.needExit = ReqType::none;
    // ...and for disabling periodic synchronisation.
    if (doStopSync || numStopSyncReq == numNodes) {
        header.needStopSync = ReqType::immediate;
        header.needStopSync = ReqType::none;
    DistIface::primary->sendCmd(header);
// Called by a switch receiver thread when a node's sync request arrives:
// merge the node's tick/repeat/request flags into the collective state.
// NOTE(review): heavily elided extract — return type, middle parameters,
// counter increments inside the branches, and the return statement are all
// missing; tokens below are kept verbatim.
DistIface::SyncSwitch::progress(Tick send_tick,
                                ReqType need_stop_sync)
    std::unique_lock<std::mutex> sync_lock(lock);
    if (isAbort) // sync aborted
    // Track the latest send tick seen so far.
    if (send_tick > nextAt)
    // And the smallest repeat interval requested.
    if (nextRepeat > sync_repeat)
        nextRepeat = sync_repeat;
    // Count collective requests / latch immediate ones (bodies elided).
    if (need_ckpt == ReqType::collective)
    else if (need_ckpt == ReqType::immediate)
    if (need_exit == ReqType::collective)
    else if (need_exit == ReqType::immediate)
    if (need_stop_sync == ReqType::collective)
    else if (need_stop_sync == ReqType::immediate)
    // Notify the simulation thread if the on-going sync is complete
    // The receive thread must keep alive in the switch until the node
    // closes the connection. Thus, we always return true here.
// Called by a node receiver thread when the switch's cmdSyncAck arrives:
// adopt the agreed next sync point and record the collective decisions.
// NOTE(review): return type, middle parameters, braces and the return
// statement are elided in this extract.
DistIface::SyncNode::progress(Tick max_send_tick,
                              ReqType do_stop_sync)
    std::unique_lock<std::mutex> sync_lock(lock);
    if (isAbort) // sync aborted
    // Agreed sync point and interval from the switch.
    nextAt = max_send_tick;
    nextRepeat = next_repeat;
    // Convert the switch's verdicts into local booleans.
    doCkpt = (do_ckpt != ReqType::none);
    doExit = (do_exit != ReqType::none);
    doStopSync = (do_stop_sync != ReqType::none);
    // Notify the simulation thread if the on-going sync is complete
    // The receive thread must finish when simulation is about to exit
// Record a checkpoint request to be carried by the next sync round.
// An 'immediate' request may overwrite a pending collective one; repeated
// requests are warned about.
// NOTE(review): return type, braces and the assignment under the final
// 'if' are elided in this extract.
DistIface::SyncNode::requestCkpt(ReqType req)
    std::lock_guard<std::mutex> sync_lock(lock);
    assert(req != ReqType::none);
    if (needCkpt != ReqType::none)
        warn("Ckpt requested multiple times (req:%d)\n", static_cast<int>(req));
    if (needCkpt == ReqType::none || req == ReqType::immediate)
// Record an exit request to be carried by the next sync round; mirrors
// requestCkpt() above.
// NOTE(review): return type, braces and the assignment under the final
// 'if' are elided in this extract.
DistIface::SyncNode::requestExit(ReqType req)
    std::lock_guard<std::mutex> sync_lock(lock);
    assert(req != ReqType::none);
    if (needExit != ReqType::none)
        warn("Exit requested multiple times (req:%d)\n", static_cast<int>(req));
    if (needExit == ReqType::none || req == ReqType::immediate)
// Final sync performed right before a checkpoint is written, after the
// system is fully drained.
// NOTE(review): the executable body of this function is elided in this
// extract; only its explanatory comments remain.
DistIface::Sync::drainComplete()
    // The first DistIface object called this right before writing the
    // checkpoint. We need to drain the underlying physical network here.
    // Note that other gem5 peers may enter this barrier at different
    // ticks due to draining.
    // Only the "first" DistIface object has to perform the sync
// Checkpoint the node sync state: only the pending exit request is saved
// (as an int, since ReqType is an enum).
// NOTE(review): return type and braces are elided in this extract.
DistIface::SyncNode::serialize(CheckpointOut &cp) const
    int need_exit = static_cast<int>(needExit);
    SERIALIZE_SCALAR(need_exit);
// Restore the node sync state saved by serialize() above.
// NOTE(review): return type, braces and the declaration of the local
// 'need_exit' are elided in this extract.
DistIface::SyncNode::unserialize(CheckpointIn &cp)
    UNSERIALIZE_SCALAR(need_exit);
    needExit = static_cast<ReqType>(need_exit);
// Checkpoint the switch sync state: the count of exit requests received.
// NOTE(review): return type, braces and possibly further scalars are
// elided in this extract.
DistIface::SyncSwitch::serialize(CheckpointOut &cp) const
    SERIALIZE_SCALAR(numExitReq);
// Restore the switch sync state saved by serialize() above.
// NOTE(review): return type and braces are elided in this extract.
DistIface::SyncSwitch::unserialize(CheckpointIn &cp)
    UNSERIALIZE_SCALAR(numExitReq);
// Arm the periodic global sync: run one barrier so all peers agree on the
// smallest repeat interval and a common start tick, then (re)schedule the
// event at that tick.
// NOTE(review): return type, braces and the conditions selecting between
// reschedule() and schedule() are elided in this extract.
DistIface::SyncEvent::start()
    // Note that this may be called either from startup() or drainResume()
    // At this point, all DistIface objects has already called Sync::init() so
    // we have a local minimum of the start tick and repeat for the periodic
    repeat = DistIface::sync->nextRepeat;
    // Do a global barrier to agree on a common repeat value (the smallest
    // one from all participating nodes.
    if (!DistIface::sync->run(false))
        panic("DistIface::SyncEvent::start() aborted\n");
    // After the barrier no request may be outstanding and the agreed sync
    // point must not be in the past.
    assert(!DistIface::sync->doCkpt);
    assert(!DistIface::sync->doExit);
    assert(!DistIface::sync->doStopSync);
    assert(DistIface::sync->nextAt >= curTick());
    assert(DistIface::sync->nextRepeat <= repeat);
    assert(!scheduled());
    // Use the maximum of the current tick for all participating nodes or a
    // user provided starting tick.
    reschedule(DistIface::sync->nextAt);
    schedule(DistIface::sync->nextAt);
    inform("Dist sync scheduled at %lu and repeats %lu\n", when(),
           DistIface::sync->nextRepeat);
// The periodic global sync event handler: run one sync round at the agreed
// tick, act on the collective checkpoint/exit/stop-sync outcome, and
// reschedule itself.
// NOTE(review): return type, braces, the /* */ delimiters of the inner
// block comment, an #endif/else structure and some statements are elided
// in this extract; tokens below are kept verbatim.
DistIface::SyncEvent::process()
    // We may not start a global periodic sync while draining before taking a
    // checkpoint. This is due to the possibility that peer gem5 processes
    // may not hit the same periodic sync before they complete draining and
    // that would make this periodic sync clash with sync called from
    // DistIface::serialize() by other gem5 processes.
    // We would need a 'distributed drain' solution to eliminate this
    // Note that if draining was not triggered by checkpointing then we are
    // fine since no extra global sync will happen (i.e. all peer gem5 will
    // hit this periodic sync eventually).
    panic_if(_draining && DistIface::sync->doCkpt,
             "Distributed sync is hit while draining");
    // Note that this is a global event so this process method will be called
    // by only exactly one thread.
    // We hold the eventq lock at this point but the receiver thread may
    // need the lock to schedule new recv events while waiting for the
    // dist sync to complete.
    // Note that the other simulation threads also release their eventq
    // locks while waiting for us due to the global event semantics.
    EventQueue::ScopedRelease sr(curEventQueue());
    // we do a global sync here that is supposed to happen at the same
    // tick in all gem5 peers
    if (!DistIface::sync->run(true))
        return; // global sync aborted
    // global sync completed
    if (DistIface::sync->doCkpt)
        exitSimLoop("checkpoint");
    if (DistIface::sync->doExit) {
        exitSimLoop("exit request from gem5 peers");
    if (DistIface::sync->doStopSync) {
        DistIface::sync->doStopSync = false;
        inform("synchronization disabled at %lu\n", curTick());
        // The switch node needs to wait for the next sync immediately.
        if (DistIface::isSwitch) {
        // Wake up thread contexts on non-switch nodes.
        for (auto *tc : primary->sys->threads) {
            if (tc->status() == ThreadContext::Suspended)
                warn_once("Tried to wake up thread in dist-gem5, but it "
                          "was already awake!\n");
    // schedule the next periodic sync
    repeat = DistIface::sync->nextRepeat;
    schedule(curTick() + repeat);
// Per-receiver-thread setup: bind this thread to the simulated link's
// event queue and remember the receive-done event and link delay.
// NOTE(review): return type and braces are elided in this extract.
DistIface::RecvScheduler::init(Event *recv_done, Tick link_delay)
    // This is called from the receiver thread when it starts running. The new
    // receiver thread shares the event queue with the simulation thread
    // (associated with the simulated Ethernet link).
    curEventQueue(eventManager->eventQueue());
    recvDone = recv_done;
    linkDelay = link_delay;
// Compute the local tick at which a packet sent at 'send_tick' should be
// received: send tick + sender-side delay + link delay, with sanity checks
// that the receive window is respected and we have not already passed it.
// NOTE(review): return type, middle parameters, braces, the tail of the
// second panic format string and the return statement are elided in this
// extract.
DistIface::RecvScheduler::calcReceiveTick(Tick send_tick,
    Tick recv_tick = send_tick + send_delay + linkDelay;
    // sanity check (we need atleast a send delay long window)
    assert(recv_tick >= prev_recv_tick + send_delay);
    panic_if(prev_recv_tick + send_delay > recv_tick,
             "Receive window is smaller than send delay");
    panic_if(recv_tick <= curTick(),
             "Simulators out of sync - missed packet receive by %llu ticks"
             "(rev_recv_tick: %lu send_tick: %lu send_delay: %lu "
             curTick() - recv_tick, prev_recv_tick, send_tick, send_delay,
// After restoring from a checkpoint, rebase all pending receive
// descriptors onto the current tick so no packet is missed even if the
// link speed/delay changed across the checkpoint.
// NOTE(review): return type, braces, the early-return for the
// non-restore case, the temporary container 'v' handling and the loop
// plumbing are elided in this extract; tokens below are kept verbatim.
DistIface::RecvScheduler::resumeRecvTicks()
    // Schedule pending packets asap in case link speed/delay changed when
    // restoring from the checkpoint.
    // This may be done during unserialize except that curTick() is unknown
    // so we call this during drainResume().
    // If we are not restoring from a checkppint then link latency could not
    // change so we just return.
    while (!descQueue.empty()) {
        Desc d = descQueue.front();
        // Rebase the descriptor onto "now".
        d.sendTick = curTick();
        d.sendDelay = d.packet->simLength; // assume 1 tick/byte max link speed
    if (recvDone->scheduled()) {
        assert(!descQueue.empty());
        eventManager->reschedule(recvDone, curTick());
    assert(descQueue.empty() && v.empty());
// Receiver-thread entry: queue an incoming data packet and schedule the
// shared recvDone event for it (only the queue head is ever scheduled).
// The event-queue lock protects the descriptor queue against the
// simulation thread.
// NOTE(review): return type, remaining parameters, braces and the 'else'
// joining the two scheduling cases are elided in this extract.
DistIface::RecvScheduler::pushPacket(EthPacketPtr new_packet,
    // Note : this is called from the receiver thread
    curEventQueue()->lock();
    Tick recv_tick = calcReceiveTick(send_tick, send_delay, prevRecvTick);
    DPRINTF(DistEthernetPkt, "DistIface::recvScheduler::pushPacket "
            "send_tick:%llu send_delay:%llu link_delay:%llu recv_tick:%llu\n",
            send_tick, send_delay, linkDelay, recv_tick);
    // Every packet must be sent and arrive in the same quantum
    assert(send_tick > primary->syncEvent->when() -
           primary->syncEvent->repeat);
    // No packet may be scheduled for receive in the arrival quantum
    assert(send_tick + send_delay + linkDelay > primary->syncEvent->when());
    // Now we are about to schedule a recvDone event for the new data packet.
    // We use the same recvDone object for all incoming data packets. Packet
    // descriptors are saved in the ordered queue. The currently scheduled
    // packet is always on the top of the queue.
    // NOTE: we use the event queue lock to protect the receive desc queue,
    // too, which is accessed both by the receiver thread and the simulation
    descQueue.emplace(new_packet, send_tick, send_delay);
    if (descQueue.size() == 1) {
        // First pending packet: schedule the shared receive-done event.
        assert(!recvDone->scheduled());
        eventManager->schedule(recvDone, recv_tick);
        assert(recvDone->scheduled());
        panic_if(descQueue.front().sendTick + descQueue.front().sendDelay > recv_tick,
                 "Out of order packet received (recv_tick: %lu top(): %lu\n",
                 recv_tick, descQueue.front().sendTick + descQueue.front().sendDelay);
    curEventQueue()->unlock();
// Simulation-thread entry (from the recvDone event): take the head packet
// off the descriptor queue and, if more are pending, schedule the event
// for the next one.
// NOTE(review): return type, braces, the pop() call, the third
// calcReceiveTick argument and the return statement are elided in this
// extract.
DistIface::RecvScheduler::popPacket()
    // Note : this is called from the simulation thread when a receive done
    // event is being processed for the link. We assume that the thread holds
    // the event queue queue lock when this is called!
    EthPacketPtr next_packet = descQueue.front().packet;
    if (descQueue.size() > 0) {
        Tick recv_tick = calcReceiveTick(descQueue.front().sendTick,
                                         descQueue.front().sendDelay,
        eventManager->schedule(recvDone, recv_tick);
    prevRecvTick = curTick();
// Checkpoint one receive descriptor: its timing fields plus the wrapped
// Ethernet packet (under the "rxPacket" name).
// NOTE(review): return type and braces are elided in this extract.
DistIface::RecvScheduler::Desc::serialize(CheckpointOut &cp) const
    SERIALIZE_SCALAR(sendTick);
    SERIALIZE_SCALAR(sendDelay);
    packet->serialize("rxPacket", cp);
// Restore one receive descriptor saved by Desc::serialize() above,
// allocating a fresh packet object to unserialize into.
// NOTE(review): return type and braces are elided in this extract.
DistIface::RecvScheduler::Desc::unserialize(CheckpointIn &cp)
    UNSERIALIZE_SCALAR(sendTick);
    UNSERIALIZE_SCALAR(sendDelay);
    packet = std::make_shared<EthPacketData>();
    packet->unserialize("rxPacket", cp);
// Checkpoint the receive scheduler: the last receive tick plus every
// queued descriptor. A copy of the queue is drained so each descriptor
// can be visited in order without disturbing the live queue.
// NOTE(review): return type, braces and the pop() inside the loop are
// elided in this extract.
DistIface::RecvScheduler::serialize(CheckpointOut &cp) const
    SERIALIZE_SCALAR(prevRecvTick);
    // serialize the receive desc queue
    std::queue<Desc> tmp_queue(descQueue);
    unsigned n_desc_queue = descQueue.size();
    assert(tmp_queue.size() == descQueue.size());
    SERIALIZE_SCALAR(n_desc_queue);
    for (int i = 0; i < n_desc_queue; i++) {
        tmp_queue.front().serializeSection(cp, csprintf("rxDesc_%d", i));
    assert(tmp_queue.empty());
// Restore the receive scheduler from a checkpoint: must start empty, then
// re-reads prevRecvTick and rebuilds the descriptor queue entry by entry.
// NOTE(review): return type, braces, the Desc 'recv_desc' declaration and
// the ckptRestore update are elided in this extract.
DistIface::RecvScheduler::unserialize(CheckpointIn &cp)
    assert(descQueue.size() == 0);
    assert(!recvDone->scheduled());
    assert(!ckptRestore);
    UNSERIALIZE_SCALAR(prevRecvTick);
    // unserialize the receive desc queue
    unsigned n_desc_queue;
    UNSERIALIZE_SCALAR(n_desc_queue);
    for (int i = 0; i < n_desc_queue; i++) {
        recv_desc.unserializeSection(cp, csprintf("rxDesc_%d", i));
        descQueue.push(recv_desc);
// Construct a dist interface. The first instance in the process becomes
// 'primary' and creates the process-wide singletons: a SyncSwitch when
// modelling the switch, a SyncNode otherwise, plus the periodic SyncEvent.
// NOTE(review): several constructor parameters, braces, the if/else
// around the two 'sync = new ...' assignments, the primary assignment and
// the distIfaceNum increment are elided in this extract.
DistIface::DistIface(unsigned dist_rank,
                     bool is_switch, int num_nodes) :
    syncStart(sync_start), syncRepeat(sync_repeat),
    recvThread(nullptr), recvScheduler(em), syncStartOnPseudoOp(use_pseudo_op),
    rank(dist_rank), size(dist_size)
    DPRINTF(DistEthernet, "DistIface() ctor rank:%d\n",dist_rank);
    if (primary == nullptr) {
        // First DistIface in this process: create the shared sync machinery.
        assert(sync == nullptr);
        assert(syncEvent == nullptr);
        isSwitch = is_switch;
        sync = new SyncSwitch(num_nodes);
        sync = new SyncNode();
        syncEvent = new SyncEvent();
    // Each interface gets a unique id from the running counter.
    distIfaceId = distIfaceNum;
// Tear down this interface; cleanup of the shared singletons is guarded by
// the interface counter.
// NOTE(review): most of the destructor body (thread join, deletes) is
// elided in this extract; only the counter check remains.
DistIface::~DistIface()
    if (distIfaceNum-- == 0) {
// Send a simulated Ethernet packet to the peer gem5 process: build a
// dataDescriptor header carrying the send tick/delay and packet sizes,
// then transmit header + payload.
// NOTE(review): return type and braces are elided in this extract.
DistIface::packetOut(EthPacketPtr pkt, Tick send_delay)
    // Prepare a dist header packet for the Ethernet packet we want to
    header.msgType = MsgType::dataDescriptor;
    header.sendTick = curTick();
    header.sendDelay = send_delay;
    header.dataPacketLength = pkt->length;
    header.simLength = pkt->simLength;
    // Send out the packet and the meta info.
    sendPacket(header, pkt);
    DPRINTF(DistEthernetPkt,
            "DistIface::sendDataPacket() done size:%d send_delay:%llu\n",
            pkt->length, send_delay);
// Body of the receiver thread: initialise the receive scheduler, then loop
// receiving dist header packets — data packets go to the scheduler,
// everything else feeds the sync protocol; loss of connection triggers a
// simulation exit.
// NOTE(review): return type, the surrounding loop construct, braces,
// several middle arguments and the abort/return statements are elided in
// this extract.
DistIface::recvThreadFunc(Event *recv_done, Tick link_delay)
    EthPacketPtr new_packet;
    DistHeaderPkt::Header header;
    // Initialize receive scheduler parameters
    recvScheduler.init(recv_done, link_delay);
    // Main loop to wait for and process any incoming message.
    // recvHeader() blocks until the next dist header packet comes in.
    if (!recvHeader(header)) {
        // We lost connection to the peer gem5 processes most likely
        // because one of them called m5 exit. So we stop here.
        // Grab the eventq lock to stop the simulation thread
        curEventQueue()->lock();
        exitSimLoop("connection to gem5 peer got closed");
        curEventQueue()->unlock();
        // The simulation thread may be blocked in processing an on-going
        // global synchronisation. Abort the sync to give the simulation
        // thread a chance to make progress and process the exit event.
        // Finish receiver thread
    // We got a valid dist header packet, let's process it
    if (header.msgType == MsgType::dataDescriptor) {
        recvPacket(header, new_packet);
        recvScheduler.pushPacket(new_packet,
        // everything else must be synchronisation related command
        if (!sync->progress(header.sendTick,
                            header.needStopSync))
            // Finish receiver thread if simulation is about to exit
// Spawn the dedicated receiver thread running recvThreadFunc(); the const
// is cast away because std::thread needs a mutable Event pointer.
// NOTE(review): return type, braces, the 'this' thread argument, the
// link_delay argument and the recvThreadsNum update are elided in this
// extract.
DistIface::spawnRecvThread(const Event *recv_done, Tick link_delay)
    assert(recvThread == nullptr);
    recvThread = new std::thread(&DistIface::recvThreadFunc,
                                 const_cast<Event *>(recv_done),
// NOTE(review): body fragment of DistIface::drain() — the function
// signature and opening brace are elided in this extract. It marks the
// sync event as draining and reports Drained immediately.
    DPRINTF(DistEthernet,"DistIFace::drain() called\n");
    // This can be called multiple times in the same drain cycle.
    syncEvent->draining(true);
    return DrainState::Drained;
// Resume after a drain: clear the draining flag on the sync event and
// rebase any pending receive descriptors (see resumeRecvTicks()).
// NOTE(review): return type, an intervening condition and the closing
// brace are elided in this extract.
DistIface::drainResume() {
    DPRINTF(DistEthernet,"DistIFace::drainResume() called\n");
    syncEvent->draining(false);
    recvScheduler.resumeRecvTicks();
// Checkpoint this interface: perform the final cross-process sync first,
// then save rank/id, the receive scheduler section, and (on the primary
// instance only) the shared sync state.
// NOTE(review): return type and braces are elided in this extract.
DistIface::serialize(CheckpointOut &cp) const
    // Drain the dist interface before the checkpoint is taken. We cannot call
    // this as part of the normal drain cycle because this dist sync has to be
    // called exactly once after the system is fully drained.
    sync->drainComplete();
    unsigned rank_orig = rank, dist_iface_id_orig = distIfaceId;
    SERIALIZE_SCALAR(rank_orig);
    SERIALIZE_SCALAR(dist_iface_id_orig);
    recvScheduler.serializeSection(cp, "recvScheduler");
    // Only the primary owns the shared sync object.
    if (this == primary) {
        sync->serializeSection(cp, "Sync");
// Restore this interface from a checkpoint, verifying that the saved
// rank and interface id match the current configuration.
// NOTE(review): return type, braces and the trailing arguments of both
// panic_if calls are elided in this extract.
DistIface::unserialize(CheckpointIn &cp)
    unsigned rank_orig, dist_iface_id_orig;
    UNSERIALIZE_SCALAR(rank_orig);
    UNSERIALIZE_SCALAR(dist_iface_id_orig);
    panic_if(rank != rank_orig, "Rank mismatch at resume (rank=%d, orig=%d)",
    panic_if(distIfaceId != dist_iface_id_orig, "Dist iface ID mismatch "
             "at resume (distIfaceId=%d, orig=%d)", distIfaceId,
    recvScheduler.unserializeSection(cp, "recvScheduler");
    // Only the primary owns the shared sync object.
    if (this == primary) {
        sync->unserializeSection(cp, "Sync");
// Per-interface initialisation: spawn the receiver thread, fold this
// interface's sync start/interval into the shared Sync object, and seed
// the RNG differently per rank so peers diverge in their random streams.
// NOTE(review): return type and braces are elided in this extract.
DistIface::init(const Event *done_event, Tick link_delay)
    // Init hook for the underlaying message transport to setup/finalize
    // communication channels
    // Spawn a new receiver thread that will process messages
    // coming in from peer gem5 processes.
    // The receive thread will also schedule a (receive) doneEvent
    // for each incoming data packet.
    spawnRecvThread(done_event, link_delay);
    // Adjust the periodic sync start and interval. Different DistIface
    // might have different requirements. The singleton sync object
    // will select the minimum values for both params.
    assert(sync != nullptr);
    sync->init(syncStart, syncRepeat);
    // Initialize the seed for random generator to avoid the same sequence
    // in all gem5 peer processes
    assert(primary != nullptr);
    random_mt.init(5489 * (rank+1) + 257);
// NOTE(review): body fragment of DistIface::startup() — the signature,
// opening brace and the syncEvent->start() call under the 'if' are elided
// in this extract. It arms the periodic sync on the primary unless sync
// start was deferred to a pseudo-op (switches always start it).
    DPRINTF(DistEthernet, "DistIface::startup() started\n");
    // Schedule synchronization unless we are not a switch in pseudo_op mode.
    if (this == primary && (!syncStartOnPseudoOp || isSwitch))
    DPRINTF(DistEthernet, "DistIface::startup() done\n");
// Handle an m5 checkpoint pseudo-op in dist mode: zero delay requests a
// collective checkpoint (taken when all nodes agree), non-zero delay an
// immediate one at the next sync; periodic checkpoints are not supported.
// NOTE(review): return type, braces, the if/else structure and the tail
// of the first inform string are elided in this extract.
DistIface::readyToCkpt(Tick delay, Tick period)
    DPRINTF(DistEthernet, "DistIface::readyToCkpt() called, delay:%lu "
            "period:%lu\n", delay, period);
    inform("m5 checkpoint called with zero delay => triggering collaborative "
    sync->requestCkpt(ReqType::collective);
    inform("m5 checkpoint called with non-zero delay => triggering immediate "
           "checkpoint (at the next sync)\n");
    sync->requestCkpt(ReqType::immediate);
    inform("Non-zero period for m5_ckpt is ignored in "
           "distributed gem5 runs\n");
// Record a stop-sync request to be carried by the next sync round.
// NOTE(review): the body after the lock acquisition (presumably the
// needStopSync assignment) is elided in this extract.
DistIface::SyncNode::requestStopSync(ReqType req)
    std::lock_guard<std::mutex> sync_lock(lock);
// Toggle periodic synchronisation on/off from a pseudo-op. In both
// directions every local thread context is quiesced until all nodes have
// reached the new sync state, to avoid deadlock and causality errors.
// NOTE(review): return type, braces, the quiesce call in the "off" path,
// the matching #endif directives and the else joining the two paths are
// elided in this extract.
DistIface::toggleSync(ThreadContext *tc)
    // Unforunate that we have to populate the system pointer member this way.
    primary->sys = tc->getSystemPtr();
    // The invariant for both syncing and "unsyncing" is that all threads will
    // stop executing intructions until the desired sync state has been reached
    // for all nodes. This is the easiest way to prevent deadlock (in the case
    // of "unsyncing") and causality errors (in the case of syncing).
    if (primary->syncEvent->scheduled()) {
        inform("Request toggling syncronization off\n");
        primary->sync->requestStopSync(ReqType::collective);
        // At this point, we have no clue when everyone will reach the sync
        // stop point. Suspend execution of all local thread contexts.
        // Dist-gem5 will reactivate all thread contexts when everyone has
        // reached the sync stop point.
#if THE_ISA != NULL_ISA
        for (auto *tc : primary->sys->threads) {
            if (tc->status() == ThreadContext::Active)
        inform("Request toggling syncronization on\n");
        primary->syncEvent->start();
        // We need to suspend all CPUs until the sync point is reached by all
        // nodes to prevent causality errors. We can also schedule CPU
        // activation here, since we know exactly when the next sync will
#if THE_ISA != NULL_ISA
        for (auto *tc : primary->sys->threads) {
            if (tc->status() == ThreadContext::Active)
                tc->quiesceTick(primary->syncEvent->when() + 1);
// Handle an m5 exit pseudo-op in dist mode: make sure periodic sync is
// running (exit coordination needs it), then request a collective exit
// for zero delay or an immediate exit at the next sync otherwise.
// NOTE(review): return type, braces, the if/else structure and the tails
// of the DPRINTF/inform argument lists are elided in this extract.
DistIface::readyToExit(Tick delay)
    DPRINTF(DistEthernet, "DistIface::readyToExit() called, delay:%lu\n",
    // To successfully coordinate an exit, all nodes must be synchronising
    if (!primary->syncEvent->scheduled())
        primary->syncEvent->start();
    inform("m5 exit called with zero delay => triggering collaborative "
    sync->requestExit(ReqType::collective);
    inform("m5 exit called with non-zero delay => triggering immediate "
           "exit (at the next sync)\n");
    sync->requestExit(ReqType::immediate);
// Query the dist rank; warns when called outside a distributed run.
// NOTE(review): return type, braces and the return value logic are elided
// in this extract.
DistIface::rankParam()
    warn("Dist-rank parameter is queried in single gem5 simulation.");
// Query the dist size (number of nodes); warns when called outside a
// distributed run.
// NOTE(review): return type, braces and the return value logic are elided
// in this extract.
DistIface::sizeParam()
    warn("Dist-size parameter is queried in single gem5 simulation.");