misc: Replaced master/slave terminology
[gem5.git] / src / dev / net / dist_iface.hh
1 /*
2 * Copyright (c) 2015-2016 ARM Limited
3 * All rights reserved
4 *
5 * The license below extends only to copyright in the software and shall
6 * not be construed as granting a license to any other intellectual
7 * property including but not limited to intellectual property relating
8 * to a hardware implementation of the functionality of the software
9 * licensed hereunder. You may use the software subject to the license
10 * terms below provided that you ensure that this notice is replicated
11 * unmodified and in its entirety in all distributions of the software,
12 * modified or unmodified, in source code or in binary form.
13 *
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions are
16 * met: redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer;
18 * redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution;
21 * neither the name of the copyright holders nor the names of its
22 * contributors may be used to endorse or promote products derived from
23 * this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
26 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
27 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
28 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
29 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
30 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
31 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
32 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
33 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
34 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
35 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 */
37
38 /* @file
39 * The interface class for dist gem5 simulations.
40 *
41 * dist-gem5 is an extension to gem5 to enable parallel simulation of a
42 * distributed system (e.g. simulation of a pool of machines
43 * connected by Ethernet links). A dist gem5 run consists of separate gem5
44 * processes running in parallel. Each gem5 process executes
45 * the simulation of a component of the simulated distributed system.
46 * (An example component can be a dist-core board with an Ethernet NIC.)
47 * The DistIface class below provides services to transfer data and
48 * control messages among the gem5 processes. The main such services are
49 * as follows.
50 *
51 * 1. Send a data packet coming from a simulated Ethernet link. The packet
52 * will be transferred to (all) the target gem5 process(es). The send
53 * operation is always performed by the simulation thread, i.e. the gem5
54 * thread that is processing the event queue associated with the simulated
55 * Ethernet link.
56 *
57 * 2. Spawn a receiver thread to process messages coming in from the
58 * other gem5 processes. Each simulated Ethernet link has its own
59 * associated receiver thread. The receiver thread saves the incoming packet
60 * and schedules an appropriate receive event in the event queue.
61 *
62 * 3. Schedule a global barrier event periodically to keep the peer gem5
63 * processes in sync. The basic idea of this periodic barrier is that no
64 * gem5 process can go ahead further than the simulated link
65 * transmission delay. This ensures that a corresponding receive event
66 * can always be scheduled for any message coming in from a peer gem5
67 * process.
68 *
69 *
70 *
71 * This interface is an abstract class. It can work with various low level
72 * send/receive service implementations (e.g. TCP/IP, MPI,...). A TCP
73 * stream socket version is implemented in src/dev/net/tcp_iface.[hh,cc].
74 */
75 #ifndef __DEV_DIST_IFACE_HH__
76 #define __DEV_DIST_IFACE_HH__
77
#include <array>
#include <condition_variable>
#include <mutex>
#include <queue>
#include <thread>
#include <utility>

#include "base/logging.hh"
#include "dev/net/dist_packet.hh"
#include "dev/net/etherpkt.hh"
#include "sim/core.hh"
#include "sim/drain.hh"
#include "sim/global_event.hh"
#include "sim/serialize.hh"
91
92 class EventManager;
93 class System;
94 class ThreadContext;
95
96 /**
97 * The interface class to talk to peer gem5 processes.
98 */
99 class DistIface : public Drainable, public Serializable
100 {
101 public:
102 typedef DistHeaderPkt::Header Header;
103
104 protected:
105 typedef DistHeaderPkt::MsgType MsgType;
106 typedef DistHeaderPkt::ReqType ReqType;
107
108 private:
109 class SyncEvent;
110 /** @class Sync
111 * This class implements global sync operations among gem5 peer processes.
112 *
113 * @note This class is used as a singleton object (shared by all DistIface
114 * objects).
115 */
116 class Sync : public Serializable
117 {
118 protected:
119 /**
120 * The lock to protect access to the Sync object.
121 */
122 std::mutex lock;
123 /**
124 * Condition variable for the simulation thread to wait on
125 * until all receiver threads completes the current global
126 * synchronisation.
127 */
128 std::condition_variable cv;
129 /**
130 * Number of receiver threads that not yet completed the current global
131 * synchronisation.
132 */
133 unsigned waitNum;
134 /**
135 * Flag is set if exit is permitted upon sync completion
136 */
137 bool doExit;
138 /**
139 * Flag is set if taking a ckpt is permitted upon sync completion
140 */
141 bool doCkpt;
142 /**
143 * Flag is set if sync is to stop upon sync completion
144 */
145 bool doStopSync;
146 /**
147 * The repeat value for the next periodic sync
148 */
149 Tick nextRepeat;
150 /**
151 * Tick for the next periodic sync (if the event is not scheduled yet)
152 */
153 Tick nextAt;
154 /**
155 * Flag is set if the sync is aborted (e.g. due to connection lost)
156 */
157 bool isAbort;
158
159 friend class SyncEvent;
160
161 public:
162 /**
163 * Initialize periodic sync params.
164 *
165 * @param start Start tick for dist synchronisation
166 * @param repeat Frequency of dist synchronisation
167 *
168 */
169 void init(Tick start, Tick repeat);
170 /**
171 * Core method to perform a full dist sync.
172 *
173 * @return true if the sync completes, false if it gets aborted
174 */
175 virtual bool run(bool same_tick) = 0;
176 /**
177 * Callback when the receiver thread gets a sync ack message.
178 *
179 * @return false if the receiver thread needs to stop (e.g.
180 * simulation is to exit)
181 */
182 virtual bool progress(Tick send_tick,
183 Tick next_repeat,
184 ReqType do_ckpt,
185 ReqType do_exit,
186 ReqType do_stop_sync) = 0;
187 /**
188 * Abort processing an on-going sync event (in case of an error, e.g.
189 * lost connection to a peer gem5)
190 */
191 void abort();
192
193 virtual void requestCkpt(ReqType req) = 0;
194 virtual void requestExit(ReqType req) = 0;
195 virtual void requestStopSync(ReqType req) = 0;
196
197 void drainComplete();
198
199 virtual void serialize(CheckpointOut &cp) const override = 0;
200 virtual void unserialize(CheckpointIn &cp) override = 0;
201 };
202
203 class SyncNode: public Sync
204 {
205 private:
206 /**
207 * Exit requested
208 */
209 ReqType needExit;
210 /**
211 * Ckpt requested
212 */
213 ReqType needCkpt;
214 /**
215 * Sync stop requested
216 */
217 ReqType needStopSync;
218
219 public:
220
221 SyncNode();
222 ~SyncNode() {}
223 bool run(bool same_tick) override;
224 bool progress(Tick max_req_tick,
225 Tick next_repeat,
226 ReqType do_ckpt,
227 ReqType do_exit,
228 ReqType do_stop_sync) override;
229
230 void requestCkpt(ReqType req) override;
231 void requestExit(ReqType req) override;
232 void requestStopSync(ReqType req) override;
233
234 void serialize(CheckpointOut &cp) const override;
235 void unserialize(CheckpointIn &cp) override;
236 };
237
238 class SyncSwitch: public Sync
239 {
240 private:
241 /**
242 * Counter for recording exit requests
243 */
244 unsigned numExitReq;
245 /**
246 * Counter for recording ckpt requests
247 */
248 unsigned numCkptReq;
249 /**
250 * Counter for recording stop sync requests
251 */
252 unsigned numStopSyncReq;
253 /**
254 * Number of connected simulated nodes
255 */
256 unsigned numNodes;
257
258 public:
259 SyncSwitch(int num_nodes);
260 ~SyncSwitch() {}
261
262 bool run(bool same_tick) override;
263 bool progress(Tick max_req_tick,
264 Tick next_repeat,
265 ReqType do_ckpt,
266 ReqType do_exit,
267 ReqType do_stop_sync) override;
268
269 void requestCkpt(ReqType) override {
270 panic("Switch requested checkpoint");
271 }
272 void requestExit(ReqType) override {
273 panic("Switch requested exit");
274 }
275 void requestStopSync(ReqType) override {
276 panic("Switch requested stop sync");
277 }
278
279 void serialize(CheckpointOut &cp) const override;
280 void unserialize(CheckpointIn &cp) override;
281 };
282
283 /**
284 * The global event to schedule periodic dist sync. It is used as a
285 * singleton object.
286 *
287 * The periodic synchronisation works as follows.
288 * 1. A SyncEvent is scheduled as a global event when startup() is
289 * called.
290 * 2. The process() method of the SyncEvent initiates a new barrier
291 * for each simulated Ethernet link.
292 * 3. Simulation thread(s) then waits until all receiver threads
293 * complete the ongoing barrier. The global sync event is done.
294 */
295 class SyncEvent : public GlobalSyncEvent
296 {
297 private:
298 /**
299 * Flag to set when the system is draining
300 */
301 bool _draining;
302 public:
303 /**
304 * Only the firstly instantiated DistIface object will
305 * call this constructor.
306 */
307 SyncEvent() : GlobalSyncEvent(Sim_Exit_Pri, 0), _draining(false) {}
308
309 ~SyncEvent() {}
310 /**
311 * Schedule the first periodic sync event.
312 */
313 void start();
314 /**
315 * This is a global event so process() will only be called by
316 * exactly one simulation thread. (See further comments in the .cc
317 * file.)
318 */
319 void process() override;
320
321 bool draining() const { return _draining; }
322 void draining(bool fl) { _draining = fl; }
323 };
324 /**
325 * Class to encapsulate information about data packets received.
326
327 * @note The main purpose of the class to take care of scheduling receive
328 * done events for the simulated network link and store incoming packets
329 * until they can be received by the simulated network link.
330 */
331 class RecvScheduler : public Serializable
332 {
333 private:
334 /**
335 * Received packet descriptor. This information is used by the receive
336 * thread to schedule receive events and by the simulation thread to
337 * process those events.
338 */
339 struct Desc : public Serializable
340 {
341 EthPacketPtr packet;
342 Tick sendTick;
343 Tick sendDelay;
344
345 Desc() : sendTick(0), sendDelay(0) {}
346 Desc(EthPacketPtr p, Tick s, Tick d) :
347 packet(p), sendTick(s), sendDelay(d) {}
348 Desc(const Desc &d) :
349 packet(d.packet), sendTick(d.sendTick), sendDelay(d.sendDelay) {}
350
351 void serialize(CheckpointOut &cp) const override;
352 void unserialize(CheckpointIn &cp) override;
353 };
354 /**
355 * The queue to store the receive descriptors.
356 */
357 std::queue<Desc> descQueue;
358 /**
359 * The tick when the most recent receive event was processed.
360 *
361 * @note This information is necessary to simulate possible receiver
362 * link contention when calculating the receive tick for the next
363 * incoming data packet (see the calcReceiveTick() method)
364 */
365 Tick prevRecvTick;
366 /**
367 * The receive done event for the simulated Ethernet link.
368 *
369 * @note This object is constructed by the simulated network link. We
370 * schedule this object for each incoming data packet.
371 */
372 Event *recvDone;
373 /**
374 * The link delay in ticks for the simulated Ethernet link.
375 *
376 * @note This value is used for calculating the receive ticks for
377 * incoming data packets.
378 */
379 Tick linkDelay;
380 /**
381 * The event manager associated with the simulated Ethernet link.
382 *
383 * @note It is used to access the event queue for scheduling receive
384 * done events for the link.
385 */
386 EventManager *eventManager;
387 /**
388 * Calculate the tick to schedule the next receive done event.
389 *
390 * @param send_tick The tick the packet was sent.
391 * @param send_delay The simulated delay at the sender side.
392 * @param prev_recv_tick Tick when the last receive event was
393 * processed.
394 *
395 * @note This method tries to take into account possible receiver link
396 * contention and adjust receive tick for the incoming packets
397 * accordingly.
398 */
399 Tick calcReceiveTick(Tick send_tick,
400 Tick send_delay,
401 Tick prev_recv_tick);
402
403 /**
404 * Flag to set if receive ticks for pending packets need to be
405 * recalculated due to changed link latencies at a resume
406 */
407 bool ckptRestore;
408
409 public:
410 /**
411 * Scheduler for the incoming data packets.
412 *
413 * @param em The event manager associated with the simulated Ethernet
414 * link.
415 */
416 RecvScheduler(EventManager *em) :
417 prevRecvTick(0), recvDone(nullptr), linkDelay(0),
418 eventManager(em), ckptRestore(false) {}
419
420 /**
421 * Initialize network link parameters.
422 *
423 * @note This method is called from the receiver thread (see
424 * recvThreadFunc()).
425 */
426 void init(Event *recv_done, Tick link_delay);
427 /**
428 * Fetch the next packet that is to be received by the simulated network
429 * link.
430 *
431 * @note This method is called from the process() method of the receive
432 * done event associated with the network link.
433 */
434 EthPacketPtr popPacket();
435 /**
436 * Push a newly arrived packet into the desc queue.
437 */
438 void pushPacket(EthPacketPtr new_packet,
439 Tick send_tick,
440 Tick send_delay);
441
442 void serialize(CheckpointOut &cp) const override;
443 void unserialize(CheckpointIn &cp) override;
444 /**
445 * Adjust receive ticks for pending packets when restoring from a
446 * checkpoint
447 *
448 * @note Link speed and delay parameters may change at resume.
449 */
450 void resumeRecvTicks();
451 };
452 /**
453 * Tick to schedule the first dist sync event.
454 * This is just as optimization : we do not need any dist sync
455 * event until the simulated NIC is brought up by the OS.
456 */
457 Tick syncStart;
458 /**
459 * Frequency of dist sync events in ticks.
460 */
461 Tick syncRepeat;
462 /**
463 * Receiver thread pointer.
464 * Each DistIface object must have exactly one receiver thread.
465 */
466 std::thread *recvThread;
467 /**
468 * Meta information about data packets received.
469 */
470 RecvScheduler recvScheduler;
471 /**
472 * Use pseudoOp to start synchronization.
473 */
474 bool syncStartOnPseudoOp;
475
476 protected:
477 /**
478 * The rank of this process among the gem5 peers.
479 */
480 unsigned rank;
481 /**
482 * The number of gem5 processes comprising this dist simulation.
483 */
484 unsigned size;
485 /**
486 * Number of DistIface objects (i.e. dist links in this gem5 process)
487 */
488 static unsigned distIfaceNum;
489 /**
490 * Unique id for the dist link
491 */
492 unsigned distIfaceId;
493
494 bool isPrimary;
495
496 private:
497 /**
498 * Number of receiver threads (in this gem5 process)
499 */
500 static unsigned recvThreadsNum;
501 /**
502 * The singleton Sync object to perform dist synchronisation.
503 */
504 static Sync *sync;
505 /**
506 * The singleton SyncEvent object to schedule periodic dist sync.
507 */
508 static SyncEvent *syncEvent;
509 /**
510 * The very first DistIface object created becomes the primary interface.
511 * We need a primary interface to co-ordinate the global synchronisation.
512 */
513 static DistIface *primary;
514 /**
515 * System pointer used to wakeup sleeping threads when stopping sync.
516 */
517 static System *sys;
518 /**
519 * Is this node a switch?
520 */
521 static bool isSwitch;
522
523 private:
524 /**
525 * Send out a data packet to the remote end.
526 * @param header Meta info about the packet (which needs to be transferred
527 * to the destination alongside the packet).
528 * @param packet Pointer to the packet to send.
529 */
530 virtual void sendPacket(const Header &header, const EthPacketPtr &packet) = 0;
531 /**
532 * Send out a control command to the remote end.
533 * @param header Meta info describing the command (e.g. sync request)
534 */
535 virtual void sendCmd(const Header &header) = 0;
536 /**
537 * Receive a header (i.e. meta info describing a data packet or a control command)
538 * from the remote end.
539 * @param header The meta info structure to store the incoming header.
540 */
541 virtual bool recvHeader(Header &header) = 0;
542 /**
543 * Receive a packet from the remote end.
544 * @param header Meta info about the incoming packet (obtanied by a previous
545 * call to the recvHedaer() method).
546 * @param Pointer to packet received.
547 */
548 virtual void recvPacket(const Header &header, EthPacketPtr &packet) = 0;
549 /**
550 * Init hook for the underlaying transport
551 */
552 virtual void initTransport() = 0;
553 /**
554 * spawn the receiver thread.
555 * @param recv_done The receive done event associated with the simulated
556 * Ethernet link.
557 * @param link_delay The link delay for the simulated Ethernet link.
558 */
559 void spawnRecvThread(const Event *recv_done, Tick link_delay);
560 /**
561 * The function executed by a receiver thread.
562 */
563 void recvThreadFunc(Event *recv_done, Tick link_delay);
564
565 public:
566
567 /**
568 * ctor
569 * @param dist_rank Rank of this gem5 process within the dist run
570 * @param sync_start Start tick for dist synchronisation
571 * @param sync_repeat Frequency for dist synchronisation
572 * @param em The event manager associated with the simulated Ethernet link
573 */
574 DistIface(unsigned dist_rank,
575 unsigned dist_size,
576 Tick sync_start,
577 Tick sync_repeat,
578 EventManager *em,
579 bool use_pseudo_op,
580 bool is_switch,
581 int num_nodes);
582
583 virtual ~DistIface();
584 /**
585 * Send out an Ethernet packet.
586 * @param pkt The Ethernet packet to send.
587 * @param send_delay The delay in ticks for the send completion event.
588 */
589 void packetOut(EthPacketPtr pkt, Tick send_delay);
590 /**
591 * Fetch the packet scheduled to be received next by the simulated
592 * network link.
593 *
594 * @note This method is called within the process() method of the link
595 * receive done event. It also schedules the next receive event if the
596 * receive queue is not empty.
597 */
598 EthPacketPtr packetIn() { return recvScheduler.popPacket(); }
599
600 DrainState drain() override;
601 void drainResume() override;
602 void init(const Event *e, Tick link_delay);
603 void startup();
604
605 void serialize(CheckpointOut &cp) const override;
606 void unserialize(CheckpointIn &cp) override;
607 /**
608 * Initiate the exit from the simulation.
609 * @param delay Delay param from the m5 exit command. If Delay is zero
610 * then a collaborative exit is requested (i.e. all nodes have to call
611 * this method before the distributed simulation can exit). If Delay is
612 * not zero then exit is requested asap (and it will happen at the next
613 * sync tick).
614 * @return False if we are in distributed mode (i.e. exit can happen only
615 * at sync), True otherwise.
616 */
617 static bool readyToExit(Tick delay);
618 /**
619 * Initiate taking a checkpoint
620 * @param delay Delay param from the m5 checkpoint command. If Delay is
621 * zero then a collaborative checkpoint is requested (i.e. all nodes have
622 * to call this method before the checkpoint can be taken). If Delay is
623 * not zero then a checkpoint is requested asap (and it will happen at the
624 * next sync tick).
625 * @return False if we are in dist mode (i.e. exit can happen only at
626 * sync), True otherwise.
627 */
628 static bool readyToCkpt(Tick delay, Tick period);
629 /**
630 * Getter for the dist rank param.
631 */
632 static uint64_t rankParam();
633 /**
634 * Getter for the dist size param.
635 */
636 static uint64_t sizeParam();
637 /**
638 * Trigger the primary to start/stop synchronization.
639 */
640 static void toggleSync(ThreadContext *tc);
641 };
642
643 #endif