From: Steve Reinhardt ext:(%2C%20Nilay%20Vaish%20%3Cnilay%40cs.wisc.edu%3E%2C%20Ali%20Saidi%20%3CAli.Saidi%40ARM.com%3E) Date: Mon, 25 Nov 2013 17:21:00 +0000 (-0600) Subject: sim: simulate with multiple threads and event queues X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=de366a16f11b7e27a5b5e064a2a773052568428e;p=gem5.git sim: simulate with multiple threads and event queues This patch adds support for simulating with multiple threads, each of which operates on an event queue. Each sim object specifies which eventq it would like to be on. A custom barrier implementation is being added, which the eventqs use to synchronize. The patch was tested in two different configurations: 1. ruby_network_test.py: in this simulation L1 cache controllers receive requests from the cpu. The requests are replied to immediately without any communication taking place with any other level. 2. twosys-tsunami-simple-atomic: this configuration simulates a client-server pair of systems that are connected by an ethernet link. We still lack the ability to communicate using message buffers or ports. But other things, like simulation start and end and synchronizing after every quantum, are working. Committed by: Nilay Vaish --- diff --git a/src/base/barrier.hh b/src/base/barrier.hh new file mode 100644 index 000000000..855731d3b --- /dev/null +++ b/src/base/barrier.hh @@ -0,0 +1,82 @@ +/* + * Copyright (c) 2013 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder.
You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * Authors: Ali Saidi + */ + +#ifndef __BASE_BARRIER_HH__ +#define __BASE_BARRIER_HH__ + +#include <condition_variable> + +class Barrier +{ + private: + /// Mutex to protect access to numLeft and generation + std::mutex bMutex; + /// Condition variable for waiting on barrier + std::condition_variable bCond; + /// Number of threads we should be waiting for before completing the barrier + unsigned numWaiting; + /// Generation of this barrier + unsigned generation; + /// Number of threads remaining for the current generation + unsigned numLeft; + + public: + Barrier(unsigned _numWaiting) + : numWaiting(_numWaiting), generation(0), numLeft(_numWaiting) + {} + + bool + wait() + { + std::unique_lock<std::mutex> lock(bMutex); + unsigned int gen = generation; + + if (--numLeft == 0) { + generation++; + numLeft = numWaiting; + bCond.notify_all(); + return true; + } + while (gen == generation) + bCond.wait(lock); + return false; + } +}; + +#endif // __BASE_BARRIER_HH__ diff --git a/src/cpu/base.cc b/src/cpu/base.cc index 25fe9bf97..3078472fd 100644 --- a/src/cpu/base.cc +++ b/src/cpu/base.cc @@ -13,6 +13,8 @@ * * Copyright (c) 2002-2005 The Regents of The University of Michigan * Copyright (c) 2011 Regents of the University of California + * Copyright (c) 2013 Advanced Micro Devices, Inc. + * Copyright (c) 2013 Mark D. Hill and David A. Wood * All rights reserved.
* * Redistribution and use in source and binary forms, with or without @@ -569,7 +571,7 @@ void BaseCPU::scheduleInstStop(ThreadID tid, Counter insts, const char *cause) { const Tick now(comInstEventQueue[tid]->getCurTick()); - Event *event(new SimLoopExitEvent(cause, 0)); + Event *event(new LocalSimLoopExitEvent(cause, 0)); comInstEventQueue[tid]->schedule(event, now + insts); } @@ -578,7 +580,7 @@ void BaseCPU::scheduleLoadStop(ThreadID tid, Counter loads, const char *cause) { const Tick now(comLoadEventQueue[tid]->getCurTick()); - Event *event(new SimLoopExitEvent(cause, 0)); + Event *event(new LocalSimLoopExitEvent(cause, 0)); comLoadEventQueue[tid]->schedule(event, now + loads); } diff --git a/src/cpu/kvm/base.cc b/src/cpu/kvm/base.cc index d03657a88..589964a32 100644 --- a/src/cpu/kvm/base.cc +++ b/src/cpu/kvm/base.cc @@ -506,7 +506,8 @@ BaseKvmCPU::tick() case RunningServiceCompletion: case Running: { - Tick ticksToExecute(mainEventQueue.nextTick() - curTick()); + EventQueue *q = curEventQueue(); + Tick ticksToExecute(q->nextTick() - curTick()); // We might need to update the KVM state. 
syncKvmState(); diff --git a/src/dev/etherlink.cc b/src/dev/etherlink.cc index 5ebc4fba5..0117bb7c2 100644 --- a/src/dev/etherlink.cc +++ b/src/dev/etherlink.cc @@ -142,7 +142,9 @@ class LinkDelayEvent : public Event void process(); virtual void serialize(ostream &os); - virtual void unserialize(Checkpoint *cp, const string &section); + void unserialize(Checkpoint *cp, const string &section) {} + void unserialize(Checkpoint *cp, const string &section, + EventQueue *eventq); static Serializable *createForUnserialize(Checkpoint *cp, const string &section); }; @@ -259,9 +261,10 @@ LinkDelayEvent::serialize(ostream &os) void -LinkDelayEvent::unserialize(Checkpoint *cp, const string &section) +LinkDelayEvent::unserialize(Checkpoint *cp, const string &section, + EventQueue *eventq) { - Event::unserialize(cp, section); + Event::unserialize(cp, section, eventq); EtherLink *parent; bool number; diff --git a/src/dev/x86/Pc.py b/src/dev/x86/Pc.py index 3fc2382b7..bd8b2ad98 100644 --- a/src/dev/x86/Pc.py +++ b/src/dev/x86/Pc.py @@ -57,10 +57,9 @@ class Pc(Platform): behind_pci = IsaFake(pio_addr=x86IOAddress(0xcf8), pio_size=8) # Serial port and terminal - terminal = Terminal() com_1 = Uart8250() com_1.pio_addr = x86IOAddress(0x3f8) - com_1.terminal = terminal + com_1.terminal = Terminal() # Devices to catch access to non-existant serial ports. fake_com_2 = IsaFake(pio_addr=x86IOAddress(0x2f8), pio_size=8) diff --git a/src/python/m5/SimObject.py b/src/python/m5/SimObject.py index 14499759c..9b60dfef6 100644 --- a/src/python/m5/SimObject.py +++ b/src/python/m5/SimObject.py @@ -11,7 +11,8 @@ # modified or unmodified, in source code or in binary form. # # Copyright (c) 2004-2006 The Regents of The University of Michigan -# Copyright (c) 2010 Advanced Micro Devices, Inc. +# Copyright (c) 2010-2013 Advanced Micro Devices, Inc. +# Copyright (c) 2013 Mark D. Hill and David A. Wood # All rights reserved.
# # Redistribution and use in source and binary forms, with or without @@ -528,8 +529,6 @@ struct PyObject; #endif #include - -class EventQueue; ''') for param in params: param.cxx_predecls(code) @@ -558,16 +557,11 @@ class EventQueue; code.indent() if cls == SimObject: code(''' - SimObjectParams() - { - extern EventQueue mainEventQueue; - eventq = &mainEventQueue; - } + SimObjectParams() {} virtual ~SimObjectParams() {} std::string name; PyObject *pyobj; - EventQueue *eventq; ''') for param in params: param.cxx_decl(code) @@ -582,6 +576,14 @@ class EventQueue; return code +# This *temporary* definition is required to support calls from the +# SimObject class definition to the MetaSimObject methods (in +# particular _set_param, which gets called for parameters with default +# values defined on the SimObject class itself). It will get +# overridden by the permanent definition (which requires that +# SimObject be defined) lower in this file. +def isSimObjectOrVector(value): + return False # The SimObject class is the root of the special hierarchy. Most of # the code in this class deals with the configuration hierarchy itself @@ -592,9 +594,10 @@ class SimObject(object): __metaclass__ = MetaSimObject type = 'SimObject' abstract = True - cxx_header = "sim/sim_object.hh" + cxx_header = "sim/sim_object.hh" cxx_bases = [ "Drainable", "Serializable" ] + eventq_index = Param.UInt32(Parent.eventq_index, "Event Queue Index") @classmethod def export_method_swig_predecls(cls, code): diff --git a/src/python/m5/event.py b/src/python/m5/event.py index 35095599d..76fc37042 100644 --- a/src/python/m5/event.py +++ b/src/python/m5/event.py @@ -1,4 +1,6 @@ # Copyright (c) 2006 The Regents of The University of Michigan +# Copyright (c) 2013 Advanced Micro Devices, Inc. +# Copyright (c) 2013 Mark D. Hill and David A. Wood # All rights reserved. 
# # Redistribution and use in source and binary forms, with or without @@ -29,9 +31,9 @@ import m5 import internal.event -from internal.event import PythonEvent, SimLoopExitEvent as SimExit +from internal.event import PythonEvent, GlobalSimLoopExitEvent as SimExit -mainq = internal.event.cvar.mainEventQueue +mainq = None def create(obj, priority=None): if priority is None: @@ -58,4 +60,10 @@ class ProgressEvent(Event): print "Progress! Time now %fs" % (m5.curTick()/1e12) self.eventq.schedule(self, m5.curTick() + self.period) +def getEventQueue(index): + return internal.event.getEventQueue(index) + +def setEventQueue(eventq): + internal.event.curEventQueue(eventq) + __all__ = [ 'create', 'Event', 'ProgressEvent', 'SimExit', 'mainq' ] diff --git a/src/python/m5/main.py b/src/python/m5/main.py index 611bc99bb..6a6dfa772 100644 --- a/src/python/m5/main.py +++ b/src/python/m5/main.py @@ -190,6 +190,10 @@ def main(*args): fatal("Tracing is not enabled. Compile with TRACING_ON") + # Set the main event queue for the main thread. + event.mainq = event.getEventQueue(0) + event.setEventQueue(event.mainq) + if not os.path.isdir(options.outdir): os.makedirs(options.outdir) diff --git a/src/python/m5/simulate.py b/src/python/m5/simulate.py index 322257139..cbd0fb0d4 100644 --- a/src/python/m5/simulate.py +++ b/src/python/m5/simulate.py @@ -147,6 +147,13 @@ def simulate(*args, **kwargs): for obj in root.descendants(): obj.startup() need_startup = False + # Python exit handlers happen in reverse order. + # We want to dump stats last. + atexit.register(stats.dump) + + # register our C++ exit callback function with Python + atexit.register(internal.core.doExitCleanup) + for root in need_resume: resume(root) need_resume = [] @@ -157,12 +164,6 @@ def simulate(*args, **kwargs): def curTick(): return internal.core.curTick() -# Python exit handlers happen in reverse order. We want to dump stats last. 
-atexit.register(stats.dump) - -# register our C++ exit callback function with Python -atexit.register(internal.core.doExitCleanup) - # Drain the system in preparation of a checkpoint or memory mode # switch. def drain(root): diff --git a/src/python/swig/event.i b/src/python/swig/event.i index 788ea7cfa..23bb31364 100644 --- a/src/python/swig/event.i +++ b/src/python/swig/event.i @@ -1,5 +1,7 @@ /* * Copyright (c) 2006 The Regents of The University of Michigan + * Copyright (c) 2013 Advanced Micro Devices, Inc. + * Copyright (c) 2013 Mark D. Hill and David A. Wood * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -82,12 +84,13 @@ %include "python/swig/pyevent.hh" // minimal definition of SimExitEvent interface to wrap -class SimLoopExitEvent : public Event +class GlobalSimLoopExitEvent { public: std::string getCause(); int getCode(); - SimLoopExitEvent(const std::string &_cause, int c, Tick _repeat = 0); + GlobalSimLoopExitEvent(Tick when, const std::string &_cause, int c, + Tick _repeat = 0); }; %exception simulate { @@ -96,5 +99,8 @@ class SimLoopExitEvent : public Event return NULL; } } -SimLoopExitEvent *simulate(Tick num_cycles = MaxTick); + +GlobalSimLoopExitEvent *simulate(Tick num_cycles = MaxTick); void exitSimLoop(const std::string &message, int exit_code); +void curEventQueue( EventQueue *); +EventQueue *getEventQueue(uint32_t index); diff --git a/src/sim/Root.py b/src/sim/Root.py index 44f768c0b..c6f7cfba6 100644 --- a/src/sim/Root.py +++ b/src/sim/Root.py @@ -1,5 +1,6 @@ # Copyright (c) 2005-2007 The Regents of The University of Michigan -# Copyright (c) 2010 Advanced Micro Devices, Inc. +# Copyright (c) 2010-2013 Advanced Micro Devices, Inc. +# Copyright (c) 2013 Mark D. Hill and David A. Wood # All rights reserved. 
# # Redistribution and use in source and binary forms, with or without @@ -60,6 +61,14 @@ class Root(SimObject): type = 'Root' cxx_header = "sim/root.hh" + # By default, root sim object and hence all other sim objects schedule + # event on the eventq with index 0. + eventq_index = 0 + + # Simulation Quantum for multiple main event queue simulation. + # Needs to be set explicitly for a multi-eventq simulation. + sim_quantum = Param.Tick(0, "simulation quantum") + full_system = Param.Bool("if this is a full system simulation") # Time syncing prevents the simulation from running faster than real time. diff --git a/src/sim/SConscript b/src/sim/SConscript index 850af230e..769c8240d 100644 --- a/src/sim/SConscript +++ b/src/sim/SConscript @@ -42,6 +42,7 @@ Source('async.cc') Source('core.cc') Source('debug.cc') Source('eventq.cc') +Source('global_event.cc') Source('init.cc') Source('main.cc', main=True, skip_lib=True) Source('root.cc') diff --git a/src/sim/core.cc b/src/sim/core.cc index aa618bdb3..1333c8b22 100644 --- a/src/sim/core.cc +++ b/src/sim/core.cc @@ -1,5 +1,7 @@ /* * Copyright (c) 2006 The Regents of The University of Michigan + * Copyright (c) 2013 Advanced Micro Devices, Inc. + * Copyright (c) 2013 Mark D. Hill and David A. Wood * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/src/sim/core.hh b/src/sim/core.hh index b5a082bcb..e0a6af91c 100644 --- a/src/sim/core.hh +++ b/src/sim/core.hh @@ -1,5 +1,7 @@ /* * Copyright (c) 2006 The Regents of The University of Michigan + * Copyright (c) 2013 Advanced Micro Devices, Inc. + * Copyright (c) 2013 Mark D. Hill and David A. Wood * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -42,7 +44,7 @@ #include "sim/eventq.hh" /// The universal simulation clock. 
-inline Tick curTick() { return mainEventQueue.getCurTick(); } +inline Tick curTick() { return _curEventQueue->getCurTick(); } const Tick retryTime = 1000; diff --git a/src/sim/debug.cc b/src/sim/debug.cc index bfb28198d..0dd16a88d 100644 --- a/src/sim/debug.cc +++ b/src/sim/debug.cc @@ -37,6 +37,7 @@ #include "base/debug.hh" #include "sim/debug.hh" #include "sim/eventq_impl.hh" +#include "sim/global_event.hh" #include "sim/sim_events.hh" #include "sim/sim_exit.hh" @@ -46,9 +47,9 @@ using namespace std; // Debug event: place a breakpoint on the process function and // schedule the event to break at a particular cycle // -struct DebugBreakEvent : public Event +struct DebugBreakEvent : public GlobalEvent { - DebugBreakEvent(); + DebugBreakEvent(Tick when); void process(); // process event virtual const char *description() const; }; @@ -56,8 +57,8 @@ struct DebugBreakEvent : public Event // // constructor: schedule at specified time // -DebugBreakEvent::DebugBreakEvent() - : Event(Debug_Break_Pri, AutoDelete) +DebugBreakEvent::DebugBreakEvent(Tick when) + : GlobalEvent(when, Debug_Break_Pri, AutoDelete) { } @@ -84,7 +85,7 @@ DebugBreakEvent::description() const void schedBreak(Tick when) { - mainEventQueue.schedule(new DebugBreakEvent, when); + new DebugBreakEvent(when); warn("need to stop all queues"); } @@ -102,8 +103,9 @@ takeCheckpoint(Tick when) void eventqDump() { - mainEventQueue.dump(); - warn("need to dump all queues"); + for (uint32_t i = 0; i < numMainEventQueues; ++i) { + mainEventQueue[i]->dump(); + } } void diff --git a/src/sim/eventq.cc b/src/sim/eventq.cc index d81937a86..0735a011b 100644 --- a/src/sim/eventq.cc +++ b/src/sim/eventq.cc @@ -1,6 +1,7 @@ /* * Copyright (c) 2000-2005 The Regents of The University of Michigan * Copyright (c) 2008 The Hewlett-Packard Development Company + * Copyright (c) 2013 Advanced Micro Devices, Inc. * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -46,13 +47,30 @@ using namespace std; +Tick simQuantum = 0; + // -// Main Event Queue +// Main Event Queues // -// Events on this queue are processed at the *beginning* of each +// Events on these queues are processed at the *beginning* of each // cycle, before the pipeline simulation is performed. // -EventQueue mainEventQueue("Main Event Queue"); +uint32_t numMainEventQueues = 0; +vector<EventQueue *> mainEventQueue; +__thread EventQueue *_curEventQueue = NULL; +bool inParallelMode = false; + +EventQueue * +getEventQueue(uint32_t index) +{ + while (numMainEventQueues <= index) { + numMainEventQueues++; + mainEventQueue.push_back( + new EventQueue(csprintf("MainEventQueue-%d", index))); + } + + return mainEventQueue[index]; +} #ifndef NDEBUG Counter Event::instanceCounter = 0; @@ -156,6 +174,8 @@ EventQueue::remove(Event *event) if (head == NULL) panic("event not found!"); + assert(event->queue == this); + // deal with an event on the head's 'in bin' list (event has the same // time as the head) if (*head == *event) { @@ -231,9 +251,14 @@ Event::serialize(std::ostream &os) void Event::unserialize(Checkpoint *cp, const string &section) +{ +} + +void +Event::unserialize(Checkpoint *cp, const string &section, EventQueue *eventq) { if (scheduled()) - mainEventQueue.deschedule(this); + eventq->deschedule(this); UNSERIALIZE_SCALAR(_when); UNSERIALIZE_SCALAR(_priority); @@ -259,7 +284,7 @@ Event::unserialize(Checkpoint *cp, const string &section) if (wasScheduled) { DPRINTF(Config, "rescheduling at %d\n", _when); - mainEventQueue.schedule(this, _when); + eventq->schedule(this, _when); } } @@ -388,7 +413,9 @@ EventQueue::replaceHead(Event* s) void dumpMainQueue() { - mainEventQueue.dump(); + for (uint32_t i = 0; i < numMainEventQueues; ++i) { + mainEventQueue[i]->dump(); + } } @@ -432,5 +459,29 @@ Event::dump() const } EventQueue::EventQueue(const string &n) - : objName(n), head(NULL), _curTick(0) -{} + : objName(n),
head(NULL), _curTick(0), + async_queue_mutex(new std::mutex()) +{ +} + +void +EventQueue::asyncInsert(Event *event) +{ + async_queue_mutex->lock(); + async_queue.push_back(event); + async_queue_mutex->unlock(); +} + +void +EventQueue::handleAsyncInsertions() +{ + assert(this == curEventQueue()); + async_queue_mutex->lock(); + + while (!async_queue.empty()) { + insert(async_queue.front()); + async_queue.pop_front(); + } + + async_queue_mutex->unlock(); +} diff --git a/src/sim/eventq.hh b/src/sim/eventq.hh index 223b4941c..66b324c4f 100644 --- a/src/sim/eventq.hh +++ b/src/sim/eventq.hh @@ -1,5 +1,7 @@ /* * Copyright (c) 2000-2005 The Regents of The University of Michigan + * Copyright (c) 2013 Advanced Micro Devices, Inc. + * Copyright (c) 2013 Mark D. Hill and David A. Wood * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -40,6 +42,7 @@ #include #include #include +#include #include #include "base/flags.hh" @@ -49,20 +52,48 @@ #include "sim/serialize.hh" class EventQueue; // forward declaration +class BaseGlobalEvent; -extern EventQueue mainEventQueue; +//! Simulation Quantum for multiple eventq simulation. +//! The quantum value is the period length after which the queues +//! synchronize themselves with each other. This means that any +//! event to scheduled on Queue A which is generated by an event on +//! Queue B should be at least simQuantum ticks away in future. +extern Tick simQuantum; -/* - * An item on an event queue. The action caused by a given - * event is specified by deriving a subclass and overriding the - * process() member function. - * - * Caution, the order of members is chosen to maximize data packing. +//! Current number of allocated main event queues. +extern uint32_t numMainEventQueues; + +//! Array for main event queues. +extern std::vector mainEventQueue; + +#ifndef SWIG +//! The current event queue for the running thread. Access to this queue +//! does not require any locking from the thread. 
+ +extern __thread EventQueue *_curEventQueue; + +#endif + +//! Current mode of execution: parallel / serial +extern bool inParallelMode; + +//! Function for returning eventq queue for the provided +//! index. The function allocates a new queue in case one +//! does not exist for the index, provided that the index +//! is with in bounds. +EventQueue *getEventQueue(uint32_t index); + +inline EventQueue *curEventQueue() { return _curEventQueue; } +inline void curEventQueue(EventQueue *q) { _curEventQueue = q; } + +/** + * Common base class for Event and GlobalEvent, so they can share flag + * and priority definitions and accessor functions. This class should + * not be used directly. */ -class Event : public Serializable +class EventBase { - friend class EventQueue; - protected: typedef unsigned short FlagsType; typedef ::Flags Flags; @@ -78,15 +109,76 @@ class Event : public Serializable static const FlagsType Initialized = 0x7a40; // somewhat random bits static const FlagsType InitMask = 0xffc0; // mask for init bits - bool - initialized() const - { - return this && (flags & InitMask) == Initialized; - } - public: typedef int8_t Priority; + /// Event priorities, to provide tie-breakers for events scheduled + /// at the same cycle. Most events are scheduled at the default + /// priority; these values are used to control events that need to + /// be ordered within a cycle. + + /// Minimum priority + static const Priority Minimum_Pri = SCHAR_MIN; + + /// If we enable tracing on a particular cycle, do that as the + /// very first thing so we don't miss any of the events on + /// that cycle (even if we enter the debugger). + static const Priority Debug_Enable_Pri = -101; + + /// Breakpoints should happen before anything else (except + /// enabling trace output), so we don't miss any action when + /// debugging. 
+ static const Priority Debug_Break_Pri = -100; + + /// CPU switches schedule the new CPU's tick event for the + /// same cycle (after unscheduling the old CPU's tick event). + /// The switch needs to come before any tick events to make + /// sure we don't tick both CPUs in the same cycle. + static const Priority CPU_Switch_Pri = -31; + + /// For some reason "delayed" inter-cluster writebacks are + /// scheduled before regular writebacks (which have default + /// priority). Steve? + static const Priority Delayed_Writeback_Pri = -1; + + /// Default is zero for historical reasons. + static const Priority Default_Pri = 0; + + /// Serailization needs to occur before tick events also, so + /// that a serialize/unserialize is identical to an on-line + /// CPU switch. + static const Priority Serialize_Pri = 32; + + /// CPU ticks must come after other associated CPU events + /// (such as writebacks). + static const Priority CPU_Tick_Pri = 50; + + /// Statistics events (dump, reset, etc.) come after + /// everything else, but before exit. + static const Priority Stat_Event_Pri = 90; + + /// Progress events come at the end. + static const Priority Progress_Event_Pri = 95; + + /// If we want to exit on this cycle, it's the very last thing + /// we do. + static const Priority Sim_Exit_Pri = 100; + + /// Maximum priority + static const Priority Maximum_Pri = SCHAR_MAX; +}; + +/* + * An item on an event queue. The action caused by a given + * event is specified by deriving a subclass and overriding the + * process() member function. + * + * Caution, the order of members is chosen to maximize data packing. + */ +class Event : public EventBase, public Serializable +{ + friend class EventQueue; + private: // The event queue is now a linked list of linked lists. 
The // 'nextBin' pointer is to find the bin, where a bin is defined as @@ -139,6 +231,12 @@ class Event : public Serializable #endif } + bool + initialized() const + { + return this && (flags & InitMask) == Initialized; + } + protected: /// Accessor for flags. Flags @@ -179,60 +277,6 @@ class Event : public Serializable virtual void trace(const char *action); //!< trace event activity public: - /// Event priorities, to provide tie-breakers for events scheduled - /// at the same cycle. Most events are scheduled at the default - /// priority; these values are used to control events that need to - /// be ordered within a cycle. - - /// Minimum priority - static const Priority Minimum_Pri = SCHAR_MIN; - - /// If we enable tracing on a particular cycle, do that as the - /// very first thing so we don't miss any of the events on - /// that cycle (even if we enter the debugger). - static const Priority Debug_Enable_Pri = -101; - - /// Breakpoints should happen before anything else (except - /// enabling trace output), so we don't miss any action when - /// debugging. - static const Priority Debug_Break_Pri = -100; - - /// CPU switches schedule the new CPU's tick event for the - /// same cycle (after unscheduling the old CPU's tick event). - /// The switch needs to come before any tick events to make - /// sure we don't tick both CPUs in the same cycle. - static const Priority CPU_Switch_Pri = -31; - - /// For some reason "delayed" inter-cluster writebacks are - /// scheduled before regular writebacks (which have default - /// priority). Steve? - static const Priority Delayed_Writeback_Pri = -1; - - /// Default is zero for historical reasons. - static const Priority Default_Pri = 0; - - /// Serailization needs to occur before tick events also, so - /// that a serialize/unserialize is identical to an on-line - /// CPU switch. - static const Priority Serialize_Pri = 32; - - /// CPU ticks must come after other associated CPU events - /// (such as writebacks). 
- static const Priority CPU_Tick_Pri = 50; - - /// Statistics events (dump, reset, etc.) come after - /// everything else, but before exit. - static const Priority Stat_Event_Pri = 90; - - /// Progress events come at the end. - static const Priority Progress_Event_Pri = 95; - - /// If we want to exit on this cycle, it's the very last thing - /// we do. - static const Priority Sim_Exit_Pri = 100; - - /// Maximum priority - static const Priority Maximum_Pri = SCHAR_MAX; /* * Event constructor @@ -295,9 +339,21 @@ class Event : public Serializable /// Get the event priority Priority priority() const { return _priority; } + //! If this is part of a GlobalEvent, return the pointer to the + //! Global Event. By default, there is no GlobalEvent, so return + //! NULL. (Overridden in GlobalEvent::BarrierEvent.) + virtual BaseGlobalEvent *globalEvent() { return NULL; } + #ifndef SWIG virtual void serialize(std::ostream &os); virtual void unserialize(Checkpoint *cp, const std::string &section); + + //! This function is required to support restoring from checkpoints + //! when running with multiple queues. Since we still have not thrashed + //! out all the details on checkpointing, this function is most likely + //! to be revisited in future. + virtual void unserialize(Checkpoint *cp, const std::string &section, + EventQueue *eventq); #endif }; @@ -352,20 +408,40 @@ class EventQueue : public Serializable Event *head; Tick _curTick; + //! Mutex to protect async queue. + std::mutex *async_queue_mutex; + + //! List of events added by other threads to this event queue. + std::list<Event*> async_queue; + + //! Insert / remove event from the queue. Should only be called + //! by thread operating this queue. void insert(Event *event); void remove(Event *event); + //! Function for adding events to the async queue. The added events + //! are added to main event queue later. Threads, other than the + //! owning thread, should call this function instead of insert().
+ void asyncInsert(Event *event); + EventQueue(const EventQueue &); - const EventQueue &operator=(const EventQueue &); public: EventQueue(const std::string &n); virtual const std::string name() const { return objName; } + void name(const std::string &st) { objName = st; } + + //! Schedule the given event on this queue. Safe to call from any + //! thread. + void schedule(Event *event, Tick when, bool global = false); - // schedule the given event on this queue - void schedule(Event *event, Tick when); + //! Deschedule the specified event. Should be called only from the + //! owning thread. void deschedule(Event *event); + + //! Reschedule the specified event. Should be called only from + //! the owning thread. void reschedule(Event *event, Tick when, bool always = false); Tick nextTick() const { return head->when(); } @@ -402,6 +478,9 @@ class EventQueue : public Serializable bool debugVerify() const; + //! Function for moving events from the async_queue to the main queue. + void handleAsyncInsertions(); + /** * function for replacing the head of the event queue, so that a * different set of events can run without disturbing events that have diff --git a/src/sim/eventq_impl.hh b/src/sim/eventq_impl.hh index c53a4da77..360731d7b 100644 --- a/src/sim/eventq_impl.hh +++ b/src/sim/eventq_impl.hh @@ -1,6 +1,7 @@ /* * Copyright (c) 2012 The Regents of The University of Michigan - * Copyright (c) 2012 Mark D. Hill and David A. Wood + * Copyright (c) 2012-2013 Mark D. Hill and David A. Wood + * Copyright (c) 2013 Advanced Micro Devices, Inc. * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -38,19 +39,26 @@ #include "sim/eventq.hh" inline void -EventQueue::schedule(Event *event, Tick when) +EventQueue::schedule(Event *event, Tick when, bool global) { assert(when >= getCurTick()); assert(!event->scheduled()); assert(event->initialized()); event->setWhen(when, this); - insert(event); + + // The check below is to make sure of two things + // a. a thread schedules local events on other queues through the asyncq + // b. a thread schedules global events on the asyncq, whether or not + // this event belongs to this eventq. This is required to maintain + // a total order amongst the global events. See global_event.{cc,hh} + // for more explanation. + if (inParallelMode && (this != curEventQueue() || global)) { + asyncInsert(event); + } else { + insert(event); + } event->flags.set(Event::Scheduled); - if (this == &mainEventQueue) - event->flags.set(Event::IsMainQueue); - else - event->flags.clear(Event::IsMainQueue); if (DTRACE(Event)) event->trace("scheduled"); @@ -61,6 +69,7 @@ EventQueue::deschedule(Event *event) { assert(event->scheduled()); assert(event->initialized()); + assert(!inParallelMode || this == curEventQueue()); remove(event); @@ -80,6 +89,7 @@ EventQueue::reschedule(Event *event, Tick when, bool always) assert(when >= getCurTick()); assert(always || event->scheduled()); assert(event->initialized()); + assert(!inParallelMode || this == curEventQueue()); if (event->scheduled()) remove(event); @@ -88,10 +98,6 @@ EventQueue::reschedule(Event *event, Tick when, bool always) insert(event); event->flags.clear(Event::Squashed); event->flags.set(Event::Scheduled); - if (this == &mainEventQueue) - event->flags.set(Event::IsMainQueue); - else - event->flags.clear(Event::IsMainQueue); if (DTRACE(Event)) event->trace("rescheduled"); diff --git a/src/sim/global_event.cc b/src/sim/global_event.cc new file mode 100644 index 000000000..fedee351f --- /dev/null +++ 
b/src/sim/global_event.cc @@ -0,0 +1,166 @@ +/* + * Copyright (c) 2011-2013 Advanced Micro Devices, Inc. + * Copyright (c) 2013 Mark D. Hill and David A. Wood + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * Authors: Steve Reinhardt + */ + +#include "sim/global_event.hh" + +std::mutex BaseGlobalEvent::globalQMutex; + +BaseGlobalEvent::BaseGlobalEvent(Priority p, Flags f) +{ + barrierEvent.resize(numMainEventQueues); + barrier = new Barrier(numMainEventQueues); +} + + +BaseGlobalEvent::~BaseGlobalEvent() +{ + // see GlobalEvent::BarrierEvent::~BarrierEvent() comments + if (barrierEvent[0] != NULL) { + for (int i = 0; i < numMainEventQueues; ++i) + delete barrierEvent[i]; + } +} + + +void BaseGlobalEvent::schedule(Tick when) +{ + // This function is scheduling a global event, which actually is a + // set of local events, one event on each eventq. Global events need + // to have a total order. A thread cannot start executing events that + // follow a global event till all other threads have executed that global + // event as well. If global events were not in a total order, a deadlock + // would occur for there will be two threads who would be waiting for + // each other to execute the global events they themselves have executed. + // + // To ensure this total order, we do two things. + // First, before scheduling any global event, a thread needs to acquire + // the lock globalQMutex. This ensures that only one thread can schedule + // global events at any given time. + // Second, the local events corresponding to a global event are always + // first inserted in to the asyncq, irrespective of whether or not the + // thread scheduling the event owns the eventq on which the event is + // being scheduled. Thus global events have the same order in the asyncq + // of each thread. When they are inserted in the actual eventq, the + // comparators in the Event class ensure that the total order is + // maintained. 
+ + globalQMutex.lock(); + + for (int i = 0; i < numMainEventQueues; ++i) { + mainEventQueue[i]->schedule(barrierEvent[i], when, true); + } + + globalQMutex.unlock(); +} + +void BaseGlobalEvent::deschedule() +{ + EventQueue *q = curEventQueue(); + for (uint32_t i = 0; i < numMainEventQueues; ++i) { + if (barrierEvent[i]->scheduled()) { + curEventQueue(mainEventQueue[i]); + mainEventQueue[i]->deschedule(barrierEvent[i]); + } + } + + curEventQueue(q); +} + +void BaseGlobalEvent::reschedule(Tick when) +{ + // Read the comment in the schedule() function above. + globalQMutex.lock(); + + for (uint32_t i = 0; i < numMainEventQueues; ++i) { + if (barrierEvent[i]->scheduled()) + mainEventQueue[i]->reschedule(barrierEvent[i], when); + else + mainEventQueue[i]->schedule(barrierEvent[i], when, true); + } + + globalQMutex.unlock(); +} + +BaseGlobalEvent::BarrierEvent::~BarrierEvent() +{ + // if AutoDelete is set, local events will get deleted in event + // loop, but we need to delete GlobalEvent object too... 
so let + // the local event in slot 0 do it + if (isFlagSet(AutoDelete) && _globalEvent->barrierEvent[0] == this) { + // set backpointer to NULL so that global event knows not to + // turn around and recursively delete local events + _globalEvent->barrierEvent[0] = NULL; + delete _globalEvent; + } +} + + +void +GlobalEvent::BarrierEvent::process() +{ + // wait for all queues to arrive at barrier, then process event + if (globalBarrier()) { + _globalEvent->process(); + } + + // second barrier to force all queues to wait for event processing + // to finish before continuing + globalBarrier(); +} + + +void +GlobalSyncEvent::BarrierEvent::process() +{ + // wait for all queues to arrive at barrier, then process event + if (globalBarrier()) { + _globalEvent->process(); + } + + // second barrier to force all queues to wait for event processing + // to finish before continuing + globalBarrier(); + curEventQueue()->handleAsyncInsertions(); +} + +void +GlobalSyncEvent::process() +{ + if (repeat) { + schedule(curTick() + repeat); + } +} + +const char * +GlobalSyncEvent::description() const +{ + return "GlobalSyncEvent"; +} diff --git a/src/sim/global_event.hh b/src/sim/global_event.hh new file mode 100644 index 000000000..7b5fd7485 --- /dev/null +++ b/src/sim/global_event.hh @@ -0,0 +1,230 @@ +/* + * Copyright (c) 2011-2013 Advanced Micro Devices, Inc. + * Copyright (c) 2013 Mark D. Hill and David A. Wood + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Steve Reinhardt + */ + +#ifndef __SIM_GLOBAL_EVENT_HH__ +#define __SIM_GLOBAL_EVENT_HH__ + +#include +#include + +#include "base/barrier.hh" +#include "sim/eventq_impl.hh" + +/** + * @file sim/global_event.hh + * Global events and related declarations. + * + * A global event is an event that occurs across all threads, i.e., + * globally. It consists of a set of "local" (regular) Events, one + * per thread/event queue, a barrier object, and common state. 
The + * local events are scheduled for the same tick. The local event + * process() method enters the barrier to wait for other threads; once + * all threads reach that tick (and enter the associated barrier), the + * global event is triggered and its associated activity is performed. + * + * There are two basic global event patterns, GlobalEvent and + * GlobalSyncEvent. GlobalEvent is the base class for typical global + * events, while GlobalSyncEvent is optimized for global + * synchronization operations. + */ + +/** + * Common base class for GlobalEvent and GlobalSyncEvent. + */ +class BaseGlobalEvent : public EventBase +{ + private: + //! Mutex variable for providing exclusive right to schedule global + //! events. This is necessary so that a total order can be maintained + //! amongst the global events. Without ensuring the total order, it is + //! possible that threads execute global events in different orders, + //! which can result in a deadlock. + static std::mutex globalQMutex; + + protected: + + /// The base class for the local events that will synchronize + /// threads to perform the global event. This class is abstract, + /// since it derives from the abstract Event class but still does + /// not define the required process() method. + class BarrierEvent : public Event + { + protected: + BaseGlobalEvent *_globalEvent; + + BarrierEvent(BaseGlobalEvent *global_event, Priority p, Flags f) + : Event(p, f), _globalEvent(global_event) + { + } + + ~BarrierEvent(); + + friend class BaseGlobalEvent; + + bool globalBarrier() + { + return _globalEvent->barrier->wait(); + } + + public: + virtual BaseGlobalEvent *globalEvent() { return _globalEvent; } + }; + + //! The barrier that all threads wait on before performing the + //! global event. + Barrier *barrier; + + //! The individual local event instances (one per thread/event queue).
+ std::vector barrierEvent; + + public: + BaseGlobalEvent(Priority p, Flags f); + + virtual ~BaseGlobalEvent(); + + virtual void process() = 0; + + virtual const char *description() const = 0; + + void schedule(Tick when); + + bool scheduled() const + { + bool sched = false; + for (uint32_t i = 0; i < numMainEventQueues; ++i) { + sched = sched || barrierEvent[i]->scheduled(); + } + + return sched; + } + + Tick when() const + { + assert(numMainEventQueues > 0); + return barrierEvent[0]->when(); + } + + void deschedule(); + void reschedule(Tick when); +}; + + +/** + * Funky intermediate class to support CRTP so that we can have a + * common constructor to create the local events, even though the + * types of the local events are defined in the derived classes. + */ +template +class BaseGlobalEventTemplate : public BaseGlobalEvent +{ + protected: + BaseGlobalEventTemplate(Priority p, Flags f) + : BaseGlobalEvent(p, f) + { + for (int i = 0; i < numMainEventQueues; ++i) + barrierEvent[i] = new typename Derived::BarrierEvent(this, p, f); + } +}; + + +/** + * The main global event class. Ordinary global events should derive + * from this class, and define process() to specify the action to be + * taken when the event is reached. All threads will synchronize at a + * barrier, exactly one of the threads will execute the process() + * method, then the threads will synchronize again so that none of + * them continue until process() is complete. 
+ */ +class GlobalEvent : public BaseGlobalEventTemplate +{ + public: + typedef BaseGlobalEventTemplate Base; + + class BarrierEvent : public Base::BarrierEvent + { + public: + void process(); + BarrierEvent(Base *global_event, Priority p, Flags f) + : Base::BarrierEvent(global_event, p, f) + { } + }; + + GlobalEvent(Priority p, Flags f) + : Base(p, f) + { } + + GlobalEvent(Tick when, Priority p, Flags f) + : Base(p, f) + { + schedule(when); + } + + virtual void process() = 0; +}; + +/** + * A special global event that synchronizes all threads and forces + * them to process asynchronously enqueued events. Useful for + * separating quanta in a quantum-based parallel simulation. + */ +class GlobalSyncEvent : public BaseGlobalEventTemplate +{ + public: + typedef BaseGlobalEventTemplate Base; + + class BarrierEvent : public Base::BarrierEvent + { + public: + void process(); + BarrierEvent(Base *global_event, Priority p, Flags f) + : Base::BarrierEvent(global_event, p, f) + { } + }; + + GlobalSyncEvent(Priority p, Flags f) + : Base(p, f) + { } + + GlobalSyncEvent(Tick when, Tick _repeat, Priority p, Flags f) + : Base(p, f), repeat(_repeat) + { + schedule(when); + } + + void process(); + + const char *description() const; + + Tick repeat; +}; + + +#endif // __SIM_GLOBAL_EVENT_HH__ diff --git a/src/sim/root.cc b/src/sim/root.cc index f77159486..9bb8b4f05 100644 --- a/src/sim/root.cc +++ b/src/sim/root.cc @@ -112,6 +112,8 @@ Root::Root(RootParams *p) : SimObject(p), _enabled(false), assert(_root == NULL); _root = this; lastTime.setTimer(); + + simQuantum = p->sim_quantum; } void diff --git a/src/sim/serialize.cc b/src/sim/serialize.cc index 9ef92d449..18af044d0 100644 --- a/src/sim/serialize.cc +++ b/src/sim/serialize.cc @@ -1,5 +1,7 @@ /* * Copyright (c) 2002-2005 The Regents of The University of Michigan + * Copyright (c) 2013 Advanced Micro Devices, Inc. + * Copyright (c) 2013 Mark D. Hill and David A. Wood * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -456,8 +458,12 @@ Globals::serialize(ostream &os) nameOut(os); paramOut(os, "curTick", curTick()); - nameOut(os, "MainEventQueue"); - mainEventQueue.serialize(os); + paramOut(os, "numMainEventQueues", numMainEventQueues); + + for (uint32_t i = 0; i < numMainEventQueues; ++i) { + nameOut(os, "MainEventQueue"); + mainEventQueue[i]->serialize(os); + } } void @@ -465,9 +471,12 @@ Globals::unserialize(Checkpoint *cp, const std::string §ion) { Tick tick; paramIn(cp, section, "curTick", tick); - mainEventQueue.setCurTick(tick); + paramIn(cp, section, "numMainEventQueues", numMainEventQueues); - mainEventQueue.unserialize(cp, "MainEventQueue"); + for (uint32_t i = 0; i < numMainEventQueues; ++i) { + mainEventQueue[i]->setCurTick(tick); + mainEventQueue[i]->unserialize(cp, "MainEventQueue"); + } } Serializable::Serializable() diff --git a/src/sim/serialize.hh b/src/sim/serialize.hh index 1e9566519..6d4207090 100644 --- a/src/sim/serialize.hh +++ b/src/sim/serialize.hh @@ -49,6 +49,7 @@ class IniFile; class Serializable; class Checkpoint; class SimObject; +class EventQueue; /** The current version of the checkpoint format. * This should be incremented by 1 and only 1 for every new version, where a new diff --git a/src/sim/sim_events.cc b/src/sim/sim_events.cc index 5380ddd83..5e47adca1 100644 --- a/src/sim/sim_events.cc +++ b/src/sim/sim_events.cc @@ -12,6 +12,8 @@ * modified or unmodified, in source code or in binary form. * * Copyright (c) 2002-2005 The Regents of The University of Michigan + * Copyright (c) 2013 Advanced Micro Devices, Inc. + * Copyright (c) 2013 Mark D. Hill and David A. Wood * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -51,53 +53,71 @@ using namespace std; -SimLoopExitEvent::SimLoopExitEvent() +GlobalSimLoopExitEvent::GlobalSimLoopExitEvent(Tick when, + const std::string &_cause, + int c, Tick r, bool serialize) + : GlobalEvent(when, Sim_Exit_Pri, + IsExitEvent | (serialize ? AutoSerialize : 0)), + cause(_cause), code(c), repeat(r) +{ +} + +const char * +GlobalSimLoopExitEvent::description() const +{ + return "global simulation loop exit"; +} + +// +// handle termination event +// +void +GlobalSimLoopExitEvent::process() +{ + if (repeat) { + schedule(curTick() + repeat); + } +} + +void +exitSimLoop(const std::string &message, int exit_code, Tick when, Tick repeat, + bool serialize) +{ + new GlobalSimLoopExitEvent(when + simQuantum, message, exit_code, repeat, + serialize); +} + +LocalSimLoopExitEvent::LocalSimLoopExitEvent() : Event(Sim_Exit_Pri, IsExitEvent | AutoSerialize), cause(""), code(0), repeat(0) { } -SimLoopExitEvent::SimLoopExitEvent(const std::string &_cause, int c, Tick r, - bool serialize) +LocalSimLoopExitEvent::LocalSimLoopExitEvent(const std::string &_cause, int c, + Tick r, bool serialize) : Event(Sim_Exit_Pri, IsExitEvent | (serialize ? AutoSerialize : 0)), cause(_cause), code(c), repeat(r) { } - // // handle termination event // void -SimLoopExitEvent::process() +LocalSimLoopExitEvent::process() { - // if this got scheduled on a different queue (e.g. the committed - // instruction queue) then make a corresponding event on the main - // queue. - if (!isFlagSet(IsMainQueue)) { - exitSimLoop(cause, code); - setFlags(AutoDelete); - } - - // otherwise do nothing... 
the IsExitEvent flag takes care of - // exiting the simulation loop and returning this object to Python - - // but if you are doing this on intervals, don't forget to make another - if (repeat) { - assert(isFlagSet(IsMainQueue)); - mainEventQueue.schedule(this, curTick() + repeat); - } + exitSimLoop(cause, 0); } const char * -SimLoopExitEvent::description() const +LocalSimLoopExitEvent::description() const { return "simulation loop exit"; } void -SimLoopExitEvent::serialize(ostream &os) +LocalSimLoopExitEvent::serialize(ostream &os) { paramOut(os, "type", string("SimLoopExitEvent")); Event::serialize(os); @@ -108,7 +128,7 @@ SimLoopExitEvent::serialize(ostream &os) } void -SimLoopExitEvent::unserialize(Checkpoint *cp, const string §ion) +LocalSimLoopExitEvent::unserialize(Checkpoint *cp, const string §ion) { Event::unserialize(cp, section); @@ -117,22 +137,26 @@ SimLoopExitEvent::unserialize(Checkpoint *cp, const string §ion) UNSERIALIZE_SCALAR(repeat); } -Serializable * -SimLoopExitEvent::createForUnserialize(Checkpoint *cp, const string §ion) +void +LocalSimLoopExitEvent::unserialize(Checkpoint *cp, const string §ion, + EventQueue *eventq) { - return new SimLoopExitEvent(); -} + Event::unserialize(cp, section, eventq); -REGISTER_SERIALIZEABLE("SimLoopExitEvent", SimLoopExitEvent) + UNSERIALIZE_SCALAR(cause); + UNSERIALIZE_SCALAR(code); + UNSERIALIZE_SCALAR(repeat); +} -void -exitSimLoop(const std::string &message, int exit_code, Tick when, Tick repeat, - bool serialize) +Serializable * +LocalSimLoopExitEvent::createForUnserialize(Checkpoint *cp, + const string §ion) { - Event *event = new SimLoopExitEvent(message, exit_code, repeat, serialize); - mainEventQueue.schedule(event, when); + return new LocalSimLoopExitEvent(); } +REGISTER_SERIALIZEABLE("LocalSimLoopExitEvent", LocalSimLoopExitEvent) + // // constructor: automatically schedules at specified time // diff --git a/src/sim/sim_events.hh b/src/sim/sim_events.hh index 4abfb317c..5be2609fd 100644 --- 
a/src/sim/sim_events.hh +++ b/src/sim/sim_events.hh @@ -12,6 +12,8 @@ * modified or unmodified, in source code or in binary form. * * Copyright (c) 2002-2005 The Regents of The University of Michigan + * Copyright (c) 2013 Advanced Micro Devices, Inc. + * Copyright (c) 2013 Mark D. Hill and David A. Wood * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -43,13 +45,13 @@ #ifndef __SIM_SIM_EVENTS_HH__ #define __SIM_SIM_EVENTS_HH__ -#include "sim/eventq.hh" +#include "sim/global_event.hh" #include "sim/serialize.hh" // // Event to terminate simulation at a particular cycle/instruction // -class SimLoopExitEvent : public Event +class GlobalSimLoopExitEvent : public GlobalEvent { protected: // string explaining why we're terminating @@ -59,12 +61,33 @@ class SimLoopExitEvent : public Event public: // non-scheduling version for createForUnserialize() - SimLoopExitEvent(); - SimLoopExitEvent(const std::string &_cause, int c, Tick repeat = 0, - bool serialize = false); + GlobalSimLoopExitEvent(); + GlobalSimLoopExitEvent(Tick when, const std::string &_cause, int c, + Tick repeat = 0, bool serialize = false); - std::string getCause() { return cause; } - int getCode() { return code; } + const std::string getCause() const { return cause; } + const int getCode() const { return code; } + + void process(); // process event + + virtual const char *description() const; +}; + +class LocalSimLoopExitEvent : public Event +{ + protected: + // string explaining why we're terminating + std::string cause; + int code; + Tick repeat; + + public: + LocalSimLoopExitEvent(); + LocalSimLoopExitEvent(const std::string &_cause, int c, Tick repeat = 0, + bool serialize = false); + + const std::string getCause() const { return cause; } + const int getCode() const { return code; } void process(); // process event @@ -72,6 +95,8 @@ class SimLoopExitEvent : public Event virtual void serialize(std::ostream &os); virtual void unserialize(Checkpoint *cp, 
const std::string §ion); + virtual void unserialize(Checkpoint *cp, const std::string §ion, + EventQueue *eventq); static Serializable *createForUnserialize(Checkpoint *cp, const std::string §ion); }; @@ -89,7 +114,7 @@ class CountedDrainEvent : public Event void setCount(int _count) { count = _count; } - int getCount() { return count; } + const int getCount() const { return count; } }; // diff --git a/src/sim/sim_exit.hh b/src/sim/sim_exit.hh index ef73d822d..218db9aeb 100644 --- a/src/sim/sim_exit.hh +++ b/src/sim/sim_exit.hh @@ -39,8 +39,6 @@ // forward declaration class Callback; -class EventQueue; -class SimLoopExitEvent; /// Register a callback to be called when Python exits. Defined in /// sim/main.cc. diff --git a/src/sim/sim_object.cc b/src/sim/sim_object.cc index 345fb85cb..0edad719e 100644 --- a/src/sim/sim_object.cc +++ b/src/sim/sim_object.cc @@ -60,7 +60,7 @@ SimObject::SimObjectList SimObject::simObjectList; // SimObject constructor: used to maintain static simObjectList // SimObject::SimObject(const Params *p) - : EventManager(p->eventq), _params(p) + : EventManager(getEventQueue(p->eventq_index)), _params(p) { #ifdef DEBUG doDebugBreak = false; diff --git a/src/sim/simulate.cc b/src/sim/simulate.cc index 6962fab9f..78695688a 100644 --- a/src/sim/simulate.cc +++ b/src/sim/simulate.cc @@ -1,5 +1,7 @@ /* * Copyright (c) 2006 The Regents of The University of Michigan + * Copyright (c) 2013 Advanced Micro Devices, Inc. + * Copyright (c) 2013 Mark D. Hill and David A. Wood * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -29,6 +31,9 @@ * Steve Reinhardt */ +#include +#include + #include "base/misc.hh" #include "base/pollevent.hh" #include "base/types.hh" @@ -39,14 +44,60 @@ #include "sim/simulate.hh" #include "sim/stat_control.hh" +//! Mutex for handling async events. +std::mutex asyncEventMutex; + +//! Global barrier for synchronizing threads entering/exiting the +//! simulation loop. 
+Barrier *threadBarrier; + +//! forward declaration +Event *doSimLoop(EventQueue *); + +/** + * The main function for all subordinate threads (i.e., all threads + * other than the main thread). These threads start by waiting on + * threadBarrier. Once all threads have arrived at threadBarrier, + * they enter the simulation loop concurrently. When they exit the + * loop, they return to waiting on threadBarrier. This process is + * repeated until the simulation terminates. + */ +static void +thread_loop(EventQueue *queue) +{ + while (true) { + threadBarrier->wait(); + doSimLoop(queue); + } +} + /** Simulate for num_cycles additional cycles. If num_cycles is -1 * (the default), do not limit simulation; some other event must * terminate the loop. Exported to Python via SWIG. * @return The SimLoopExitEvent that caused the loop to exit. */ -SimLoopExitEvent * +GlobalSimLoopExitEvent * simulate(Tick num_cycles) { + // The first time simulate() is called from the Python code, we need to + // create a thread for each of event queues referenced by the + // instantiated sim objects. + static bool threads_initialized = false; + static std::vector threads; + + if (!threads_initialized) { + threadBarrier = new Barrier(numMainEventQueues); + + // the main thread (the one we're currently running on) + // handles queue 0, so we only need to allocate new threads + // for queues 1..N-1. We'll call these the "subordinate" threads. + for (uint32_t i = 1; i < numMainEventQueues; i++) { + threads.push_back(new std::thread(thread_loop, mainEventQueue[i])); + } + + threads_initialized = true; + } + inform("Entering event queue @ %d. 
Starting simulation...\n", curTick()); if (num_cycles < MaxTick - curTick()) @@ -54,38 +105,99 @@ simulate(Tick num_cycles) else // counter would roll over or be set to MaxTick anyhow num_cycles = MaxTick; - Event *limit_event = - new SimLoopExitEvent("simulate() limit reached", 0); - mainEventQueue.schedule(limit_event, num_cycles); + GlobalEvent *limit_event = new GlobalSimLoopExitEvent(num_cycles, + "simulate() limit reached", 0, 0); + + GlobalSyncEvent *quantum_event = NULL; + if (numMainEventQueues > 1) { + if (simQuantum == 0) { + fatal("Quantum for multi-eventq simulation not specified"); + } + + quantum_event = new GlobalSyncEvent(simQuantum, simQuantum, + EventBase::Progress_Event_Pri, 0); + + inParallelMode = true; + } + + // all subordinate (created) threads should be waiting on the + // barrier; the arrival of the main thread here will satisfy the + // barrier, and all threads will enter doSimLoop in parallel + threadBarrier->wait(); + Event *local_event = doSimLoop(mainEventQueue[0]); + assert(local_event != NULL); + + inParallelMode = false; + + // locate the global exit event and return it to Python + BaseGlobalEvent *global_event = local_event->globalEvent(); + assert(global_event != NULL); + + GlobalSimLoopExitEvent *global_exit_event = + dynamic_cast(global_event); + assert(global_exit_event != NULL); + + // if we didn't hit limit_event, delete it. + if (global_exit_event != limit_event) { + assert(limit_event->scheduled()); + limit_event->deschedule(); + delete limit_event; + } + + //! Delete the simulation quantum event. + if (quantum_event != NULL) { + quantum_event->deschedule(); + delete quantum_event; + } + + return global_exit_event; +} + +/** + * Test and clear the global async_event flag, such that each time the + * flag is cleared, only one thread returns true (and thus is assigned + * to handle the corresponding async event(s)). 
+ */ +static bool +testAndClearAsyncEvent() +{ + bool was_set = false; + asyncEventMutex.lock(); + + if (async_event) { + was_set = true; + async_event = false; + } + + asyncEventMutex.unlock(); + return was_set; +} + +/** + * The main per-thread simulation loop. This loop is executed by all + * simulation threads (the main thread and the subordinate threads) in + * parallel. + */ +Event * +doSimLoop(EventQueue *eventq) +{ + // set the per thread current eventq pointer + curEventQueue(eventq); + eventq->handleAsyncInsertions(); while (1) { // there should always be at least one event (the SimLoopExitEvent // we just scheduled) in the queue - assert(!mainEventQueue.empty()); - assert(curTick() <= mainEventQueue.nextTick() && + assert(!eventq->empty()); + assert(curTick() <= eventq->nextTick() && "event scheduled in the past"); - Event *exit_event = mainEventQueue.serviceOne(); + Event *exit_event = eventq->serviceOne(); if (exit_event != NULL) { - // hit some kind of exit event; return to Python - // event must be subclass of SimLoopExitEvent... - SimLoopExitEvent *se_event; - se_event = dynamic_cast(exit_event); - - if (se_event == NULL) - panic("Bogus exit event class!"); - - // if we didn't hit limit_event, delete it - if (se_event != limit_event) { - assert(limit_event->scheduled()); - limit_event->squash(); - hack_once("be nice to actually delete the event here"); - } - - return se_event; + return exit_event; } - if (async_event) { + if (async_event && testAndClearAsyncEvent()) { async_event = false; if (async_statdump || async_statreset) { Stats::schedStatEvent(async_statdump, async_statreset); @@ -113,4 +225,3 @@ simulate(Tick num_cycles) // not reached... 
only exit is return on SimLoopExitEvent } - diff --git a/src/sim/simulate.hh b/src/sim/simulate.hh index 978082f82..5e51c76b6 100644 --- a/src/sim/simulate.hh +++ b/src/sim/simulate.hh @@ -32,4 +32,4 @@ #include "base/types.hh" #include "sim/sim_events.hh" -SimLoopExitEvent *simulate(Tick num_cycles = MaxTick); +GlobalSimLoopExitEvent *simulate(Tick num_cycles = MaxTick); diff --git a/src/sim/stat_control.cc b/src/sim/stat_control.cc index 7a8d48ae2..83089f399 100644 --- a/src/sim/stat_control.cc +++ b/src/sim/stat_control.cc @@ -12,6 +12,8 @@ * modified or unmodified, in source code or in binary form. * * Copyright (c) 2004-2005 The Regents of The University of Michigan + * Copyright (c) 2013 Advanced Micro Devices, Inc. + * Copyright (c) 2013 Mark D. Hill and David A. Wood * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -53,7 +55,7 @@ #include "base/statistics.hh" #include "base/time.hh" #include "cpu/base.hh" -#include "sim/eventq_impl.hh" +#include "sim/global_event.hh" #include "sim/stat_control.hh" using namespace std; @@ -68,7 +70,7 @@ namespace Stats { Time statTime(true); Tick startTick; -Event *dumpEvent; +GlobalEvent *dumpEvent; struct SimTicksReset : public Callback { @@ -210,7 +212,7 @@ initSimStats() /** * Event to dump and/or reset the statistics. 
*/ -class StatEvent : public Event +class StatEvent : public GlobalEvent { private: bool dump; @@ -218,8 +220,8 @@ class StatEvent : public Event Tick repeat; public: - StatEvent(bool _dump, bool _reset, Tick _repeat) - : Event(Stat_Event_Pri, AutoDelete), + StatEvent(Tick _when, bool _dump, bool _reset, Tick _repeat) + : GlobalEvent(_when, Stat_Event_Pri, 0), dump(_dump), reset(_reset), repeat(_repeat) { } @@ -237,13 +239,18 @@ class StatEvent : public Event Stats::schedStatEvent(dump, reset, curTick() + repeat, repeat); } } + + const char *description() const { return "GlobalStatEvent"; } }; void schedStatEvent(bool dump, bool reset, Tick when, Tick repeat) { - dumpEvent = new StatEvent(dump, reset, repeat); - mainEventQueue.schedule(dumpEvent, when); + // simQuantum is being added to the time when the stats would be + // dumped so as to ensure that this event happens only after the next + // sync amongst the event queues. A single event queue simulation + // should remain unaffected. + dumpEvent = new StatEvent(when + simQuantum, dump, reset, repeat); } void @@ -258,7 +265,7 @@ periodicStatDump(Tick period) */ if (dumpEvent != NULL && (period == 0 || dumpEvent->scheduled())) { // Event should AutoDelete, so we do not need to free it. - mainEventQueue.deschedule(dumpEvent); + dumpEvent->deschedule(); } /* @@ -288,7 +295,7 @@ updateEvents() (dumpEvent->scheduled() && dumpEvent->when() < curTick())) { // shift by curTick() and reschedule Tick _when = dumpEvent->when(); - mainEventQueue.reschedule(dumpEvent, _when + curTick()); + dumpEvent->reschedule(_when + curTick()); } }