From ff6a3a61719974de808b2695153144a9ea9c3d1d Mon Sep 17 00:00:00 2001 From: Earl Ou Date: Tue, 22 Sep 2020 14:06:30 +0800 Subject: [PATCH] base,sim: implement a faster mutex for single thread case This change applies an atomic variable to check if we really need to obtain a mutex, and uses a condition variable to notify. See about 5% improvement in the simulation speed. Change-Id: I7e165987dcb587b27fae90978b9b3fde6f5563ef Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/34915 Reviewed-by: Andreas Sandberg Reviewed-by: Richard Cooper Reviewed-by: Daniel Carvalho Reviewed-by: Giacomo Travaglini Maintainer: Gabe Black Tested-by: kokoro --- src/base/SConscript | 1 + src/base/uncontended_mutex.hh | 118 +++++++++++++++++++++++++++++ src/base/uncontended_mutex.test.cc | 88 +++++++++++++++++++++ src/sim/eventq.cc | 1 + src/sim/eventq.hh | 6 +- 5 files changed, 211 insertions(+), 3 deletions(-) create mode 100644 src/base/uncontended_mutex.hh create mode 100644 src/base/uncontended_mutex.test.cc diff --git a/src/base/SConscript b/src/base/SConscript index 6514de0b6..e04d84a71 100644 --- a/src/base/SConscript +++ b/src/base/SConscript @@ -73,6 +73,7 @@ Source('trace.cc') GTest('trie.test', 'trie.test.cc') Source('types.cc') GTest('types.test', 'types.test.cc', 'types.cc') +GTest('uncontended_mutex.test', 'uncontended_mutex.test.cc') Source('stats/group.cc') Source('stats/text.cc') diff --git a/src/base/uncontended_mutex.hh b/src/base/uncontended_mutex.hh new file mode 100644 index 000000000..721712f05 --- /dev/null +++ b/src/base/uncontended_mutex.hh @@ -0,0 +1,118 @@ +/* + * Copyright 2020 Google, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __BASE_UNCONTENDED_MUTEX_HH__ +#define __BASE_UNCONTENDED_MUTEX_HH__ + +#include +#include +#include + +/* + * The std::mutex implementation is slower than expected because of many mode + * checking and legacy support. + * + * The UncontendedMutex uses an atomic flag to check if we really need to + * obtain a mutex lock. For most cases without multi-threads event queues, + * e.g. non-KVM simulation, this avoid the usage of mutex and speed up the + * simulation. + */ +class UncontendedMutex +{ + private: + /* + * A flag to record the current status: + * 0: no one has the lock + * 1: exactly one thread has the lock + * >1: one or more threads are waiting for the lock + */ + std::atomic flag; + std::mutex m; + std::condition_variable cv; + + bool + testAndSet(int expected, int desired) + { + return flag.compare_exchange_strong(expected, desired); + } + + public: + UncontendedMutex() : flag(0) {} + + void + lock() + { + /* + * Here we use 'flag' to check if we are the first thread to get the + * lock. If not, we try to obtain the real mutex, and use the condition + * variable to wait for the thread who has the lock to release it. + * + * The flag will be updated to more than 1, so the thread with lock + * knows that there is another thread waiting for the lock. + */ + while (!testAndSet(0, 1)) { + std::unique_lock ul(m); + /* + * It is possible that just before we obtain the mutex lock, the + * first thread releases the flag and thus flag becomes zero. In + * such case, we shouldn't wait for the condition variable because + * there is no the other thread to notify us. + */ + if (flag++ == 0) + break; + cv.wait(ul); + } + } + + void + unlock() + { + /* In case there are no other threads waiting, we will just clear the + * flag and return. + */ + if (testAndSet(1, 0)) + return; + + /* + * Otherwise, clear the flag and notify all the waiting threads. We + * need to protect the flag by mutex here so that there won't be + * another thread waiting but the flag is already set to 0. + */ + { + std::lock_guard g(m); + flag = 0; + } + /* + * It's possible to update the algorithm and use notify_one() here. + * However, tests show that notify_one() is much slower than + * notify_all() in this case. Here we choose to use notify_all(). + */ + cv.notify_all(); + } +}; + +#endif // __BASE_UNCONTENDED_MUTEX_HH__ diff --git a/src/base/uncontended_mutex.test.cc b/src/base/uncontended_mutex.test.cc new file mode 100644 index 000000000..6ce929b4b --- /dev/null +++ b/src/base/uncontended_mutex.test.cc @@ -0,0 +1,88 @@ +/* + * Copyright 2020 Google, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include + +#include "base/uncontended_mutex.hh" + +TEST(UncontendedMutex, Lock) +{ + int data = 0; + UncontendedMutex m; + + std::thread t1([&] () { + std::lock_guard g(m); + // Simulate += operation with a racing change between read and write. + int tmp = data; + std::this_thread::sleep_for(std::chrono::milliseconds(200)); + data = tmp + 1; + }); + + std::thread t2([&] () { + std::this_thread::sleep_for(std::chrono::milliseconds(100)); + std::lock_guard g(m); + data = data + 1; + }); + + std::thread t3([&] () { + std::this_thread::sleep_for(std::chrono::milliseconds(100)); + std::lock_guard g(m); + data = data + 1; + }); + t1.join(); + t2.join(); + t3.join(); + + EXPECT_EQ(data, 3); +} + +TEST(UncontendedMutex, HeavyContention) +{ + int num_of_iter = 1000; + int num_of_thread = 1000; + std::vector threads; + + int data = 0; + UncontendedMutex m; + + for (int t = 0 ; t < num_of_thread; ++t) { + threads.emplace_back([&] () { + for (int k = 0; k < num_of_iter; ++k) { + std::lock_guard g(m); + data++; + } + }); + } + + for (auto& t : threads) { + t.join(); + } + EXPECT_EQ(data, num_of_iter * num_of_thread); +} diff --git a/src/sim/eventq.cc b/src/sim/eventq.cc index bc4864c9b..adce51ef5 100644 --- a/src/sim/eventq.cc +++ b/src/sim/eventq.cc @@ -32,6 +32,7 @@ #include #include +#include #include #include #include diff --git a/src/sim/eventq.hh b/src/sim/eventq.hh index aa5472203..45a5ab8d7 100644 --- a/src/sim/eventq.hh +++ b/src/sim/eventq.hh @@ -41,12 +41,12 @@ #include #include #include -#include #include #include "base/debug.hh" #include "base/flags.hh" #include "base/types.hh" +#include "base/uncontended_mutex.hh" #include "debug/Event.hh" #include "sim/serialize.hh" @@ -622,7 +622,7 @@ class EventQueue Tick _curTick; //! Mutex to protect async queue. - std::mutex async_queue_mutex; + UncontendedMutex async_queue_mutex; //! List of events added by other threads to this event queue. std::list async_queue; @@ -647,7 +647,7 @@ class EventQueue * @see EventQueue::lock() * @see EventQueue::unlock() */ - std::mutex service_mutex; + UncontendedMutex service_mutex; //! Insert / remove event from the queue. Should only be called //! by thread operating this queue. -- 2.30.2