From ecb29549cf226ed129caa7d43ce79e8b2e4d9575 Mon Sep 17 00:00:00 2001 From: Jacob Lifshay Date: Thu, 21 Jul 2022 03:18:09 -0700 Subject: [PATCH] benchmarking c++11 atomics works --- .vscode/launch.json | 18 ++ .vscode/settings.json | 1 + CMakeLists.txt | 10 +- Makefile | 1 + harness.cpp | 8 - harness.h | 57 ---- src/aarch64/aarch64_benchmarks.cpp | 20 ++ src/aarch64/aarch64_benchmarks.h | 5 + src/all_benchmarks.cpp | 28 ++ src/all_benchmarks.h | 5 + .../c11_atomics/c11_atomics_benchmarks.cpp | 223 +++++++++++++++ .../c11_atomics/c11_atomics_benchmarks.h | 5 + src/common/common_benchmarks.cpp | 8 + src/common/common_benchmarks.h | 5 + src/harness.cpp | 262 ++++++++++++++++++ src/harness.h | 117 ++++++++ main.cpp => src/main.cpp | 104 ++++++- src/powerpc64le/powerpc64le_benchmarks.cpp | 21 ++ src/powerpc64le/powerpc64le_benchmarks.h | 5 + src/x86_64/x86_64_benchmarks.cpp | 20 ++ src/x86_64/x86_64_benchmarks.h | 5 + 21 files changed, 857 insertions(+), 71 deletions(-) create mode 100644 .vscode/launch.json delete mode 100644 harness.cpp delete mode 100644 harness.h create mode 100644 src/aarch64/aarch64_benchmarks.cpp create mode 100644 src/aarch64/aarch64_benchmarks.h create mode 100644 src/all_benchmarks.cpp create mode 100644 src/all_benchmarks.h create mode 100644 src/common/c11_atomics/c11_atomics_benchmarks.cpp create mode 100644 src/common/c11_atomics/c11_atomics_benchmarks.h create mode 100644 src/common/common_benchmarks.cpp create mode 100644 src/common/common_benchmarks.h create mode 100644 src/harness.cpp create mode 100644 src/harness.h rename main.cpp => src/main.cpp (75%) create mode 100644 src/powerpc64le/powerpc64le_benchmarks.cpp create mode 100644 src/powerpc64le/powerpc64le_benchmarks.h create mode 100644 src/x86_64/x86_64_benchmarks.cpp create mode 100644 src/x86_64/x86_64_benchmarks.h diff --git a/.vscode/launch.json b/.vscode/launch.json new file mode 100644 index 0000000..1a14a9d --- /dev/null +++ b/.vscode/launch.json @@ -0,0 +1,18 @@ +{ + // Use IntelliSense to learn about possible attributes. + // Hover to view descriptions of existing attributes. + // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 + "version": "0.2.0", + "configurations": [ + { + "type": "lldb", + "request": "launch", + "name": "Debug", + "program": "${workspaceFolder}/build-x86_64/benchmarks", + "args": [ + "-j1" + ], + "cwd": "${workspaceFolder}" + } + ] +} \ No newline at end of file diff --git a/.vscode/settings.json b/.vscode/settings.json index 630965c..c096ee7 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -4,4 +4,5 @@ "-DCMAKE_TOOLCHAIN_FILE=toolchain-x86_64-linux-gnu.cmake" ], "cmake.copyCompileCommands": "${workspaceFolder}/compile_commands.json", + "cmake.generator": "Unix Makefiles", } \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt index f239c80..025462d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,11 +1,17 @@ -cmake_minimum_required(VERSION 3.11.0) +cmake_minimum_required(VERSION 3.12.0) project(benchmarks VERSION 0.1.0) set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_STANDARD_REQUIRED ON) +find_package(Threads REQUIRED) + add_compile_options(-Wall -Wextra -Wimplicit-fallthrough) -add_executable(benchmarks main.cpp harness.cpp) +file(GLOB_RECURSE sources + RELATIVE ${CMAKE_SOURCE_DIR} + CONFIGURE_DEPENDS src/*.cpp src/*.c) +add_executable(benchmarks "${sources}") +target_link_libraries(benchmarks Threads::Threads) set(CPACK_PROJECT_NAME ${PROJECT_NAME}) set(CPACK_PROJECT_VERSION ${PROJECT_VERSION}) diff --git a/Makefile b/Makefile index a701537..64bacb5 100644 --- a/Makefile +++ b/Makefile @@ -16,6 +16,7 @@ all: $(foreach arch,$(enabled_arches),build-$(arch)/benchmarks) common_cmake_flags = -S . common_cmake_flags += -DCMAKE_EXPORT_COMPILE_COMMANDS:BOOL=TRUE +common_cmake_flags += -DCMAKE_BUILD_TYPE=RelWithDebInfo reset_make_env = "MAKEFLAGS=" "MFLAGS=" "MAKELEVEL=" "MAKE_TERMERR=" "MAKE_TERMOUT=" diff --git a/harness.cpp b/harness.cpp deleted file mode 100644 index 2de66f0..0000000 --- a/harness.cpp +++ /dev/null @@ -1,8 +0,0 @@ -#include "harness.h" - -void BenchHarnessBase::base_run( - Config config, void (*fn)(BenchHarnessBase *bench_harness_base, - std::uint64_t iteration_count)) -{ - // FIXME: finish -} \ No newline at end of file diff --git a/harness.h b/harness.h deleted file mode 100644 index 3c4cd28..0000000 --- a/harness.h +++ /dev/null @@ -1,57 +0,0 @@ -#pragma once - -#include -#include -#include -#include -#include - -struct Config final -{ - std::optional thread_count; - std::optional iteration_count; -}; - -template > -class BenchHarness; - -class BenchHarnessBase -{ - template - friend class BenchHarness; - - private: - void base_run(Config config, - void (*fn)(BenchHarnessBase *bench_harness_base, - std::uint64_t iteration_count)); -}; - -template -class BenchHarness> final - : private BenchHarnessBase -{ - private: - Fn fn; - - public: - void run(Config config) - { - base_run(config, [](BenchHarnessBase *bench_harness_base, - std::uint64_t iteration_count) { - auto &fn = static_cast(bench_harness_base)->fn; - for (std::uint64_t i = 0; i < iteration_count; i++) - { - Input input; - - // optimization barrier - asm("" : : "r"(std::addressof(input)) : "memory"); - - auto output = fn(input); - - // optimization barrier - asm("" : : "r"(std::addressof(output)) : "memory"); - } - }); - } -}; \ No newline at end of file diff --git a/src/aarch64/aarch64_benchmarks.cpp b/src/aarch64/aarch64_benchmarks.cpp new file mode 100644 index 0000000..81f729c --- /dev/null +++ b/src/aarch64/aarch64_benchmarks.cpp @@ -0,0 +1,20 @@ +#include "aarch64_benchmarks.h" + +#ifdef __aarch64__ + +std::vector aarch64_benchmarks(Config config) +{ + std::vector retval; + // TODO: add aarch64 benchmarks + (void)config; + return retval; +} + +#else + +std::vector aarch64_benchmarks(Config) +{ + return {}; +} + +#endif \ No newline at end of file diff --git a/src/aarch64/aarch64_benchmarks.h b/src/aarch64/aarch64_benchmarks.h new file mode 100644 index 0000000..b79cc9f --- /dev/null +++ b/src/aarch64/aarch64_benchmarks.h @@ -0,0 +1,5 @@ +#pragma once + +#include "../harness.h" + +std::vector aarch64_benchmarks(Config config); \ No newline at end of file diff --git a/src/all_benchmarks.cpp b/src/all_benchmarks.cpp new file mode 100644 index 0000000..748541c --- /dev/null +++ b/src/all_benchmarks.cpp @@ -0,0 +1,28 @@ +#include "all_benchmarks.h" +#include "aarch64/aarch64_benchmarks.h" +#include "common/common_benchmarks.h" +#include "powerpc64le/powerpc64le_benchmarks.h" +#include "x86_64/x86_64_benchmarks.h" +#include +#include + +std::vector all_benchmarks(Config config) +{ + std::vector retval = common_benchmarks(config); + { + auto benchmarks = x86_64_benchmarks(config); + std::move(benchmarks.begin(), benchmarks.end(), + std::back_inserter(retval)); + } + { + auto benchmarks = aarch64_benchmarks(config); + std::move(benchmarks.begin(), benchmarks.end(), + std::back_inserter(retval)); + } + { + auto benchmarks = powerpc64le_benchmarks(config); + std::move(benchmarks.begin(), benchmarks.end(), + std::back_inserter(retval)); + } + return retval; +} \ No newline at end of file diff --git a/src/all_benchmarks.h b/src/all_benchmarks.h new file mode 100644 index 0000000..9447131 --- /dev/null +++ b/src/all_benchmarks.h @@ -0,0 +1,5 @@ +#pragma once + +#include "harness.h" + +std::vector all_benchmarks(Config config); \ No newline at end of file diff --git a/src/common/c11_atomics/c11_atomics_benchmarks.cpp b/src/common/c11_atomics/c11_atomics_benchmarks.cpp new file mode 100644 index 0000000..13dc5af --- /dev/null +++ b/src/common/c11_atomics/c11_atomics_benchmarks.cpp @@ -0,0 +1,223 @@ +#include "c11_atomics_benchmarks.h" +#include +#include +#include +#include +#include +#include + +template struct IntTypeName; + +template +inline constexpr std::string_view int_type_name = IntTypeName::name; + +#define INT_TYPE_NAME(sz, ui, uint) \ + template <> struct IntTypeName final \ + { \ + static constexpr std::string_view name = #ui #sz; \ + }; + +INT_TYPE_NAME(8, u, uint) +INT_TYPE_NAME(16, u, uint) +INT_TYPE_NAME(32, u, uint) +INT_TYPE_NAME(64, u, uint) +INT_TYPE_NAME(8, i, int) +INT_TYPE_NAME(16, i, int) +INT_TYPE_NAME(32, i, int) +INT_TYPE_NAME(64, i, int) + +template struct MemoryOrderName; + +template +inline constexpr std::string_view memory_order_name = + MemoryOrderName::name; + +#define MEMORY_ORDER_NAME(order) \ + template <> struct MemoryOrderName final \ + { \ + static constexpr std::string_view name = #order; \ + }; + +MEMORY_ORDER_NAME(relaxed) +MEMORY_ORDER_NAME(acquire) +MEMORY_ORDER_NAME(release) +MEMORY_ORDER_NAME(acq_rel) +MEMORY_ORDER_NAME(seq_cst) + +template using Buf = std::shared_ptr>>; + +template +static void push_atomic_bench(std::vector &benches, Config config, + Buf buf, Fn fn, NameParts &&...name_parts) +{ + auto log2_stride = config.log2_stride; + std::size_t index_mask = 1; + index_mask <<= config.log2_stride; + index_mask <<= config.log2_memory_location_count; + index_mask--; + push_bench( + benches, + [buf, fn, index_mask, log2_stride](T input, std::uint64_t iteration, + std::uint32_t thread_num) { + std::size_t index = iteration; + index ^= static_cast(thread_num) * 0x12345; + index <<= log2_stride; + index &= index_mask; + std::atomic *atomic = &(*buf)[index]; + input ^= static_cast(iteration); + return fn(input, atomic); + }, + T{}, name_parts...); +} + +template +static void rmw_benchmarks(std::vector &benches, Config config, + Buf buf) +{ + push_atomic_bench( + benches, config, buf, + [](T input, std::atomic *atomic) { + return std::atomic_exchange_explicit(atomic, input, order); + }, + "atomic_exchange_", int_type_name, "_", memory_order_name); + push_atomic_bench( + benches, config, buf, + [](T input, std::atomic *atomic) { + return std::atomic_fetch_add_explicit(atomic, input, order); + }, + "atomic_fetch_add_", int_type_name, "_", memory_order_name); + push_atomic_bench( + benches, config, buf, + [](T input, std::atomic *atomic) { + return std::atomic_fetch_sub_explicit(atomic, input, order); + }, + "atomic_fetch_sub_", int_type_name, "_", memory_order_name); + push_atomic_bench( + benches, config, buf, + [](T input, std::atomic *atomic) { + return std::atomic_fetch_and_explicit(atomic, input, order); + }, + "atomic_fetch_and_", int_type_name, "_", memory_order_name); + push_atomic_bench( + benches, config, buf, + [](T input, std::atomic *atomic) { + return std::atomic_fetch_or_explicit(atomic, input, order); + }, + "atomic_fetch_or_", int_type_name, "_", memory_order_name); + push_atomic_bench( + benches, config, buf, + [](T input, std::atomic *atomic) { + return std::atomic_fetch_xor_explicit(atomic, input, order); + }, + "atomic_fetch_xor_", int_type_name, "_", memory_order_name); +} + +template +static void load_benchmarks(std::vector &benches, Config config, + Buf buf) +{ + push_atomic_bench( + benches, config, buf, + [](T, std::atomic *atomic) { + return std::atomic_load_explicit(atomic, order); + }, + "atomic_load_", int_type_name, "_", memory_order_name); +} + +template +static void store_benchmarks(std::vector &benches, Config config, + Buf buf) +{ + push_atomic_bench( + benches, config, buf, + [](T input, std::atomic *atomic) { + return std::atomic_store_explicit(atomic, input, order); + }, + "atomic_store_", int_type_name, "_", memory_order_name); +} + +template +static void cmp_xchg_benchmarks(std::vector &benches, Config config, + Buf buf) +{ + push_atomic_bench( + benches, config, buf, + [](T input, std::atomic *atomic) { + T expected = input >> 1; + bool succeeded = std::atomic_compare_exchange_weak_explicit( + atomic, &expected, input, succ, fail); + return std::pair(expected, succeeded); + }, + "atomic_compare_exchange_weak_", int_type_name, "_", + memory_order_name, "_", memory_order_name); + push_atomic_bench( + benches, config, buf, + [](T input, std::atomic *atomic) { + T expected = input >> 1; + bool succeeded = std::atomic_compare_exchange_strong_explicit( + atomic, &expected, input, succ, fail); + return std::pair(expected, succeeded); + }, + "atomic_compare_exchange_strong_", int_type_name, "_", + memory_order_name, "_", memory_order_name); +} + +template +static void benchmarks(std::vector &benches, Config config) +{ + std::size_t buf_size = 1; + buf_size <<= config.log2_memory_location_count; + buf_size <<= config.log2_stride; + Buf buf = std::make_shared>>(buf_size); + + rmw_benchmarks(benches, config, buf); + rmw_benchmarks(benches, config, buf); + rmw_benchmarks(benches, config, buf); + rmw_benchmarks(benches, config, buf); + rmw_benchmarks(benches, config, buf); + + load_benchmarks(benches, config, buf); + load_benchmarks(benches, config, buf); + load_benchmarks(benches, config, buf); + + store_benchmarks(benches, config, buf); + store_benchmarks(benches, config, buf); + store_benchmarks(benches, config, buf); + + cmp_xchg_benchmarks(benches, config, buf); + + cmp_xchg_benchmarks(benches, config, buf); + cmp_xchg_benchmarks(benches, config, buf); + + cmp_xchg_benchmarks(benches, config, buf); + + cmp_xchg_benchmarks(benches, config, buf); + cmp_xchg_benchmarks(benches, config, buf); + + cmp_xchg_benchmarks(benches, config, buf); + cmp_xchg_benchmarks(benches, config, buf); + cmp_xchg_benchmarks(benches, config, buf); +} + +std::vector c11_atomics_benchmarks(Config config) +{ + std::vector benches; + benchmarks(benches, config); + benchmarks(benches, config); + benchmarks(benches, config); + benchmarks(benches, config); + benchmarks(benches, config); + benchmarks(benches, config); + benchmarks(benches, config); + benchmarks(benches, config); + return benches; +} \ No newline at end of file diff --git a/src/common/c11_atomics/c11_atomics_benchmarks.h b/src/common/c11_atomics/c11_atomics_benchmarks.h new file mode 100644 index 0000000..9966297 --- /dev/null +++ b/src/common/c11_atomics/c11_atomics_benchmarks.h @@ -0,0 +1,5 @@ +#pragma once + +#include "../../harness.h" + +std::vector c11_atomics_benchmarks(Config config); \ No newline at end of file diff --git a/src/common/common_benchmarks.cpp b/src/common/common_benchmarks.cpp new file mode 100644 index 0000000..42007a4 --- /dev/null +++ b/src/common/common_benchmarks.cpp @@ -0,0 +1,8 @@ +#include "common_benchmarks.h" +#include "c11_atomics/c11_atomics_benchmarks.h" + +std::vector common_benchmarks(Config config) +{ + auto retval = c11_atomics_benchmarks(config); + return retval; +} \ No newline at end of file diff --git a/src/common/common_benchmarks.h b/src/common/common_benchmarks.h new file mode 100644 index 0000000..9505408 --- /dev/null +++ b/src/common/common_benchmarks.h @@ -0,0 +1,5 @@ +#pragma once + +#include "../harness.h" + +std::vector common_benchmarks(Config config); \ No newline at end of file diff --git a/src/harness.cpp b/src/harness.cpp new file mode 100644 index 0000000..affe919 --- /dev/null +++ b/src/harness.cpp @@ -0,0 +1,262 @@ +#include "harness.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef NDEBUG // assert needs to work even in release mode +#undef NDEBUG +#endif +#include + +using std::chrono::steady_clock; + +class BenchHarnessBase::ThreadCache final +{ + private: + std::vector threads; + std::shared_mutex state_lock; + std::unique_lock locked_state; + std::condition_variable_any cond_var; + struct UnlockGuard final + { + std::shared_mutex &state_lock; + UnlockGuard(std::shared_mutex &state_lock) : state_lock(state_lock) + { + state_lock.unlock(); + } + ~UnlockGuard() + { + state_lock.lock(); + } + }; + struct Task final + { + std::function fn; + }; + struct ThreadState final + { + std::unique_ptr task; + std::mutex mutex; + }; + std::vector> states; + bool shutting_down = false; + std::atomic_size_t tasks_left_to_drain = 0; + void add_thread() + { + auto thread_state = std::make_shared(); + states.push_back(thread_state); + threads.push_back(std::thread([this, thread_state]() { + auto shared_lock = std::shared_lock(state_lock); + while (true) + { + auto lock = std::unique_lock(thread_state->mutex); + auto task = std::move(thread_state->task); + lock.unlock(); + if (task) + { + task->fn(); + task.reset(); + tasks_left_to_drain--; + cond_var.notify_all(); + continue; + } + + if (this->shutting_down) + return; + + cond_var.wait(shared_lock); + } + })); + } + + public: + ThreadCache() + { + locked_state = std::unique_lock(state_lock); + } + ThreadCache(const ThreadCache &) = delete; + ThreadCache &operator=(const ThreadCache &) = delete; + ~ThreadCache() + { + shutting_down = true; + cond_var.notify_all(); + locked_state.unlock(); + for (auto &thread : threads) + { + thread.join(); + } + } + static std::shared_ptr get() + { + // weak so it's destroyed before returning from main() + static std::weak_ptr static_thread_cache; + + std::shared_ptr thread_cache = static_thread_cache.lock(); + if (!thread_cache) + { + thread_cache = std::make_shared(); + static_thread_cache = thread_cache; + } + return thread_cache; + } + static std::shared_ptr get(BenchHarnessBase &bhb, + std::uint32_t thread_count) + { + std::shared_ptr thread_cache = get(); + bhb.thread_cache = thread_cache; + while (thread_cache->threads.size() < thread_count) + thread_cache->add_thread(); + return thread_cache; + } + void drain() + { + while (tasks_left_to_drain > 0) + { + // unlocks state_lock, allowing all threads to proceed + // simultaneously + cond_var.wait(locked_state); + } + } + template void schedule_on(std::uint32_t thread_num, Fn fn) + { + auto lock = std::unique_lock(states[thread_num]->mutex); + assert(!states[thread_num]->task); + tasks_left_to_drain++; + states[thread_num]->task = std::make_unique(Task{.fn = fn}); + cond_var.notify_all(); + } +}; + +struct WriteDuration final +{ + std::chrono::duration dur; + friend std::ostream &operator<<(std::ostream &os, + const WriteDuration &wdur) + { + double dur = wdur.dur.count(); + if (!std::isfinite(dur) || std::fabs(dur) > 0.1) + { + os << dur << " sec"; + } + else if (std::fabs(dur) > 0.1e-3) + { + os << dur * 1e3 << " ms"; + } + else if (std::fabs(dur) > 0.1e-6) + { + os << dur * 1e6 << " us"; + } + else if (std::fabs(dur) > 0.1e-9) + { + os << dur * 1e9 << " ns"; + } + else if (std::fabs(dur) > 0.1e-12) + { + os << dur * 1e12 << " ps"; + } + else + { + os << dur << " sec"; + } + return os; + } +}; + +void BenchHarnessBase::base_run( + Config config, + void (*fn)(BenchHarnessBase *bench_harness_base, + std::uint64_t iteration_count, std::uint32_t thread_num)) +{ + + std::uint32_t thread_count = + config.thread_count.value_or(std::thread::hardware_concurrency()); + bool no_threads = + thread_count == 0 || (thread_count == 1 && !config.thread_count); + if (no_threads) + { + thread_count = 1; + } + + std::vector elapsed(thread_count); + auto run_base = [&](std::uint64_t iteration_count, + std::uint32_t thread_num) { + auto start_time = steady_clock::now(); + fn(this, iteration_count, thread_num); + auto end_time = steady_clock::now(); + elapsed[thread_num] = end_time - start_time; + }; + auto run = [&](std::uint64_t iteration_count) { + if (no_threads) + { + return run_base(iteration_count, 0); + } + auto thread_cache = ThreadCache::get(*this, thread_count); + for (std::uint32_t thread_num = 0; thread_num < thread_count; + thread_num++) + { + thread_cache->schedule_on( + thread_num, [&run_base, iteration_count, thread_num]() { + run_base(iteration_count, thread_num); + }); + } + thread_cache->drain(); + }; + std::uint64_t iteration_count = 1; + if (config.iteration_count) + { + iteration_count = *config.iteration_count; + run(iteration_count); + } + else + { + while (true) + { + run(iteration_count); + steady_clock::duration total_elapsed{}; + for (auto i : elapsed) + { + total_elapsed += i; + } + auto target_average_elapsed = std::chrono::milliseconds(500); + if (total_elapsed > thread_count * target_average_elapsed) + { + break; + } + iteration_count <<= 1; + } + } + steady_clock::duration total_elapsed{}; + for (std::uint32_t thread_num = 0; thread_num < thread_count; thread_num++) + { + total_elapsed += elapsed[thread_num]; + if (thread_count > 1) + { + auto dur = std::chrono::duration(elapsed[thread_num]); + std::cout << "Thread #" << thread_num << " took " + << WriteDuration{dur} << " for " << iteration_count + << " iterations -- " + << WriteDuration{dur / iteration_count} << "/iter.\n"; + } + } + auto total = std::chrono::duration(total_elapsed); + std::cout << "Average elapsed time: " + << WriteDuration{total / thread_count} << " for " + << iteration_count << " iterations -- " + << WriteDuration{total / thread_count / iteration_count} + << "/iter.\n" + << std::endl; +} + +std::shared_ptr BenchHarnessBase::get_thread_cache() +{ + return ThreadCache::get(); +} \ No newline at end of file diff --git a/src/harness.h b/src/harness.h new file mode 100644 index 0000000..aacd27a --- /dev/null +++ b/src/harness.h @@ -0,0 +1,117 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +struct Config final +{ + std::optional thread_count; + std::optional iteration_count; + std::uint32_t log2_memory_location_count = 0; + std::uint32_t log2_stride = 0; + static constexpr std::uint32_t max_sum_log2_mem_loc_count_and_stride = 28; +}; + +template class BenchHarness; + +class BenchHarnessBase +{ + template friend class BenchHarness; + + private: + std::shared_ptr thread_cache; + class ThreadCache; + friend class ThreadCache; + void base_run(Config config, + void (*fn)(BenchHarnessBase *bench_harness_base, + std::uint64_t iteration_count, + std::uint32_t thread_num)); + + public: + static std::shared_ptr get_thread_cache(); +}; + +template +class BenchHarness final : private BenchHarnessBase +{ + private: + Fn fn; + Input input; + + public: + BenchHarness(Fn fn, Input input) + : fn(std::move(fn)), input(std::move(input)) + { + } + void run(Config config) + { + base_run(config, [](BenchHarnessBase *bench_harness_base, + std::uint64_t iteration_count, + std::uint32_t thread_num) { + auto self = static_cast(bench_harness_base); + auto &fn = self->fn; + // copy for repeatability, also so optimization barrier is on copy, + // not self + auto input = self->input; + for (std::uint64_t i = 0; i < iteration_count; i++) + { + // optimization barrier + asm("" : : "r"(std::addressof(input)) : "memory"); + + if constexpr (std::is_void_v>) + { + fn(input, i, thread_num); + } + else + { + auto output = fn(input, i, thread_num); + + // optimization barrier + asm("" : : "r"(std::addressof(output)) : "memory"); + } + } + }); + } +}; + +class Benchmark final +{ + private: + std::string m_name; + std::function m_run; + + public: + template + explicit Benchmark(Fn fn, Input input, std::string name) + : m_name(std::move(name)), m_run([fn, input](Config config) { + return BenchHarness(std::move(fn), std::move(input)).run(config); + }) + { + } + void run(Config config) + { + return m_run(config); + } + const std::string &name() const + { + return m_name; + } +}; + +template +void push_bench(std::vector &benches, Fn fn, Input input, + NameParts &&...name_parts) +{ + std::ostringstream os; + (os << ... << std::forward(name_parts)); + benches.push_back(Benchmark(std::move(fn), std::move(input), os.str())); +} diff --git a/main.cpp b/src/main.cpp similarity index 75% rename from main.cpp rename to src/main.cpp index 2008393..feafbb1 100644 --- a/main.cpp +++ b/src/main.cpp @@ -1,15 +1,18 @@ +#include "all_benchmarks.h" #include "harness.h" #include #include #include #include #include +#include #include #include #include #include #include #include +#include #include using namespace std::literals; @@ -315,9 +318,17 @@ class OptionsParser final } std::exit(sizeof...(error_msg) == 0 ? 0 : 1); } + template struct ParseIntLimits; + template + struct ParseIntLimits>> final + { + Int min_value = std::numeric_limits::min(); + Int max_value = std::numeric_limits::max(); + }; template std::enable_if_t, void> parse_int( - std::optional value, Int &i_value) + std::optional value, Int &i_value, + ParseIntLimits limits = {}) { i_value = Int(); if (!value) @@ -347,6 +358,13 @@ class OptionsParser final { result.ec = std::errc::invalid_argument; } + if (result.ec == std::errc()) + { + if (i_value < limits.min_value || i_value > limits.max_value) + { + result.ec = std::errc::result_out_of_range; + } + } if (result.ec == std::errc::result_out_of_range) { help_and_exit("value out of range: ", current_option(), "=", @@ -360,7 +378,7 @@ class OptionsParser final template std::enable_if_t, void> parse_int( std::optional value, std::optional &i_value, - bool required = true) + bool required = true, ParseIntLimits limits = {}) { if (!required && !value) { @@ -368,7 +386,7 @@ class OptionsParser final return; } i_value.emplace(); - this->parse_int(value, i_value.value()); + this->parse_int(value, i_value.value(), limits); } }; @@ -381,6 +399,7 @@ inline std::vector Options::parse( int main(int, char **argv) { Config config{}; + std::optional> enabled_benchmarks; Options options{ Option{ .short_name = 'h', @@ -406,6 +425,44 @@ int main(int, char **argv) [&](OptionsParser &parser, auto value) { parser.parse_int(value, config.iteration_count); }}, + Option{.long_name = "log2-mem-loc-count", + .description = + "Log base 2 of the number of memory locations to access", + .value_kind = OptionValueKind::Required, + .parse_value = + [&](OptionsParser &parser, auto value) { + parser.parse_int( + value, config.log2_memory_location_count, + {.max_value = + Config::max_sum_log2_mem_loc_count_and_stride - + config.log2_stride}); + }}, + Option{ + .long_name = "log2-stride", + .description = + "Log base 2 of the stride used for accessing memory locations", + .value_kind = OptionValueKind::Required, + .parse_value = + [&](OptionsParser &parser, auto value) { + parser.parse_int( + value, config.log2_stride, + {.max_value = + Config::max_sum_log2_mem_loc_count_and_stride - + config.log2_memory_location_count}); + }}, + Option{ + .short_name = 'b', + .long_name = "bench", + .description = "List of benchmarks that should be run", + .value_kind = OptionValueKind::Required, + .parse_value = + [&](OptionsParser &, std::optional value) { + if (!enabled_benchmarks) + { + enabled_benchmarks.emplace(); + } + enabled_benchmarks->emplace(value.value_or("")); + }}, }; OptionsParser parser(options, argv); auto args = parser.parse(); @@ -413,6 +470,45 @@ int main(int, char **argv) { parser.help_and_exit("unexpected argument"); } - // TODO: invoke benchmarks + auto benchmarks = all_benchmarks(config); + if (enabled_benchmarks) + { + enabled_benchmarks->erase(""); + enabled_benchmarks->erase("help"); + enabled_benchmarks->erase("list"); + if (enabled_benchmarks->empty()) + { + std::cout << "Available Benchmarks:\n"; + for (auto &benchmark : benchmarks) + { + std::cout << benchmark.name() << "\n"; + } + std::cout << std::endl; + return 0; + } + std::unordered_set unknown_benchmarks = + *enabled_benchmarks; + for (auto &benchmark : benchmarks) + { + unknown_benchmarks.erase(benchmark.name()); + } + if (!unknown_benchmarks.empty()) + { + parser.help_and_exit( + "unknown benchmark: ", *unknown_benchmarks.begin(), + "\nrun with `--bench=list` to see all supported benchmarks."); + } + } + auto thread_cache = BenchHarnessBase::get_thread_cache(); + for (auto &benchmark : benchmarks) + { + if (enabled_benchmarks && !enabled_benchmarks->count(benchmark.name())) + { + continue; + } + std::cout << "Running: " << benchmark.name() << std::endl; + benchmark.run(config); + } + std::cout << std::endl; return 0; } diff --git a/src/powerpc64le/powerpc64le_benchmarks.cpp b/src/powerpc64le/powerpc64le_benchmarks.cpp new file mode 100644 index 0000000..da0a672 --- /dev/null +++ b/src/powerpc64le/powerpc64le_benchmarks.cpp @@ -0,0 +1,21 @@ +#include "powerpc64le_benchmarks.h" +#include + +#if defined(__powerpc64__) && BYTE_ORDER == LITTLE_ENDIAN + +std::vector powerpc64le_benchmarks(Config config) +{ + std::vector retval; + // TODO: add powerpc64le benchmarks + (void)config; + return retval; +} + +#else + +std::vector powerpc64le_benchmarks(Config) +{ + return {}; +} + +#endif \ No newline at end of file diff --git a/src/powerpc64le/powerpc64le_benchmarks.h b/src/powerpc64le/powerpc64le_benchmarks.h new file mode 100644 index 0000000..808685d --- /dev/null +++ b/src/powerpc64le/powerpc64le_benchmarks.h @@ -0,0 +1,5 @@ +#pragma once + +#include "../harness.h" + +std::vector powerpc64le_benchmarks(Config config); \ No newline at end of file diff --git a/src/x86_64/x86_64_benchmarks.cpp b/src/x86_64/x86_64_benchmarks.cpp new file mode 100644 index 0000000..9bc145f --- /dev/null +++ b/src/x86_64/x86_64_benchmarks.cpp @@ -0,0 +1,20 @@ +#include "x86_64_benchmarks.h" + +#ifdef __x86_64__ + +std::vector x86_64_benchmarks(Config config) +{ + std::vector retval; + // TODO: add x86_64 benchmarks + (void)config; + return retval; +} + +#else + +std::vector x86_64_benchmarks(Config) +{ + return {}; +} + +#endif \ No newline at end of file diff --git a/src/x86_64/x86_64_benchmarks.h b/src/x86_64/x86_64_benchmarks.h new file mode 100644 index 0000000..ed228af --- /dev/null +++ b/src/x86_64/x86_64_benchmarks.h @@ -0,0 +1,5 @@ +#pragma once + +#include "../harness.h" + +std::vector x86_64_benchmarks(Config config); \ No newline at end of file -- 2.30.2