From: Jacob Lifshay Date: Thu, 1 Jun 2017 11:21:07 +0000 (-0700) Subject: everything builds X-Git-Tag: gsoc-2017~117 X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=2219c6e85eae8a07eb0a6687ea1bf68355cde0c7;p=kazan.git everything builds --- diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 23627cc..28989b5 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -21,4 +21,5 @@ cmake_minimum_required(VERSION 3.1 FATAL_ERROR) add_subdirectory(spirv) add_subdirectory(demo) +add_subdirectory(json) add_subdirectory(util) \ No newline at end of file diff --git a/src/json/CMakeLists.txt b/src/json/CMakeLists.txt new file mode 100644 index 0000000..9666a76 --- /dev/null +++ b/src/json/CMakeLists.txt @@ -0,0 +1,24 @@ +# Copyright 2017 Jacob Lifshay +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+# +cmake_minimum_required(VERSION 3.1 FATAL_ERROR) +set(sources json.cpp) +add_library(json STATIC ${sources}) +target_link_libraries(json util) \ No newline at end of file diff --git a/src/json/json.cpp b/src/json/json.cpp new file mode 100644 index 0000000..8e2a938 --- /dev/null +++ b/src/json/json.cpp @@ -0,0 +1,420 @@ +/* + * Copyright 2017 Jacob Lifshay + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ +#include "json.h" +#include +#include +#include +#include +#include +#include +#include "../util/soft_float.h" + +namespace vulkan_cpu +{ +namespace json +{ +namespace ast +{ +namespace soft_float = util::soft_float; + +void null_value::write(std::ostream &os) const +{ + os << "null"; +} + +void boolean_value::write(std::ostream &os) const +{ + os << (value ? 
"true" : "false"); +} + +namespace +{ +constexpr char get_digit_char(unsigned digit, bool uppercase) noexcept +{ + if(digit < 10) + return '0' + digit; + if(uppercase) + return digit - 10 + 'A'; + return digit - 10 + 'a'; +} +} + +void string_value::write(std::ostream &os, const std::string &value) +{ + os << '\"'; + for(unsigned char ch : value) + { + switch(ch) + { + case '\\': + case '\"': + os << '\\' << ch; + break; + case '\b': + os << "\\b"; + break; + case '\f': + os << "\\f"; + break; + case '\n': + os << "\\n"; + break; + case '\r': + os << "\\r"; + break; + case '\t': + os << "\\t"; + break; + default: + if(ch < 0x20U) + os << "\\u00" << get_digit_char(ch >> 4, true) << get_digit_char(ch & 0xFU, true); + else + os << ch; + } + } + os << '\"'; +} + +namespace +{ +template +void write_string(Write_Char write_char, const char *str) noexcept(noexcept(write_char('0'))) +{ + while(*str) + write_char(*str++); +} + +template +void write_array(Write_Char write_char, + const char *array, + std::size_t size) noexcept(noexcept(write_char('0'))) +{ + for(std::size_t i = 0; i < size; i++) + write_char(array[i]); +} + +constexpr std::array make_base_2_logs() noexcept +{ + return std::array{{ + log2(soft_float::ExtendedFloat(static_cast(0))), + log2(soft_float::ExtendedFloat(static_cast(1))), + log2(soft_float::ExtendedFloat(static_cast(2))), + log2(soft_float::ExtendedFloat(static_cast(3))), + log2(soft_float::ExtendedFloat(static_cast(4))), + log2(soft_float::ExtendedFloat(static_cast(5))), + log2(soft_float::ExtendedFloat(static_cast(6))), + log2(soft_float::ExtendedFloat(static_cast(7))), + log2(soft_float::ExtendedFloat(static_cast(8))), + log2(soft_float::ExtendedFloat(static_cast(9))), + log2(soft_float::ExtendedFloat(static_cast(10))), + log2(soft_float::ExtendedFloat(static_cast(11))), + log2(soft_float::ExtendedFloat(static_cast(12))), + log2(soft_float::ExtendedFloat(static_cast(13))), + log2(soft_float::ExtendedFloat(static_cast(14))), + 
log2(soft_float::ExtendedFloat(static_cast(15))), + log2(soft_float::ExtendedFloat(static_cast(16))), + log2(soft_float::ExtendedFloat(static_cast(17))), + log2(soft_float::ExtendedFloat(static_cast(18))), + log2(soft_float::ExtendedFloat(static_cast(19))), + log2(soft_float::ExtendedFloat(static_cast(20))), + log2(soft_float::ExtendedFloat(static_cast(21))), + log2(soft_float::ExtendedFloat(static_cast(22))), + log2(soft_float::ExtendedFloat(static_cast(23))), + log2(soft_float::ExtendedFloat(static_cast(24))), + log2(soft_float::ExtendedFloat(static_cast(25))), + log2(soft_float::ExtendedFloat(static_cast(26))), + log2(soft_float::ExtendedFloat(static_cast(27))), + log2(soft_float::ExtendedFloat(static_cast(28))), + log2(soft_float::ExtendedFloat(static_cast(29))), + log2(soft_float::ExtendedFloat(static_cast(30))), + log2(soft_float::ExtendedFloat(static_cast(31))), + log2(soft_float::ExtendedFloat(static_cast(32))), + log2(soft_float::ExtendedFloat(static_cast(33))), + log2(soft_float::ExtendedFloat(static_cast(34))), + log2(soft_float::ExtendedFloat(static_cast(35))), + log2(soft_float::ExtendedFloat(static_cast(36))), + }}; +} + +constexpr std::size_t max_integer_buffer_size = 64; // max number of digits is base 2 with 64 digits + +template +void write_unsigned_integer(Write_Char write_char, + std::uint64_t value, + unsigned base) noexcept(noexcept(write_char('0'))) +{ + assert(base >= number_value::min_base && base <= number_value::max_base); + char buffer[max_integer_buffer_size]{}; + std::size_t buffer_used = 0; + do + { + assert(buffer_used < max_integer_buffer_size); + buffer[buffer_used++] = get_digit_char(value % base, false); + value /= base; + } while(value != 0); + for(std::size_t i = 0, j = buffer_used - 1; i < buffer_used; i++, j--) + write_char(buffer[j]); +} + +template +void write_signed_integer(Write_Char write_char, + std::int64_t value, + unsigned base) noexcept(noexcept(write_char('0'))) +{ + if(value < 0) + { + write_char('-'); + 
write_unsigned_integer(write_char, + -static_cast(value), + base); // cast to unsigned first to handle minimum value + } + else + { + write_unsigned_integer(write_char, static_cast(value), base); + } +} + +template +void write_number(Write_Char write_char, + double valueIn, + unsigned base) noexcept(noexcept(write_char('0'))) +{ + // code modified from + // https://github.com/programmerjake/javascript-tasklets/blob/master/javascript_tasklets/value.cpp + // based on the ECMAScript ToString algorithm for numbers + assert(base >= number_value::min_base && base <= number_value::max_base); + const char exponent_char = base == 10 ? 'e' : base == 16 ? 'h' : base == 8 ? 'o' : 'E'; + soft_float::ExtendedFloat value(valueIn), base_f(static_cast(base)); + auto inv_base_f = soft_float::ExtendedFloat::One() / base_f; + static constexpr auto base_2_logs = make_base_2_logs(); + auto limit_21 = + static_cast(round(soft_float::ExtendedFloat(static_cast(21)) + * (base_2_logs[10] / base_2_logs[base]))); + assert(limit_21 > 0); + auto limit_6 = + static_cast(round(soft_float::ExtendedFloat(static_cast(6)) + * (base_2_logs[10] / base_2_logs[base]))); + assert(limit_6 > 0); + if(value.isNaN()) + { + write_string(write_char, "NaN"); + return; + } + if(value.isZero()) + { + write_char('0'); + return; + } + if(value.isInfinite()) + { + if(value.signBit()) + write_string(write_char, "-Infinity"); + else + write_string(write_char, "Infinity"); + return; + } + if(value.signBit()) + { + write_char('-'); + value = -value; + valueIn = -valueIn; + } + auto n_f = log2(value) / base_2_logs[base] + soft_float::ExtendedFloat::One(); + auto n = static_cast(floor(n_f)); + soft_float::ExtendedFloat base_to_the_power_of_n = pow(base_f, n); + soft_float::ExtendedFloat base_to_the_power_of_minus_n = + soft_float::ExtendedFloat::One() / base_to_the_power_of_n; + auto scaled_value = value * base_to_the_power_of_minus_n; + if(scaled_value + scalbn(soft_float::ExtendedFloat::One(), -62) + < inv_base_f) // 
extra is to handle round-off error + { + n--; + base_to_the_power_of_n *= inv_base_f; + base_to_the_power_of_minus_n *= base_f; + scaled_value = value * base_to_the_power_of_minus_n; + } + else if(scaled_value >= soft_float::ExtendedFloat::One()) + { + n++; + base_to_the_power_of_n *= base_f; + base_to_the_power_of_minus_n *= inv_base_f; + scaled_value = value * base_to_the_power_of_minus_n; + } + std::int64_t k = 0; + soft_float::ExtendedFloat s_f = soft_float::ExtendedFloat::One(); + auto base_to_the_power_of_k = soft_float::ExtendedFloat::One(); + auto base_to_the_power_of_minus_k = soft_float::ExtendedFloat::One(); + while(s_f < soft_float::ExtendedFloat::TwoToThe64()) + { + k++; + base_to_the_power_of_k *= base_f; + base_to_the_power_of_minus_k *= inv_base_f; + s_f = round(scaled_value * base_to_the_power_of_k); + if(valueIn + == static_cast(s_f * base_to_the_power_of_minus_k * base_to_the_power_of_n)) + break; + } + std::uint64_t s = static_cast(s_f); + char s_digits[max_integer_buffer_size]{}; + std::size_t s_digits_size = 0; + write_unsigned_integer( + [&](char ch) + { + assert(s_digits_size < max_integer_buffer_size); + s_digits[s_digits_size++] = ch; + }, + s, + base); + assert(s_digits_size == static_cast(k)); + if(k <= n && n <= limit_21) + { + write_array(write_char, s_digits, s_digits_size); + for(std::size_t i = n - k; i > 0; i--) + write_char('0'); + } + else if(0 < n && n <= limit_21) + { + for(std::int64_t i = 0; i < n; i++) + write_char(s_digits[i]); + write_char('.'); + for(std::int64_t i = n; i < k; i++) + write_char(s_digits[i]); + } + else if(-limit_6 < n && n <= 0) + { + write_string(write_char, "0."); + for(std::size_t i = -n; i > 0; i--) + write_char('0'); + write_array(write_char, s_digits, s_digits_size); + } + else if(k == 1) + { + write_array(write_char, s_digits, s_digits_size); + write_char(exponent_char); + if(n - 1 >= 0) + { + write_char('+'); + write_signed_integer(write_char, n - 1, 10); + } + else + 
write_signed_integer(write_char, n - 1, 10); + } + else + { + write_char(s_digits[0]); + write_char('.'); + for(std::int64_t i = 1; i < k; i++) + write_char(s_digits[i]); + write_char(exponent_char); + if(n - 1 >= 0) + { + write_char('+'); + write_signed_integer(write_char, n - 1, 10); + } + else + write_signed_integer(write_char, n - 1, 10); + } +} +} + +std::string number_value::to_string(std::string buffer_in, unsigned base) const +{ + buffer_in.clear(); + std::string retval = std::move(buffer_in); + write_number( + [&](char ch) + { + retval += ch; + }, + value, + base); + return retval; +} + +std::size_t number_value::to_string(char *output_buffer, + std::size_t output_buffer_size, + bool require_null_terminator, + unsigned base) const noexcept +{ + if(output_buffer_size == 0) + return 0; + std::size_t used_buffer_size = 0; + std::size_t output_buffer_size_without_terminator = output_buffer_size; + if(require_null_terminator) + output_buffer_size_without_terminator--; + write_number( + [&](char ch) + { + if(used_buffer_size < output_buffer_size_without_terminator) + output_buffer[used_buffer_size++] = ch; + }, + value, + base); + if(used_buffer_size < output_buffer_size) + output_buffer[used_buffer_size] = '\0'; // add the null terminator if there is space + return used_buffer_size; // report used buffer excluding the null terminator +} + +void number_value::write(std::ostream &os, unsigned base) const +{ + write_number( + [&](char ch) + { + os << ch; + }, + value, + base); +} + +void object::write(std::ostream &os) const +{ + os << '{'; + auto seperator = ""; + for(auto &entry : values) + { + const std::string &key = std::get<0>(entry); + const value &value = std::get<1>(entry); + os << seperator; + seperator = ","; + string_value::write(os, key); + os << ':'; + ast::write(os, value); + } + os << '}'; +} + +void array::write(std::ostream &os) const +{ + os << '['; + auto seperator = ""; + for(const value &v : values) + { + os << seperator; + seperator = ","; + 
ast::write(os, v); + } + os << ']'; +} +} +} +} diff --git a/src/json/json.h b/src/json/json.h new file mode 100644 index 0000000..08b2e9c --- /dev/null +++ b/src/json/json.h @@ -0,0 +1,219 @@ +/* + * Copyright 2017 Jacob Lifshay + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + */ + +#ifndef JSON_JSON_H_ +#define JSON_JSON_H_ + +#include +#include +#include +#include +#include +#include +#include +#include "../util/variant.h" + +namespace vulkan_cpu +{ +namespace json +{ +namespace ast +{ +struct composite_value +{ + composite_value() = default; + virtual ~composite_value() = default; + virtual void write(std::ostream &os) const = 0; + virtual std::unique_ptr duplicate() const = 0; +}; + +struct null_value final +{ + void write(std::ostream &os) const; + null_value duplicate() const noexcept + { + return {}; + } + const null_value *operator->() const noexcept + { + return this; + } + const null_value &operator*() const noexcept + { + return *this; + } +}; + +struct boolean_value final +{ + bool value; + constexpr boolean_value(bool value) noexcept : value(value) + { + } + void write(std::ostream &os) const; + boolean_value duplicate() const noexcept + { + return *this; + } + const boolean_value *operator->() const noexcept + { + return this; + } + const boolean_value &operator*() const noexcept + { + return *this; + } +}; + +struct string_value final +{ + std::string value; + string_value(std::string value) noexcept : value(std::move(value)) + { + } + string_value(const char *value) : value(std::move(value)) + { + } + static void write(std::ostream &os, const std::string &value); + void write(std::ostream &os) const + { + write(os, value); + } + string_value duplicate() const noexcept + { + return *this; + } + const string_value *operator->() const noexcept + { + return this; + } + const string_value &operator*() const noexcept + { + return *this; + } +}; + +struct number_value final +{ + double value; + static_assert(std::numeric_limits::is_iec559 && std::numeric_limits::radix == 2, + "double is not a ieee754 float64"); + number_value(double value) noexcept : value(value) + { + } + explicit operator std::string() const + { + return to_string(); + } + static constexpr unsigned max_base = 36; + static constexpr unsigned min_base = 2; 
+ static constexpr unsigned default_base = 10; // the json spec only supports base 10 + std::string to_string(std::string buffer_in = {}, unsigned base = default_base) const; + std::size_t to_string(char *output_buffer, + std::size_t output_buffer_size, + bool require_null_terminator = true, + unsigned base = default_base) const noexcept; + void write(std::ostream &os, unsigned base = default_base) const; + number_value duplicate() const noexcept + { + return *this; + } + const number_value *operator->() const noexcept + { + return this; + } + const number_value &operator*() const noexcept + { + return *this; + } +}; + +typedef util:: + variant> + value; + +inline value duplicate(const value &v) +{ + return util::visit( + [](const auto &v) -> value + { + return v->duplicate(); + }, + v); +} + +inline void write(std::ostream &os, const value &v) +{ + util::visit( + [&](const auto &v) -> void + { + return v->write(os); + }, + v); +} + +struct object final : public composite_value +{ + std::unordered_map values; + object() : values() + { + } + object(std::unordered_map values) noexcept : values(std::move(values)) + { + } + virtual void write(std::ostream &os) const override; + virtual std::unique_ptr duplicate() const override + { + std::unordered_map new_values; + for(auto &entry : values) + { + new_values.emplace(std::get<0>(entry), ast::duplicate(std::get<1>(entry))); + } + return std::unique_ptr(new object(std::move(new_values))); + } +}; + +struct array final : public composite_value +{ + std::vector values; + array() : values() + { + } + array(std::vector values) noexcept : values(std::move(values)) + { + } + virtual void write(std::ostream &os) const override; + virtual std::unique_ptr duplicate() const override + { + std::vector new_values; + new_values.reserve(values.size()); + for(auto &value : values) + new_values.emplace_back(ast::duplicate(value)); + return std::unique_ptr(new array(std::move(new_values))); + } +}; +} +} +} + +#endif /* JSON_JSON_H_ */ 
diff --git a/src/spirv/CMakeLists.txt b/src/spirv/CMakeLists.txt index c470ba2..46e589e 100644 --- a/src/spirv/CMakeLists.txt +++ b/src/spirv/CMakeLists.txt @@ -20,4 +20,5 @@ # cmake_minimum_required(VERSION 3.1 FATAL_ERROR) set(sources spirv.cpp) -add_library(spirv STATIC ${sources}) \ No newline at end of file +add_library(spirv STATIC ${sources}) +target_link_libraries(spirv util) \ No newline at end of file diff --git a/src/util/CMakeLists.txt b/src/util/CMakeLists.txt index 2059497..387804f 100644 --- a/src/util/CMakeLists.txt +++ b/src/util/CMakeLists.txt @@ -19,12 +19,14 @@ # SOFTWARE. # cmake_minimum_required(VERSION 3.1 FATAL_ERROR) -set(sources copy_cv_ref.cpp +set(sources bit_intrinsics.cpp + copy_cv_ref.cpp in_place.cpp invoke.cpp is_referenceable.cpp is_swappable.cpp optional.cpp + soft_float.cpp variant.cpp void_t.cpp) add_library(util STATIC ${sources}) diff --git a/src/util/bit_intrinsics.cpp b/src/util/bit_intrinsics.cpp new file mode 100644 index 0000000..25fe663 --- /dev/null +++ b/src/util/bit_intrinsics.cpp @@ -0,0 +1,23 @@ +/* + * Copyright 2017 Jacob Lifshay + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ +#include "bit_intrinsics.h" diff --git a/src/util/bit_intrinsics.h b/src/util/bit_intrinsics.h new file mode 100644 index 0000000..4550042 --- /dev/null +++ b/src/util/bit_intrinsics.h @@ -0,0 +1,159 @@ +/* + * Copyright 2017 Jacob Lifshay + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + */ + +#ifndef UTIL_BIT_INTRINSICS_H_ +#define UTIL_BIT_INTRINSICS_H_ + +#include +#include + +#if defined(__clang__) +#if defined(__apple_build_version__) +#if __clang_major__ > 5 || (__clang_major__ == 5 && __clang_minor__ >= 1) +#define VULKAN_CPU_UTIL_CONSTEXPR_BUILTIN_CLZ_CTZ_SUPPORTED 1 +#endif +#else +#if __clang_major__ > 3 || (__clang_major__ == 3 && __clang_minor__ >= 4) +#define VULKAN_CPU_UTIL_CONSTEXPR_BUILTIN_CLZ_CTZ_SUPPORTED 1 +#endif +#endif +#elif defined(__INTEL_COMPILER) +#warning figure out icc version numbers for constexpr __builtin_clz and __builtin_ctz +#elif defined(__GNUC__) +// gcc supports constexpr __builtin_clz and __builtin_ctz before it supports c++14 +#define VULKAN_CPU_UTIL_CONSTEXPR_BUILTIN_CLZ_CTZ_SUPPORTED 1 +#endif + +#if 1 +#ifdef VULKAN_CPU_UTIL_CONSTEXPR_BUILTIN_CLZ_CTZ_SUPPORTED +#undef VULKAN_CPU_UTIL_CONSTEXPR_BUILTIN_CLZ_CTZ_SUPPORTED +#endif +#endif + +namespace vulkan_cpu +{ +namespace util +{ +constexpr std::uint32_t clz4(std::uint8_t v) noexcept +{ +#ifdef VULKAN_CPU_UTIL_CONSTEXPR_BUILTIN_CLZ_CTZ_SUPPORTED + return v == 0 ? 4 : __builtin_clz(v) - __builtin_clz(0x8U); +#else + typedef const std::uint_fast8_t LookupTableType[0x10]; + return LookupTableType + { + 4, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 + } + [v]; +#endif +} + +constexpr std::uint32_t clz8(std::uint8_t v) noexcept +{ +#ifdef VULKAN_CPU_UTIL_CONSTEXPR_BUILTIN_CLZ_CTZ_SUPPORTED + return v == 0 ? 8 : __builtin_clz(v) - __builtin_clz(0x80U); +#else + return ((v & 0xF0) == 0) ? 4 + clz4(v) : clz4(v >> 4); +#endif +} + +constexpr std::uint32_t clz16(std::uint16_t v) noexcept +{ +#ifdef VULKAN_CPU_UTIL_CONSTEXPR_BUILTIN_CLZ_CTZ_SUPPORTED + return v == 0 ? 16 : __builtin_clz(v) - (std::numeric_limits::digits - 16); +#else + return ((v & 0xFF00U) == 0) ? 8 + clz8(v) : clz8(v >> 8); +#endif +} + +constexpr std::uint32_t clz32(std::uint32_t v) noexcept +{ +#ifdef VULKAN_CPU_UTIL_CONSTEXPR_BUILTIN_CLZ_CTZ_SUPPORTED + return v == 0 ? 
32 : __builtin_clzl(v) - (std::numeric_limits::digits - 32); +#else + return ((v & 0xFFFF0000UL) == 0) ? 16 + clz16(v) : clz16(v >> 16); +#endif +} + +constexpr std::uint32_t clz64(std::uint64_t v) noexcept +{ +#ifdef VULKAN_CPU_UTIL_CONSTEXPR_BUILTIN_CLZ_CTZ_SUPPORTED + return v == 0 ? 64 : __builtin_clzll(v) - (std::numeric_limits::digits - 64); +#else + return ((v & 0xFFFFFFFF00000000ULL) == 0) ? 32 + clz32(v) : clz32(v >> 32); +#endif +} + +constexpr std::uint32_t ctz4(std::uint8_t v) noexcept +{ +#ifdef VULKAN_CPU_UTIL_CONSTEXPR_BUILTIN_CLZ_CTZ_SUPPORTED + return v == 0 ? 4 : __builtin_ctz(v); +#else + typedef const std::uint_fast8_t LookupTableType[0x10]; + return LookupTableType + { + 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0 + } + [v]; +#endif +} + +constexpr std::uint32_t ctz8(std::uint8_t v) noexcept +{ +#ifdef VULKAN_CPU_UTIL_CONSTEXPR_BUILTIN_CLZ_CTZ_SUPPORTED + return v == 0 ? 8 : __builtin_ctz(v); +#else + return ((v & 0x0F) != 0) ? ctz4(v & 0x0F) : 4 + ctz4(v >> 4); // trailing zeros start at bit 0, so the low nibble decides; zero input yields 8 +#endif +} + +constexpr std::uint32_t ctz16(std::uint16_t v) noexcept +{ +#ifdef VULKAN_CPU_UTIL_CONSTEXPR_BUILTIN_CLZ_CTZ_SUPPORTED + return v == 0 ? 16 : __builtin_ctz(v); +#else + return ((v & 0x00FFU) != 0) ? ctz8(v & 0x00FFU) : 8 + ctz8(v >> 8); // low byte decides; zero input yields 16 +#endif +} + +constexpr std::uint32_t ctz32(std::uint32_t v) noexcept +{ +#ifdef VULKAN_CPU_UTIL_CONSTEXPR_BUILTIN_CLZ_CTZ_SUPPORTED + return v == 0 ? 32 : __builtin_ctzl(v); +#else + return ((v & 0x0000FFFFUL) != 0) ? ctz16(v & 0x0000FFFFUL) : 16 + ctz16(v >> 16); // low half-word decides; zero input yields 32 +#endif +} + +constexpr std::uint32_t ctz64(std::uint64_t v) noexcept +{ +#ifdef VULKAN_CPU_UTIL_CONSTEXPR_BUILTIN_CLZ_CTZ_SUPPORTED + return v == 0 ? 64 : __builtin_ctzll(v); +#else + return /* low 32 bits decide; zero input yields 64 */ ((v & 0x00000000FFFFFFFFULL) != 0) ? 
ctz32(v) : 32 + ctz32(v >> 32); +#endif +} +} +} + +#endif /* UTIL_BIT_INTRINSICS_H_ */ diff --git a/src/util/copy_cv_ref.h b/src/util/copy_cv_ref.h index ce68ca6..c01e06e 100644 --- a/src/util/copy_cv_ref.h +++ b/src/util/copy_cv_ref.h @@ -21,8 +21,8 @@ * */ -#ifndef SOURCE_UTIL_COPY_CV_REF_H_ -#define SOURCE_UTIL_COPY_CV_REF_H_ +#ifndef UTIL_COPY_CV_REF_H_ +#define UTIL_COPY_CV_REF_H_ namespace vulkan_cpu { @@ -99,4 +99,4 @@ using copy_cv_ref_t = typename copy_cv_ref::type; } } -#endif /* SOURCE_UTIL_COPY_CV_REF_H_ */ +#endif /* UTIL_COPY_CV_REF_H_ */ diff --git a/src/util/invoke.h b/src/util/invoke.h index 0a8f05c..9680040 100644 --- a/src/util/invoke.h +++ b/src/util/invoke.h @@ -21,8 +21,8 @@ * */ -#ifndef SOURCE_UTIL_INVOKE_H_ -#define SOURCE_UTIL_INVOKE_H_ +#ifndef UTIL_INVOKE_H_ +#define UTIL_INVOKE_H_ #include #include @@ -274,4 +274,4 @@ constexpr bool is_nothrow_invocable_r_v = is_nothrow_invocable_r } } -#endif /* SOURCE_UTIL_INVOKE_H_ */ +#endif /* UTIL_INVOKE_H_ */ diff --git a/src/util/soft_float.cpp b/src/util/soft_float.cpp new file mode 100644 index 0000000..7f4d402 --- /dev/null +++ b/src/util/soft_float.cpp @@ -0,0 +1,961 @@ +/* + * Copyright 2016-2017 Jacob Lifshay + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +// derived from +// https://github.com/programmerjake/javascript-tasklets/blob/master/javascript_tasklets/soft_float.cpp + +#if 1 +#include "soft_float.h" +#if 0 +#include +#include +#include +#include +#include +namespace +{ +using namespace vulkan_cpu::util::soft_float; +std::string hexValue(const ExtendedFloat &v) +{ + if(v.isNaN()) + { + return "NaN"; + } + if(v.isInfinite()) + { + if(v.sign) + return "-Infinity"; + return "+Infinity"; + } + std::ostringstream ss; + ss << std::hex << std::uppercase; + ss.fill('0'); + if(v.sign) + ss << "-"; + else + ss << "+"; + ss << "0x"; + std::int32_t exponent = v.exponent; + exponent -= ExtendedFloat::exponentBias(); + if(v.isZero()) + exponent = 0; + std::uint64_t mantissa = v.mantissa; + unsigned firstDigitBits = 1 + (exponent & 3); + ss << (mantissa >> (64 - firstDigitBits)); + mantissa <<= firstDigitBits; + exponent &= ~3; + ss << "."; + ss.width(16); + ss << mantissa; + ss << "p"; + ss << std::dec << std::showpos; + ss << exponent; + return ss.str(); +} +std::string hexValue(long double v) +{ + if(std::isnan(v)) + { + return "NaN"; + } + if(std::isinf(v)) + { + if(v < 0) + return "-Infinity"; + return "+Infinity"; + } + const std::size_t strSize = 64; + char str[strSize]; + std::snprintf(str, sizeof(str), "%+1.16LA", v); + for(char &ch : str) + { + if(ch == '\0') + break; + if(ch == 'X') + ch = 'x'; + else if(ch == 'P') + ch = 'p'; + } + return str; +} +std::string hexValue(std::int64_t v) +{ + std::ostringstream ss; + ss 
<< std::hex << std::uppercase; + ss.fill('0'); + if(v < 0) + ss << "-"; + else + ss << "+"; + ss << "0x"; + ss.width(16); + if(v < 0) + ss << -static_cast(v); + else + ss << static_cast(v); + return ss.str(); +} +std::string hexValue(std::uint64_t v) +{ + std::ostringstream ss; + ss << std::hex << std::uppercase; + ss.fill('0'); + ss << "0x"; + ss.width(16); + ss << static_cast(v); + return ss.str(); +} +bool sameValue(long double a, long double b) +{ + if(std::isnan(a)) + return std::isnan(b); + if(a == 0) + { + return b == 0 && std::signbit(a) == std::signbit(b); + } + return a == b; +} +void writeArgs() +{ +} +template +void writeArgs(Arg arg, Args... args) +{ + std::cout << " " << hexValue(arg); + writeArgs(args...); +} +constexpr bool displayPassedTests = true; +template +void testCase(const char *name, TestFn1 &&testFn1, TestFn2 &&testFn2, Args... args) +{ + long double result1 = static_cast(testFn1(args...)); + long double result2 = static_cast(testFn2(args...)); + if(!sameValue(result1, result2)) + { + std::cout << name; + writeArgs(args...); + std::cout << " -> "; + std::cout << hexValue(result1) << " != " << hexValue(result2) << std::endl; + } + else if(displayPassedTests) + { + std::cout << name; + writeArgs(args...); + std::cout << " -> "; + std::cout << hexValue(result1) << std::endl; + } +} +template +void testCaseI(const char *name, TestFn1 &&testFn1, TestFn2 &&testFn2, Args... 
args) +{ + auto result1 = testFn1(args...); + auto result2 = testFn2(args...); + if(result1 != result2) + { + std::cout << name; + writeArgs(args...); + std::cout << " -> "; + std::cout << hexValue(result1) << " != " << hexValue(result2) << std::endl; + } + else if(displayPassedTests) + { + std::cout << name; + writeArgs(args...); + std::cout << " -> "; + std::cout << hexValue(result1) << std::endl; + } +} +template +void roundTestCases(const char *name, TestFn1 &&testFn1, TestFn2 &&testFn2) +{ + const long double NaN = std::numeric_limits::quiet_NaN(); + const long double Infinity = std::numeric_limits::infinity(); + auto testBothSigns = [&](long double value) + { + testCase(name, testFn1, testFn2, value); + testCase(name, testFn1, testFn2, -value); + }; + testCase(name, testFn1, testFn2, NaN); + testBothSigns(0.0L); + testBothSigns(Infinity); + testBothSigns(1.0L); + testBothSigns(0x1.0p-1L); + testBothSigns(0x1.8p0L); + testBothSigns(0x1.Fp0L); + testBothSigns(0x1.Fp-30L); + testBothSigns(0x1.Fp30L); + testBothSigns(0x1.Fp62L); + testBothSigns(0x1.Fp63L); + testBothSigns(0x1.Fp64L); + testBothSigns(0x1.Fp65L); + testBothSigns(0x1.Fp62L + 0.5L); + testBothSigns(0x1.Fp63L + 0.5L); + testBothSigns(0x1.Fp64L + 0.5L); + testBothSigns(0x1.Fp65L + 0.5L); + testBothSigns(0x1.Fp62L + 1); + testBothSigns(0x1.Fp63L + 1); + testBothSigns(0x1.Fp64L + 1); + testBothSigns(0x1.Fp65L + 1); +} +template +void toIntTestCases(const char *name, TestFn1 &&testFn1, TestFn2 &&testFn2) +{ + const long double NaN = std::numeric_limits::quiet_NaN(); + const long double Infinity = std::numeric_limits::infinity(); + auto testBothSigns = [&](long double value) + { + testCaseI(name, testFn1, testFn2, value); + testCaseI(name, testFn1, testFn2, -value); + }; + testCaseI(name, testFn1, testFn2, NaN); + testBothSigns(0.0L); + testBothSigns(Infinity); + testBothSigns(1.0L); + testBothSigns(0x1.0p-1L); + testBothSigns(0x1.8p0L); + testBothSigns(0x1.Fp0L); + testBothSigns(0x1.Fp-30L); + 
testBothSigns(0x1.Fp30L); + testBothSigns(0x1.Fp62L); + testBothSigns(0x1.Fp63L); + testBothSigns(0x1.Fp64L); + testBothSigns(0x1.Fp65L); + testBothSigns(0x1.Fp62L + 0.5L); + testBothSigns(0x1.Fp63L + 0.5L); + testBothSigns(0x1.Fp64L + 0.5L); + testBothSigns(0x1.Fp65L + 0.5L); + testBothSigns(0x1.Fp62L + 1); + testBothSigns(0x1.Fp63L + 1); + testBothSigns(0x1.Fp64L + 1); + testBothSigns(0x1.Fp65L + 1); +} +void mainFn() +{ + auto add1 = [](long double a, long double b) -> long double + { + return a + b; + }; + auto add2 = [](long double a, long double b) -> ExtendedFloat + { + return ExtendedFloat(a) + ExtendedFloat(b); + }; + auto mul1 = [](long double a, long double b) -> long double + { + return a * b; + }; + auto mul2 = [](long double a, long double b) -> ExtendedFloat + { + return ExtendedFloat(a) * ExtendedFloat(b); + }; + auto div1 = [](long double a, long double b) -> long double + { + return a / b; + }; + auto div2 = [](long double a, long double b) -> ExtendedFloat + { + return ExtendedFloat(a) / ExtendedFloat(b); + }; + auto floor1 = [](long double a) -> long double + { + return std::floor(a); + }; + auto floor2 = [](long double a) -> ExtendedFloat + { + return floor(ExtendedFloat(a)); + }; + auto ceil1 = [](long double a) -> long double + { + return std::ceil(a); + }; + auto ceil2 = [](long double a) -> ExtendedFloat + { + return ceil(ExtendedFloat(a)); + }; + auto round1 = [](long double a) -> long double + { + return std::round(a); + }; + auto round2 = [](long double a) -> ExtendedFloat + { + return round(ExtendedFloat(a)); + }; + auto trunc1 = [](long double a) -> long double + { + return std::trunc(a); + }; + auto trunc2 = [](long double a) -> ExtendedFloat + { + return trunc(ExtendedFloat(a)); + }; + auto toUInt1 = [](long double a) -> std::uint64_t + { + if(std::isnan(a)) + return 0; + if(a < std::numeric_limits::min()) + return std::numeric_limits::min(); + if(a > std::numeric_limits::max()) + return std::numeric_limits::max(); + return 
static_cast(a); + }; + auto toUInt2 = [](long double a) -> std::uint64_t + { + return static_cast(ExtendedFloat(a)); + }; + auto toInt1 = [](long double a) -> std::int64_t + { + if(std::isnan(a)) + return 0; + if(a < std::numeric_limits::min()) + return std::numeric_limits::min(); + if(a > std::numeric_limits::max()) + return std::numeric_limits::max(); + return static_cast(a); + }; + auto toInt2 = [](long double a) -> std::int64_t + { + return static_cast(ExtendedFloat(a)); + }; + auto pow1 = [](long double base, int exponent) -> long double + { + if(exponent < 0) + { + base = 1 / base; + exponent = -exponent; + } + else if(exponent == 0) + return 1; + long double retval = 1; + for(;;) + { + if(exponent == 0) + return retval; + else if(exponent == 1) + return retval * base; + if(exponent & 1) + { + retval *= base; + } + base *= base; + exponent >>= 1; + } + }; + auto pow2 = [](long double base, int exponent) -> ExtendedFloat + { + return pow(ExtendedFloat(base), static_cast(exponent)); + }; + auto scalbn1 = [](long double a, std::int64_t exponent) -> long double + { + return std::scalbln(a, static_cast(exponent)); + }; + auto scalbn2 = [](long double a, std::int64_t exponent) -> ExtendedFloat + { + return scalbn(ExtendedFloat(a), exponent); + }; + auto log2_1 = [](long double a) -> long double + { + return std::log2(a); + }; + auto log2_2 = [](long double a) -> ExtendedFloat + { + return log2(ExtendedFloat(a)); + }; + auto log10_1 = [](long double a) -> long double + { + return std::log10(a); + }; + auto log10_2 = [](long double a) -> ExtendedFloat + { + return log10(ExtendedFloat(a)); + }; + const long double NaN = std::numeric_limits::quiet_NaN(); + const long double Infinity = std::numeric_limits::infinity(); + testCase("add", add1, add2, +0.0L, +0.0L); + testCase("add", add1, add2, +0.0L, -0.0L); + testCase("add", add1, add2, -0.0L, +0.0L); + testCase("add", add1, add2, -0.0L, -0.0L); + testCase("add", add1, add2, 0.0L, NaN); + testCase("add", add1, add2, NaN, 
0.0L); + testCase("add", add1, add2, NaN, NaN); + testCase("add", add1, add2, +Infinity, +Infinity); + testCase("add", add1, add2, +Infinity, -Infinity); + testCase("add", add1, add2, -Infinity, +Infinity); + testCase("add", add1, add2, -Infinity, -Infinity); + testCase("add", add1, add2, 0x1.0000000000000002p0L, -0x1.0p-64L); + testCase("add", add1, add2, 0x1.p0L, -0x1.0p-65L); + testCase("add", add1, add2, 0x1.p0L, -0x0.Fp-65L); + testCase("add", add1, add2, 0x1.p0L, -0x1.1p-65L); + testCase("add", add1, add2, 0x1.0000000000000002p0L, -0x2.0p-65L); + testCase("add", add1, add2, 0x1.0000000000000002p0L, -0x1.Fp-65L); + testCase("add", add1, add2, 0x1.0000000000000002p0L, -0x2.1p-65L); + testCase("add", add1, add2, 0x1p-16445L, 0x1p-16445L); + testCase("add", add1, add2, 0x1p+16383L, 0x1p+16383L); + testCase("mul", mul1, mul2, +0.0L, +0.0L); + testCase("mul", mul1, mul2, +0.0L, -0.0L); + testCase("mul", mul1, mul2, -0.0L, +0.0L); + testCase("mul", mul1, mul2, -0.0L, -0.0L); + testCase("mul", mul1, mul2, 0.0L, NaN); + testCase("mul", mul1, mul2, NaN, 0.0L); + testCase("mul", mul1, mul2, NaN, NaN); + testCase("mul", mul1, mul2, +Infinity, +Infinity); + testCase("mul", mul1, mul2, +Infinity, -Infinity); + testCase("mul", mul1, mul2, -Infinity, +Infinity); + testCase("mul", mul1, mul2, -Infinity, -Infinity); + testCase("mul", mul1, mul2, 0x1p0L, 0x1p0L); + testCase("mul", mul1, mul2, 0x1p16000L, 0x1p383L); + testCase("mul", mul1, mul2, 0x1p16000L, 0x1p384L); + testCase("mul", mul1, mul2, 0x1p-16000L, 0x1p-445L); + testCase("mul", mul1, mul2, 0x1p-16000L, 0x1p-446L); + testCase("mul", mul1, mul2, 0x1.0000001p0L, 0x1.000000001p0L); + testCase("mul", mul1, mul2, 0x1.0000001p0L, 0x1.0000000018p0L); + testCase("mul", mul1, mul2, 0x1.0000001p0L, 0x1.000000002p0L); + testCase("mul", mul1, mul2, 0x1.0000001p0L, 0x1.0000000028p0L); + testCase("mul", mul1, mul2, 0x1.0000001p0L, 0x1.000000003p0L); + testCase("mul", mul1, mul2, 0x1.0000001p0L, 0x1.0000000038p0L); + testCase("mul", 
mul1, mul2, 0x1.0000001p0L, 0x1.000000004p0L); + testCase("mul", mul1, mul2, 0x1.0000001p0L, 0x1.0000000048p0L); + testCase("mul", mul1, mul2, 0x1.0000001p0L, 0x1.000000005p0L); + testCase("mul", mul1, mul2, 0x1.0000001p0L, 0x1.0000000058p0L); + testCase("mul", mul1, mul2, 0x1.0000001p0L, 0x1.000000006p0L); + testCase("mul", mul1, mul2, 0x1.0000001p0L, 0x1.0000000068p0L); + testCase("mul", mul1, mul2, 0x1.0000001p0L, 0x1.000000007p0L); + testCase("mul", mul1, mul2, 0x1.0000001p0L, 0x1.0000000078p0L); + testCase("mul", + mul1, + mul2, + 3.1415926535897932384626433832795L, + 0.318309886183790671537767526745028724L); + testCase("mul", + mul1, + mul2, + 2.718281828459045235360287471352662497757L, + 0.3678794411714423215955237701614608674458L); + testCase("div", div1, div2, +0.0L, +0.0L); + testCase("div", div1, div2, +1.0L, +0.0L); + testCase("div", div1, div2, +1.0L, -0.0L); + testCase("div", div1, div2, -1.0L, +0.0L); + testCase("div", div1, div2, -1.0L, -0.0L); + testCase("div", div1, div2, +0.0L, +1.0L); + testCase("div", div1, div2, +0.0L, -1.0L); + testCase("div", div1, div2, -0.0L, +1.0L); + testCase("div", div1, div2, -0.0L, -1.0L); + testCase("div", div1, div2, 0.0L, NaN); + testCase("div", div1, div2, NaN, 0.0L); + testCase("div", div1, div2, NaN, NaN); + testCase("div", div1, div2, +Infinity, +Infinity); + testCase("div", div1, div2, +1.0L, +Infinity); + testCase("div", div1, div2, +1.0L, -Infinity); + testCase("div", div1, div2, -1.0L, +Infinity); + testCase("div", div1, div2, -1.0L, -Infinity); + testCase("div", div1, div2, 1.0L, 3.0L); + testCase("div", div1, div2, 1.0L, 5.0L); + testCase("div", div1, div2, 1.0L, 7.0L); + testCase("div", div1, div2, 1.0L, 9.0L); + testCase("div", div1, div2, 1.0L, 11.0L); + testCase("div", div1, div2, 1.0L, 3.1415926535897932384626433832795L); + testCase("div", div1, div2, 1.0L, 2.718281828459045235360287471352662497757L); + testCase("div", div1, div2, 0x1p16000L, 0x1p-383L); + testCase("div", div1, div2, 0x1p16000L, 
0x1p-384L); + testCase("div", div1, div2, 0x1p-16000L, 0x1p445L); + testCase("div", div1, div2, 0x1p-16000L, 0x1p446L); + roundTestCases("floor", floor1, floor2); + roundTestCases("round", round1, round2); + roundTestCases("ceil", ceil1, ceil2); + roundTestCases("trunc", trunc1, trunc2); + toIntTestCases("uint64", toUInt1, toUInt2); + toIntTestCases("int64", toInt1, toInt2); + testCase("pow", pow1, pow2, 1.0L, static_cast(0)); + testCase("pow", pow1, pow2, 1.0L, static_cast(5000)); + testCase("pow", pow1, pow2, 1.0L, static_cast(-5000)); + testCase("pow", pow1, pow2, 2.0L, static_cast(3000)); + testCase("pow", pow1, pow2, 2.0L, static_cast(-3000)); + testCase("pow", pow1, pow2, 3.0L, static_cast(3000)); + testCase("pow", pow1, pow2, 3.0L, static_cast(-3000)); + testCase("pow", pow1, pow2, 10.0L, static_cast(3000)); + testCase("pow", pow1, pow2, 10.0L, static_cast(-3000)); + testCase("pow", pow1, pow2, 36.0L, static_cast(3000)); + testCase("pow", pow1, pow2, 36.0L, static_cast(-3000)); + testCase("scalbn", scalbn1, scalbn2, 1.0L, static_cast(16384)); + testCase("scalbn", scalbn1, scalbn2, 1.0L, static_cast(16383)); + testCase("scalbn", scalbn1, scalbn2, 1.0L, static_cast(3000)); + testCase("scalbn", scalbn1, scalbn2, 1.0L, static_cast(-3000)); + testCase("scalbn", scalbn1, scalbn2, 1.0L, static_cast(-16383)); + testCase("scalbn", scalbn1, scalbn2, 1.0L, static_cast(-16384)); + testCase("scalbn", scalbn1, scalbn2, 1.0L, static_cast(-16445)); + testCase("scalbn", scalbn1, scalbn2, 1.0L, static_cast(-16446)); + testCase("log2", log2_1, log2_2, NaN); + testCase("log2", log2_1, log2_2, Infinity); + testCase("log2", log2_1, log2_2, -Infinity); + testCase("log2", log2_1, log2_2, 0.0L); + testCase("log2", log2_1, log2_2, -0.0L); + testCase("log2", log2_1, log2_2, -1.0L); + testCase("log2", log2_1, log2_2, 1.0L); + testCase("log2", log2_1, log2_2, 2.0L); + testCase("log2", log2_1, log2_2, 0x1.0p-16445L); + testCase("log2", log2_1, log2_2, 0x1.0p16383L); + testCase("log2", 
log2_1, log2_2, 3.0L); + testCase("log2", log2_1, log2_2, 5.0L); + testCase("log2", log2_1, log2_2, 7.0L); + testCase("log2", log2_1, log2_2, 9.0L); + testCase("log2", log2_1, log2_2, 11.0L); + testCase("log2", log2_1, log2_2, 1e100L); + testCase("log2", log2_1, log2_2, 1e-1L); + testCase("log2", log2_1, log2_2, 1e-2L); + testCase("log2", log2_1, log2_2, 1.5L); + testCase("log2", log2_1, log2_2, 0.693147180559945309417232121458176568L); + testCase("log2", log2_1, log2_2, static_cast(ExtendedFloat::Log10Of2())); + testCase("log2", log2_1, log2_2, static_cast(ExtendedFloat::LogOf2())); + testCase("log10", log10_1, log10_2, 1e1001L); + testCase("log10", log10_1, log10_2, 1.5L); +} +struct Init +{ + Init() + { + mainFn(); + std::exit(0); + } +}; +Init init; +} +#endif +#else +#include +#include +#include +#include +#include +#include +#include +namespace +{ +unsigned clz8(std::uint8_t v) +{ + return __builtin_clz(v) - __builtin_clz(0x80U); +} +unsigned ctz8(std::uint8_t v) +{ + return v == 0 ? 8 : __builtin_ctz(v); +} +struct UInt16 final +{ + std::uint8_t high; + std::uint8_t low; + explicit UInt16(std::uint8_t low = 0) : high(0), low(low) + { + } + UInt16(std::uint8_t high, std::uint8_t low) : high(high), low(low) + { + } + friend unsigned clz16(UInt16 v) + { + return v.high == 0 ? 8 + clz8(v.low) : clz8(v.high); + } + friend unsigned ctz16(UInt16 v) + { + return v.low == 0 ? 
8 + ctz8(v.high) : ctz8(v.low); + } + static UInt16 mul8x8(std::uint8_t a, std::uint8_t b) + { + unsigned v = a; + v *= b; + return UInt16(v >> 8, v & 0xFFU); + } + static bool addCarry(std::uint8_t a, std::uint8_t b) + { + return static_cast(a) + b > 0xFFU; + } + static bool addCarry(std::uint8_t a, std::uint8_t b, bool carry) + { + return static_cast(a) + b + carry > 0xFFU; + } + static bool subBorrow(std::uint8_t a, std::uint8_t b) + { + return a < b; + } + static bool subBorrow(std::uint8_t a, std::uint8_t b, bool borrow) + { + return a < b || (a == b && borrow); + } + friend UInt16 operator+(UInt16 a, UInt16 b) + { + return UInt16(a.high + b.high + addCarry(a.low, b.low), a.low + b.low); + } + friend UInt16 operator-(UInt16 a, UInt16 b) + { + return UInt16(a.high - b.high - subBorrow(a.low, b.low), a.low - b.low); + } + friend UInt16 operator<<(UInt16 v, unsigned shiftAmount) + { + return shiftAmount == 0 ? v : shiftAmount < 8 ? + UInt16((v.high << shiftAmount) | (v.low >> (8 - shiftAmount)), + v.low << shiftAmount) : + UInt16(v.low << (shiftAmount - 8), 0); + } + friend UInt16 operator>>(UInt16 v, unsigned shiftAmount) + { + return shiftAmount == 0 ? v : shiftAmount < 8 ? 
+ UInt16(v.high >> shiftAmount, + (v.low >> shiftAmount) | (v.high << (8 - shiftAmount))) : + UInt16(v.high >> (shiftAmount - 8)); + } + struct DivModResult8 final + { + std::uint8_t divResult; + std::uint8_t modResult; + DivModResult8(std::uint8_t divResult, std::uint8_t modResult) + : divResult(divResult), modResult(modResult) + { + } + }; + static DivModResult8 divMod16x8(UInt16 n, std::uint8_t d) + { + assert(d != 0); + std::uint16_t v = n.high; + v <<= 8; + v |= n.low; + std::uint16_t divResult = v / d; + std::uint16_t modResult = v % d; + assert(divResult <= 0xFFU); + assert(modResult <= 0xFFU); + return DivModResult8(divResult, modResult); + } + struct DivModResult; + static DivModResult divMod(UInt16 uIn, UInt16 vIn); + static DivModResult divMod2(UInt16 n, UInt16 d); + friend bool operator==(UInt16 a, UInt16 b) noexcept + { + return a.high == b.high && a.low == b.low; + } + friend bool operator!=(UInt16 a, UInt16 b) noexcept + { + return a.high != b.high || a.low != b.low; + } + friend bool operator<(UInt16 a, UInt16 b) noexcept + { + return a.high < b.high || (a.high == b.high && a.low < b.low); + } + friend bool operator<=(UInt16 a, UInt16 b) noexcept + { + return a.high < b.high || (a.high == b.high && a.low <= b.low); + } + friend bool operator>(UInt16 a, UInt16 b) noexcept + { + return a.high > b.high || (a.high == b.high && a.low > b.low); + } + friend bool operator>=(UInt16 a, UInt16 b) noexcept + { + return a.high > b.high || (a.high == b.high && a.low >= b.low); + } +}; +struct UInt16::DivModResult final +{ + UInt16 divResult; + UInt16 modResult; + DivModResult(UInt16 divResult, UInt16 modResult) : divResult(divResult), modResult(modResult) + { + } +}; +UInt16::DivModResult UInt16::divMod2(UInt16 n, UInt16 d) +{ + std::uint16_t nv = n.high; + nv <<= 8; + nv |= n.low; + std::uint16_t dv = d.high; + dv <<= 8; + dv |= d.low; + std::uint16_t qv = nv / dv; + std::uint16_t rv = nv % dv; + return DivModResult(UInt16(qv >> 8, qv & 0xFF), UInt16(rv >> 8, 
rv & 0xFF)); +} +template +void divMod(const Digit(&numerator)[NumberSizes], + const Digit(&denominator)[NumberSizes], + Digit(&quotient)[NumberSizes], + Digit(&remainder)[NumberSizes]) +{ + constexpr Digit DigitMax = (static_cast(1) << DigitBitCount) - 1; + static_assert(NumberSizes != 0, "bad size"); + std::size_t m = NumberSizes; + for(std::size_t i = 0; i < NumberSizes; i++) + { + if(denominator[i] != 0) + { + m = i; + break; + } + } + const std::size_t n = NumberSizes - m; + if(n <= 1) + { + assert(denominator[NumberSizes - 1] != 0); + for(std::size_t i = 0; i < NumberSizes - 1; i++) + { + remainder[i] = 0; + } + Digit currentRemainder = 0; + for(std::size_t i = 0; i < NumberSizes; i++) + { + DoubleDigit n = currentRemainder; + n <<= DigitBitCount; + n |= numerator[i]; + quotient[i] = n / denominator[NumberSizes - 1]; + currentRemainder = n % denominator[NumberSizes - 1]; + } + remainder[NumberSizes - 1] = currentRemainder; + return; + } + // from algorithm D, section 4.3.1 in Art of Computer Programming volume 2 by Knuth. 
+ unsigned log2D = DigitCLZFn()(denominator[m]); + Digit u[NumberSizes + 1]; + u[NumberSizes] = (numerator[NumberSizes - 1] << log2D) & DigitMax; + u[0] = ((static_cast(numerator[0]) << log2D) >> DigitBitCount) & DigitMax; + for(std::size_t i = 1; i < NumberSizes; i++) + { + DoubleDigit value = numerator[i - 1]; + value <<= DigitBitCount; + value |= numerator[i]; + value <<= log2D; + u[i] = (value >> DigitBitCount) & DigitMax; + } + Digit v[NumberSizes + 1] = {}; + v[n] = (denominator[NumberSizes - 1] << log2D) & DigitMax; + for(std::size_t i = 1; i < n; i++) + { + DoubleDigit value = denominator[m + i - 1]; + value <<= DigitBitCount; + value |= denominator[m + i]; + value <<= log2D; + v[i] = (value >> DigitBitCount) & DigitMax; + quotient[i - 1] = 0; + } + for(std::size_t j = 0; j <= m; j++) + { + DoubleDigit qHat; + if(u[j] == v[1]) + { + qHat = DigitMax; + } + else + { + qHat = ((static_cast(u[j]) << DigitBitCount) | u[j + 1]) / v[1]; + } + { + DoubleDigit lhs = v[2] * qHat; + DoubleDigit rhsHigh = + ((static_cast(u[j]) << DigitBitCount) | u[j + 1]) - qHat * v[1]; + Digit rhsLow = u[j + 2]; + if(rhsHigh < static_cast(1) << DigitBitCount + && lhs > ((rhsHigh << DigitBitCount) | rhsLow)) + { + qHat--; + lhs -= v[2]; + rhsHigh += v[1]; + if(rhsHigh < static_cast(1) << DigitBitCount + && lhs > ((rhsHigh << DigitBitCount) | rhsLow)) + { + qHat--; + } + } + } + bool borrow = false; + { + Digit mulCarry = 0; + for(std::size_t i = n; i > 0; i--) + { + assert(i <= NumberSizes); + DoubleDigit product = qHat * v[i] + mulCarry; + mulCarry = product >> DigitBitCount; + product &= DigitMax; + bool prevBorrow = borrow; + DoubleDigit digit = u[j + i] - product - prevBorrow; + borrow = digit != (digit & DigitMax); + digit &= DigitMax; + u[j + i] = digit; + } + bool prevBorrow = borrow; + DoubleDigit digit = u[j] - mulCarry - prevBorrow; + borrow = digit != (digit & DigitMax); + digit &= DigitMax; + u[j] = digit; + } + Digit qj = qHat; + if(borrow) + { + qj--; + bool carry = 
false; + for(std::size_t i = n; i > 0; i--) + { + bool prevCarry = carry; + assert(i + j <= NumberSizes); + DoubleDigit digit = u[j + i] + v[i] + prevCarry; + carry = digit != (digit & DigitMax); + digit &= DigitMax; + u[j + i] = digit; + } + u[j] = (u[j] + carry) & DigitMax; + } + quotient[j + n - 1] = qj; + } + for(std::size_t i = 0; i < NumberSizes; i++) + { + DoubleDigit value = u[i]; + value <<= DigitBitCount; + value |= u[i + 1]; + remainder[i] = value >> log2D; + } +} +struct OpClz4 final +{ + constexpr unsigned operator()(std::uint16_t value) const noexcept + { + return __builtin_clz(value) - (__builtin_clz(0) - 4); + } +}; +UInt16::DivModResult UInt16::divMod(UInt16 uIn, UInt16 vIn) +{ + constexpr std::size_t NumberSizes = 4; + typedef std::uint16_t Digit; + typedef unsigned DoubleDigit; + constexpr unsigned DigitBitCount = 4; + Digit numerator[NumberSizes], denominator[NumberSizes], quotient[NumberSizes], + remainder[NumberSizes]; + numerator[0] = uIn.high >> 4; + numerator[1] = uIn.high & 0xF; + numerator[2] = uIn.low >> 4; + numerator[3] = uIn.low & 0xF; + denominator[0] = vIn.high >> 4; + denominator[1] = vIn.high & 0xF; + denominator[2] = vIn.low >> 4; + denominator[3] = vIn.low & 0xF; + ::divMod( + numerator, denominator, quotient, remainder); + return DivModResult( + UInt16((quotient[0] << 4) | quotient[1], (quotient[2] << 4) | quotient[3]), + UInt16((remainder[0] << 4) | remainder[1], (remainder[2] << 4) | remainder[3])); +} +void mainFn(std::uint8_t start, std::uint8_t end) +{ + for(unsigned dHigh = start; dHigh <= end; dHigh++) + { + if(start == 0) + { + std::ostringstream ss; + ss << dHigh * 100 / (end + 1) << "%\n"; + std::cout << ss.str() << std::flush; + } + for(unsigned dLow = 0; dLow < 0x100U; dLow++) + { + UInt16 d(dHigh, dLow); + if(d == UInt16(0)) + continue; +#if 0 + if(d < UInt16(2, 0)) + continue; +#endif + for(unsigned nHigh = 0; nHigh < 0x100U; nHigh++) + { + for(unsigned nLow = 0; nLow < 0x100U; nLow++) + { + UInt16 n(nHigh, nLow); 
+ auto result = UInt16::divMod(n, d); + auto result2 = UInt16::divMod2(n, d); + if(result.divResult != result2.divResult + || result.modResult != result2.modResult) + { + std::ostringstream ss; + ss << std::hex << std::uppercase; + ss.fill('0'); + ss.width(2); + ss << static_cast(n.high); + ss.width(2); + ss << static_cast(n.low); + ss << " / "; + ss.width(2); + ss << static_cast(d.high); + ss.width(2); + ss << static_cast(d.low); + ss << " == "; + ss.width(2); + ss << static_cast(result.divResult.high); + ss.width(2); + ss << static_cast(result.divResult.low); + ss << ", "; + ss.width(2); + ss << static_cast(result2.divResult.high); + ss.width(2); + ss << static_cast(result2.divResult.low); + ss << std::endl; + ss.width(2); + ss << static_cast(n.high); + ss.width(2); + ss << static_cast(n.low); + ss << " % "; + ss.width(2); + ss << static_cast(d.high); + ss.width(2); + ss << static_cast(d.low); + ss << " == "; + ss.width(2); + ss << static_cast(result.modResult.high); + ss.width(2); + ss << static_cast(result.modResult.low); + ss << ", "; + ss.width(2); + ss << static_cast(result2.modResult.high); + ss.width(2); + ss << static_cast(result2.modResult.low); + std::cout << ss.str() << std::endl; + return; + } + } + } + } + } +} +struct Init +{ + Init() + { + const std::size_t splitCount = 6; + std::list threads; + for(std::size_t i = 0; i < splitCount; i++) + { + auto start = i * 0x100 / splitCount; + auto end = (i + 1) * 0x100 / splitCount - 1; + threads.push_back(std::thread([=]() + { + mainFn(start, end); + })); + } + for(std::thread &thread : threads) + thread.join(); + std::exit(0); + } +}; +Init init; +} +#endif diff --git a/src/util/soft_float.h b/src/util/soft_float.h new file mode 100644 index 0000000..e8b7efb --- /dev/null +++ b/src/util/soft_float.h @@ -0,0 +1,1164 @@ +/* + * Copyright 2016-2017 Jacob Lifshay + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the 
"Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +// derived from +// https://github.com/programmerjake/javascript-tasklets/blob/master/javascript_tasklets/soft_float.h + +#ifndef UTIL_SOFT_FLOAT_H_ +#define UTIL_SOFT_FLOAT_H_ + +#include +#include +#include +#include "bit_intrinsics.h" + +namespace vulkan_cpu +{ +namespace util +{ +namespace soft_float +{ +struct UInt128 final +{ + std::uint64_t low; + std::uint64_t high; + constexpr UInt128(std::uint64_t high, std::uint64_t low) noexcept : low(low), high(high) + { + } + constexpr explicit UInt128(std::uint64_t low = 0) noexcept : low(low), high(0) + { + } + static constexpr bool addCarries(std::uint64_t a, std::uint64_t b) noexcept + { + return static_cast(a + b) < a; + } + static constexpr bool subBorrows(std::uint64_t a, std::uint64_t b) noexcept + { + return static_cast(a - b) > a; + } + friend constexpr UInt128 operator+(UInt128 a, UInt128 b) noexcept + { + return UInt128(a.high + b.high + addCarries(a.low, b.low), a.low + b.low); + } + constexpr UInt128 &operator+=(UInt128 v) noexcept + { + return *this = *this + v; 
+ } + friend constexpr UInt128 operator-(UInt128 a, UInt128 b) noexcept + { + return UInt128(a.high - b.high - subBorrows(a.low, b.low), a.low - b.low); + } + constexpr UInt128 &operator-=(UInt128 v) noexcept + { + return *this = *this - v; + } + +private: + static constexpr std::uint64_t multiplyHighHelper2(std::uint64_t h, + std::uint64_t m1, + std::uint64_t m2, + std::uint64_t l) noexcept + { + return (UInt128(h, l) + UInt128(m1 >> 32, m1 << 32) + UInt128(m2 >> 32, m2 << 32)).high; + } + static constexpr std::uint64_t multiplyHighHelper1(std::uint32_t ah, + std::uint32_t al, + std::uint32_t bh, + std::uint32_t bl) noexcept + { + return multiplyHighHelper2(static_cast(ah) * bh, + static_cast(ah) * bl, + static_cast(al) * bh, + static_cast(al) * bl); + } + +public: + static constexpr std::uint64_t multiplyHigh(std::uint64_t a, std::uint64_t b) noexcept + { + return multiplyHighHelper1(a >> 32, a, b >> 32, b); + } + friend constexpr UInt128 operator*(UInt128 a, UInt128 b) noexcept + { + return UInt128(a.high * b.low + a.low * b.high + multiplyHigh(a.low, b.low), a.low * b.low); + } + constexpr UInt128 &operator*=(UInt128 v) noexcept + { + return *this = *this * v; + } + struct DivModResult; + static constexpr DivModResult divmod(UInt128 a, UInt128 b) noexcept; + static constexpr UInt128 div(UInt128 a, UInt128 b) noexcept; + static constexpr UInt128 mod(UInt128 a, UInt128 b) noexcept; + friend constexpr UInt128 operator/(UInt128 a, UInt128 b) noexcept + { + return div(a, b); + } + friend constexpr UInt128 operator%(UInt128 a, UInt128 b) noexcept + { + return mod(a, b); + } + constexpr UInt128 &operator/=(UInt128 v) noexcept + { + return *this = *this / v; + } + constexpr UInt128 &operator%=(UInt128 v) noexcept + { + return *this = *this % v; + } + friend constexpr UInt128 operator&(UInt128 a, UInt128 b) noexcept + { + return UInt128(a.high & b.high, a.low & b.low); + } + constexpr UInt128 &operator&=(UInt128 v) noexcept + { + return *this = *this & v; + } + friend 
constexpr UInt128 operator|(UInt128 a, UInt128 b) noexcept + { + return UInt128(a.high | b.high, a.low | b.low); + } + constexpr UInt128 &operator|=(UInt128 v) noexcept + { + return *this = *this | v; + } + friend constexpr UInt128 operator^(UInt128 a, UInt128 b) noexcept + { + return UInt128(a.high ^ b.high, a.low ^ b.low); + } + constexpr UInt128 &operator^=(UInt128 v) noexcept + { + return *this = *this ^ v; + } + friend constexpr UInt128 operator<<(UInt128 v, unsigned shiftAmount) noexcept + { + assert(shiftAmount < 128); + return shiftAmount == 0 ? v : shiftAmount < 64 ? + UInt128((v.high << shiftAmount) | (v.low >> (64 - shiftAmount)), + v.low << shiftAmount) : + shiftAmount == 64 ? UInt128(v.low, 0) : + UInt128(v.low << (shiftAmount - 64), 0); + } + constexpr UInt128 &operator<<=(unsigned shiftAmount) noexcept + { + return *this = *this << shiftAmount; + } + friend constexpr UInt128 operator>>(UInt128 v, unsigned shiftAmount) noexcept + { + assert(shiftAmount < 128); + return shiftAmount == 0 ? v : shiftAmount < 64 ? + UInt128(v.high >> shiftAmount, + (v.low >> shiftAmount) | (v.high << (64 - shiftAmount))) : + shiftAmount == 64 ? UInt128(0, v.high) : + UInt128(0, v.high >> (shiftAmount - 64)); + } + constexpr UInt128 &operator>>=(unsigned shiftAmount) noexcept + { + return *this = *this >> shiftAmount; + } + constexpr UInt128 operator+() noexcept + { + return *this; + } + constexpr UInt128 operator~() noexcept + { + return UInt128(~high, ~low); + } + constexpr UInt128 operator-() noexcept + { + return low != 0 ? 
UInt128(~high, -low) : UInt128(-high, 0); + } + friend constexpr bool operator==(UInt128 a, UInt128 b) noexcept + { + return a.high == b.high && a.low == b.low; + } + friend constexpr bool operator!=(UInt128 a, UInt128 b) noexcept + { + return a.high != b.high || a.low != b.low; + } + friend constexpr bool operator<(UInt128 a, UInt128 b) noexcept + { + return a.high < b.high || (a.high == b.high && a.low < b.low); + } + friend constexpr bool operator<=(UInt128 a, UInt128 b) noexcept + { + return a.high < b.high || (a.high == b.high && a.low <= b.low); + } + friend constexpr bool operator>(UInt128 a, UInt128 b) noexcept + { + return a.high > b.high || (a.high == b.high && a.low > b.low); + } + friend constexpr bool operator>=(UInt128 a, UInt128 b) noexcept + { + return a.high > b.high || (a.high == b.high && a.low >= b.low); + } + friend constexpr unsigned clz128(UInt128 v) noexcept + { + return v.high == 0 ? 64 + clz64(v.low) : clz64(v.high); + } + friend constexpr unsigned ctz128(UInt128 v) noexcept + { + return v.low == 0 ? 
64 + ctz64(v.high) : ctz64(v.low); + } +}; + +struct UInt128::DivModResult final +{ + UInt128 divResult; + UInt128 modResult; + constexpr DivModResult(UInt128 divResult, UInt128 modResult) noexcept : divResult(divResult), + modResult(modResult) + { + } +}; + +constexpr UInt128::DivModResult UInt128::divmod(UInt128 a, UInt128 b) noexcept +{ + constexpr std::size_t NumberSizes = 4; + typedef std::uint32_t Digit; + typedef std::uint64_t DoubleDigit; + constexpr unsigned DigitBitCount = 32; + struct DigitCLZFn final + { + constexpr unsigned operator()(Digit v) const noexcept + { + return clz32(v); + } + }; + constexpr Digit DigitMax = (static_cast(1) << DigitBitCount) - 1; + const Digit numerator[NumberSizes] = { + static_cast(a.high >> DigitBitCount), + static_cast(a.high & DigitMax), + static_cast(a.low >> DigitBitCount), + static_cast(a.low & DigitMax), + }; + const Digit denominator[NumberSizes] = { + static_cast(b.high >> DigitBitCount), + static_cast(b.high & DigitMax), + static_cast(b.low >> DigitBitCount), + static_cast(b.low & DigitMax), + }; + Digit quotient[NumberSizes]{}; + Digit remainder[NumberSizes]{}; + std::size_t m = NumberSizes; + for(std::size_t i = 0; i < NumberSizes; i++) + { + if(denominator[i] != 0) + { + m = i; + break; + } + } + const std::size_t n = NumberSizes - m; + if(n <= 1) + { + assert(denominator[NumberSizes - 1] != 0); + for(std::size_t i = 0; i < NumberSizes - 1; i++) + { + remainder[i] = 0; + } + Digit currentRemainder = 0; + for(std::size_t i = 0; i < NumberSizes; i++) + { + DoubleDigit n = currentRemainder; + n <<= DigitBitCount; + n |= numerator[i]; + quotient[i] = n / denominator[NumberSizes - 1]; + currentRemainder = n % denominator[NumberSizes - 1]; + } + remainder[NumberSizes - 1] = currentRemainder; + } + else + { + // from algorithm D, section 4.3.1 in Art of Computer Programming volume 2 by Knuth. 
+ unsigned log2D = DigitCLZFn()(denominator[m]); + Digit u[NumberSizes + 1]{}; + u[NumberSizes] = (numerator[NumberSizes - 1] << log2D) & DigitMax; + u[0] = ((static_cast(numerator[0]) << log2D) >> DigitBitCount) & DigitMax; + for(std::size_t i = 1; i < NumberSizes; i++) + { + DoubleDigit value = numerator[i - 1]; + value <<= DigitBitCount; + value |= numerator[i]; + value <<= log2D; + u[i] = (value >> DigitBitCount) & DigitMax; + } + Digit v[NumberSizes + 1] = {}; + v[n] = (denominator[NumberSizes - 1] << log2D) & DigitMax; + for(std::size_t i = 1; i < n; i++) + { + DoubleDigit value = denominator[m + i - 1]; + value <<= DigitBitCount; + value |= denominator[m + i]; + value <<= log2D; + v[i] = (value >> DigitBitCount) & DigitMax; + quotient[i - 1] = 0; + } + for(std::size_t j = 0; j <= m; j++) + { + DoubleDigit qHat{}; + if(u[j] == v[1]) + { + qHat = DigitMax; + } + else + { + qHat = ((static_cast(u[j]) << DigitBitCount) | u[j + 1]) / v[1]; + } + { + DoubleDigit lhs = v[2] * qHat; + DoubleDigit rhsHigh = + ((static_cast(u[j]) << DigitBitCount) | u[j + 1]) - qHat * v[1]; + Digit rhsLow = u[j + 2]; + if(rhsHigh < static_cast(1) << DigitBitCount + && lhs > ((rhsHigh << DigitBitCount) | rhsLow)) + { + qHat--; + lhs -= v[2]; + rhsHigh += v[1]; + if(rhsHigh < static_cast(1) << DigitBitCount + && lhs > ((rhsHigh << DigitBitCount) | rhsLow)) + { + qHat--; + } + } + } + bool borrow = false; + { + Digit mulCarry = 0; + for(std::size_t i = n; i > 0; i--) + { + assert(i <= NumberSizes); + DoubleDigit product = qHat * v[i] + mulCarry; + mulCarry = product >> DigitBitCount; + product &= DigitMax; + bool prevBorrow = borrow; + DoubleDigit digit = u[j + i] - product - prevBorrow; + borrow = digit != (digit & DigitMax); + digit &= DigitMax; + u[j + i] = digit; + } + bool prevBorrow = borrow; + DoubleDigit digit = u[j] - mulCarry - prevBorrow; + borrow = digit != (digit & DigitMax); + digit &= DigitMax; + u[j] = digit; + } + Digit qj = qHat; + if(borrow) + { + qj--; + bool carry = 
false; + for(std::size_t i = n; i > 0; i--) + { + bool prevCarry = carry; + assert(i + j <= NumberSizes); + DoubleDigit digit = u[j + i] + v[i] + prevCarry; + carry = digit != (digit & DigitMax); + digit &= DigitMax; + u[j + i] = digit; + } + u[j] = (u[j] + carry) & DigitMax; + } + quotient[j + n - 1] = qj; + } + for(std::size_t i = 0; i < NumberSizes; i++) + { + DoubleDigit value = u[i]; + value <<= DigitBitCount; + value |= u[i + 1]; + remainder[i] = value >> log2D; + } + } + return DivModResult( + UInt128((static_cast(quotient[0]) << DigitBitCount) | quotient[1], + (static_cast(quotient[2]) << DigitBitCount) | quotient[3]), + UInt128((static_cast(remainder[0]) << DigitBitCount) | remainder[1], + (static_cast(remainder[2]) << DigitBitCount) | remainder[3])); +} + +constexpr UInt128 UInt128::div(UInt128 a, UInt128 b) noexcept +{ + return divmod(a, b).divResult; +} + +constexpr UInt128 UInt128::mod(UInt128 a, UInt128 b) noexcept +{ + return divmod(a, b).modResult; +} + +struct ExtendedFloat final // modeled after IEEE754 standard +{ + std::uint64_t mantissa; + std::uint16_t exponent; + bool sign; + static constexpr std::uint16_t infinityNaNExponent() noexcept + { + return 0xFFFFU; + } + static constexpr std::uint16_t exponentBias() noexcept + { + return 0x7FFFU; + } + static constexpr std::uint64_t normalizedMantissaMax() noexcept + { + return 0xFFFFFFFFFFFFFFFFULL; + } + static constexpr std::uint64_t normalizedMantissaMin() noexcept + { + return 0x8000000000000000ULL; + } + struct NormalizedTag final + { + }; + static constexpr ExtendedFloat normalizeHelper(const ExtendedFloat &v, + unsigned shiftAmount) noexcept + { + return shiftAmount > 0 && v.exponent >= shiftAmount ? 
+ ExtendedFloat(NormalizedTag{}, + v.mantissa << shiftAmount, + v.exponent - shiftAmount, + v.sign) : + v; + } + static constexpr ExtendedFloat normalizeHelper(UInt128 mantissa, + std::int32_t exponent, + bool sign, + int shiftAmount) noexcept + { + return shiftAmount > 0 && exponent >= shiftAmount ? + ExtendedFloat(NormalizedTag{}, + (mantissa << shiftAmount).high, + exponent - shiftAmount, + sign) : + ExtendedFloat(NormalizedTag{}, mantissa.high, exponent, sign); + } + static constexpr ExtendedFloat normalize(const ExtendedFloat &v) noexcept + { + return v.exponent == infinityNaNExponent() ? v : v.mantissa == 0 ? + Zero(v.sign) : + normalizeHelper(v, clz64(v.mantissa)); + } + static constexpr ExtendedFloat normalize(UInt128 mantissa, + std::uint16_t exponent, + bool sign) noexcept + { + return exponent == infinityNaNExponent() ? + ExtendedFloat( + NormalizedTag{}, mantissa != UInt128(0), infinityNaNExponent(), sign) : + mantissa == UInt128(0) ? + Zero(sign) : + normalizeHelper(mantissa, exponent, sign, clz128(mantissa)); + } + constexpr ExtendedFloat() noexcept : mantissa(0), exponent(0), sign(false) + { + } + constexpr ExtendedFloat(NormalizedTag, + std::uint64_t mantissa, + std::uint16_t exponent, + bool sign = false) noexcept : mantissa(mantissa), + exponent(exponent), + sign(sign) + { + } + explicit constexpr ExtendedFloat(std::uint64_t mantissa, + std::uint16_t exponent = exponentBias() + 63, + bool sign = false) noexcept + : ExtendedFloat(normalize(ExtendedFloat(NormalizedTag{}, mantissa, exponent, sign))) + { + } + explicit constexpr ExtendedFloat(UInt128 mantissa, + std::uint16_t exponent = exponentBias() + 127, + bool sign = false) noexcept + : ExtendedFloat(normalize(mantissa, exponent, sign)) + { + } + explicit constexpr ExtendedFloat(std::int64_t mantissa) noexcept + : ExtendedFloat(mantissa < 0 ? 
-static_cast(mantissa) : + static_cast(mantissa), + exponentBias() + 63, + mantissa < 0) + { + } + explicit ExtendedFloat(double value) noexcept : mantissa(0), + exponent(0), + sign(std::signbit(value)) + { + value = std::fabs(value); + if(std::isnan(value)) + { + mantissa = 1; + exponent = infinityNaNExponent(); + return; + } + if(std::isinf(value)) + { + exponent = infinityNaNExponent(); + mantissa = 0; + return; + } + if(value == 0) + { + exponent = 0; + mantissa = 0; + return; + } + int log2Value = std::ilogb(value); + if(log2Value <= -static_cast(exponentBias())) + exponent = 0; + else + exponent = log2Value + exponentBias(); + value = std::scalbn(value, 63 - static_cast(exponent) + exponentBias()); + mantissa = value; + } + explicit ExtendedFloat(long double value) noexcept : mantissa(0), + exponent(0), + sign(std::signbit(value)) + { + value = std::fabs(value); + if(std::isnan(value)) + { + mantissa = 1; + exponent = infinityNaNExponent(); + return; + } + if(std::isinf(value)) + { + exponent = infinityNaNExponent(); + mantissa = 0; + return; + } + if(value == 0) + { + exponent = 0; + mantissa = 0; + return; + } + int log2Value = std::ilogb(value); + if(log2Value <= -static_cast(exponentBias())) + exponent = 0; + else + exponent = log2Value + exponentBias(); + value = std::scalbn(value, 63 - static_cast(exponent) + exponentBias()); + mantissa = value; + } + explicit operator long double() const noexcept + { + if(exponent == infinityNaNExponent()) + { + double retval = std::numeric_limits::infinity(); + if(mantissa) + retval = std::numeric_limits::quiet_NaN(); + if(sign) + return -retval; + return retval; + } + if(isZero()) + { + if(sign) + return -0.0; + return 0; + } + long double value = std::scalbln(static_cast(mantissa), + static_cast(exponent) - exponentBias() - 63); + if(sign) + return -value; + return value; + } + explicit operator double() const noexcept + { + if(exponent == infinityNaNExponent()) + { + double retval = std::numeric_limits::infinity(); 
+ if(mantissa) + retval = std::numeric_limits::quiet_NaN(); + if(sign) + return -retval; + return retval; + } + if(isZero()) + { + if(sign) + return -0.0; + return 0; + } + double value = std::scalbln(static_cast(mantissa), + static_cast(exponent) - exponentBias() - 63); + if(sign) + return -value; + return value; + } + constexpr bool isNaN() const noexcept + { + return exponent == infinityNaNExponent() && mantissa != 0; + } + constexpr bool isInfinite() const noexcept + { + return exponent == infinityNaNExponent() && mantissa == 0; + } + constexpr bool isFinite() const noexcept + { + return exponent != infinityNaNExponent(); + } + constexpr bool isNormal() const noexcept + { + return exponent != infinityNaNExponent() && exponent != 0; + } + constexpr bool isDenormal() const noexcept + { + return exponent == 0 && mantissa != 0; + } + constexpr bool isZero() const noexcept + { + return exponent == 0 && mantissa == 0; + } + constexpr bool signBit() const noexcept + { + return sign; + } + static constexpr ExtendedFloat NaN() noexcept + { + return ExtendedFloat(NormalizedTag{}, 1, infinityNaNExponent()); + } + static constexpr ExtendedFloat One() noexcept + { + return ExtendedFloat(NormalizedTag{}, 0x8000000000000000ULL, exponentBias()); + } + static constexpr ExtendedFloat TwoToThe64() noexcept + { + return ExtendedFloat(NormalizedTag{}, 0x8000000000000000ULL, exponentBias() + 64); + } + static constexpr ExtendedFloat Infinity(bool sign = false) noexcept + { + return ExtendedFloat(NormalizedTag{}, 0, infinityNaNExponent(), sign); + } + static constexpr ExtendedFloat Zero(bool sign = false) noexcept + { + return ExtendedFloat(NormalizedTag{}, 0, 0, sign); + } + constexpr ExtendedFloat operator+() const noexcept + { + return *this; + } + constexpr ExtendedFloat operator-() const noexcept + { + return ExtendedFloat(NormalizedTag{}, mantissa, exponent, !sign); + } + static constexpr UInt128 shiftHelper(std::uint64_t a, unsigned shift) noexcept + { + return shift >= 128 ? 
UInt128(0) : UInt128(a, 0) >> shift; + } + static constexpr UInt128 finalRoundHelper(UInt128 v) noexcept + { + return v.low == 0x8000000000000000ULL && (v.high & 1) == 0 ? + UInt128(v.high) : + ((v >> 1) + UInt128(0x4000000000000000ULL)) >> 63; + } + static constexpr ExtendedFloat subtractHelper6(UInt128 mantissa, + std::uint16_t exponent, + bool sign, + unsigned shift) + { + return ExtendedFloat(finalRoundHelper(mantissa << shift), exponent - shift + 64, sign); + } + static constexpr ExtendedFloat subtractHelper5(UInt128 mantissa, + std::uint16_t exponent, + bool sign, + unsigned shift) + { + return subtractHelper6(mantissa, exponent, sign, shift > exponent ? exponent : shift); + } + static constexpr ExtendedFloat subtractHelper4(UInt128 mantissa, + std::uint16_t exponent, + bool sign) + { + return subtractHelper5(mantissa, exponent, sign, clz128(mantissa)); + } + static constexpr ExtendedFloat subtractHelper3(UInt128 aMantissa, + UInt128 bMantissa, + std::uint16_t exponent) noexcept + { + return aMantissa == bMantissa ? Zero() : aMantissa < bMantissa ? + subtractHelper4(bMantissa - aMantissa, exponent, true) : + subtractHelper4(aMantissa - bMantissa, exponent, false); + } + static constexpr ExtendedFloat subtractHelper2(std::uint64_t aMantissa, + std::uint16_t aExponent, + std::uint64_t bMantissa, + std::uint16_t bExponent, + std::uint16_t maxExponent) noexcept + { + return subtractHelper3(shiftHelper(aMantissa, maxExponent - aExponent), + shiftHelper(bMantissa, maxExponent - bExponent), + maxExponent); + } + static constexpr ExtendedFloat subtractHelper(std::uint64_t aMantissa, + std::uint16_t aExponent, + std::uint64_t bMantissa, + std::uint16_t bExponent) noexcept + { + return subtractHelper2(aMantissa, + aExponent, + bMantissa, + bExponent, + aExponent < bExponent ? 
bExponent : aExponent); + } + static constexpr ExtendedFloat addHelper3(UInt128 mantissa, + std::uint16_t exponent, + bool sign) noexcept + { + return mantissa >= UInt128(0x8000000000000000ULL, 0) ? + (exponent + 1 == infinityNaNExponent() ? + Infinity(sign) : + ExtendedFloat(finalRoundHelper(mantissa), exponent + 65, sign)) : + ExtendedFloat(finalRoundHelper(mantissa << 1), exponent + 64, sign); + } + static constexpr ExtendedFloat addHelper2(std::uint64_t aMantissa, + std::uint16_t aExponent, + std::uint64_t bMantissa, + std::uint16_t bExponent, + std::uint16_t maxExponent, + bool sign) noexcept + { + return addHelper3(shiftHelper(aMantissa, maxExponent - aExponent + 1) + + shiftHelper(bMantissa, maxExponent - bExponent + 1), + maxExponent, + sign); + } + static constexpr ExtendedFloat addHelper(std::uint64_t aMantissa, + std::uint16_t aExponent, + std::uint64_t bMantissa, + std::uint16_t bExponent, + bool sign) noexcept + { + return addHelper2(aMantissa, + aExponent, + bMantissa, + bExponent, + aExponent < bExponent ? bExponent : aExponent, + sign); + } + constexpr friend ExtendedFloat operator+(const ExtendedFloat &a, + const ExtendedFloat &b) noexcept + { + return a.isNaN() ? a : b.isNaN() ? + b : + a.isInfinite() ? + (b.isInfinite() ? (a.sign == b.sign ? a : NaN()) : a) : + b.isInfinite() ? + b : + a.isZero() ? + (b.isZero() ? Zero(a.sign && b.sign) : b) : + b.isZero() ? + a : + a.sign == b.sign ? + addHelper(a.mantissa, a.exponent, b.mantissa, b.exponent, a.sign) : + a.sign ? 
subtractHelper(b.mantissa, b.exponent, a.mantissa, a.exponent) : + subtractHelper(a.mantissa, a.exponent, b.mantissa, b.exponent); + } + constexpr friend ExtendedFloat operator-(const ExtendedFloat &a, + const ExtendedFloat &b) noexcept + { + return a + b.operator-(); + } + constexpr ExtendedFloat &operator+=(const ExtendedFloat &v) noexcept + { + return *this = *this + v; + } + constexpr ExtendedFloat &operator-=(const ExtendedFloat &v) noexcept + { + return *this = *this - v; + } + friend constexpr bool operator==(const ExtendedFloat &a, const ExtendedFloat &b) noexcept + { + return a.isNaN() ? false : b.isNaN() ? false : a.isZero() ? + b.isZero() : + a.exponent == b.exponent && a.mantissa == b.mantissa; + } + friend constexpr bool operator!=(const ExtendedFloat &a, const ExtendedFloat &b) noexcept + { + return !(a == b); + } + static constexpr int compareHelper(const ExtendedFloat &a, const ExtendedFloat &b) noexcept + { + return a.isZero() ? (b.isZero() ? 0 : (b.sign ? 1 : -1)) : a.sign != b.sign ? + (a.sign ? -1 : 1) : + a.exponent != b.exponent ? + ((a.exponent < b.exponent) != a.sign ? -1 : 1) : + a.mantissa == b.mantissa ? 0 : + (a.mantissa < b.mantissa) != a.sign ? -1 : 1; + } + friend constexpr bool operator<(const ExtendedFloat &a, const ExtendedFloat &b) noexcept + { + return a.isNaN() ? false : b.isNaN() ? false : compareHelper(a, b) < 0; + } + friend constexpr bool operator<=(const ExtendedFloat &a, const ExtendedFloat &b) noexcept + { + return a.isNaN() ? false : b.isNaN() ? false : compareHelper(a, b) <= 0; + } + friend constexpr bool operator>(const ExtendedFloat &a, const ExtendedFloat &b) noexcept + { + return a.isNaN() ? false : b.isNaN() ? false : compareHelper(a, b) > 0; + } + friend constexpr bool operator>=(const ExtendedFloat &a, const ExtendedFloat &b) noexcept + { + return a.isNaN() ? false : b.isNaN() ? 
false : compareHelper(a, b) >= 0; + } + static constexpr ExtendedFloat mulHelper4(UInt128 mantissa, + std::int32_t exponent, + bool sign) noexcept + { + return exponent >= infinityNaNExponent() ? + Infinity(sign) : + exponent <= -128 ? + Zero(sign) : + exponent < 0 ? ExtendedFloat(finalRoundHelper(mantissa >> -exponent), 64, sign) : + ExtendedFloat(finalRoundHelper(mantissa), exponent + 64, sign); + } + static constexpr ExtendedFloat mulHelper3(UInt128 mantissa, + std::int32_t exponent, + bool sign, + unsigned shift) noexcept + { + return mulHelper4(mantissa << shift, exponent - shift, sign); + } + static constexpr ExtendedFloat mulHelper2(UInt128 mantissa, + std::int32_t exponent, + bool sign) noexcept + { + return mantissa == UInt128(0) ? Zero(sign) : + mulHelper3(mantissa, exponent, sign, clz128(mantissa)); + } + static constexpr ExtendedFloat mulHelper(std::uint64_t aMantissa, + std::int32_t aExponent, + std::uint64_t bMantissa, + std::int32_t bExponent, + bool sign) noexcept + { + return mulHelper2(UInt128(aMantissa) * UInt128(bMantissa), + aExponent + bExponent - exponentBias() + 1, + sign); + } + constexpr friend ExtendedFloat operator*(const ExtendedFloat &a, + const ExtendedFloat &b) noexcept + { + return a.isNaN() ? a : b.isNaN() ? + b : + a.isInfinite() ? + (b.isZero() ? NaN() : Infinity(a.sign != b.sign)) : + b.isInfinite() ? + (a.isZero() ? NaN() : Infinity(a.sign != b.sign)) : + mulHelper( + a.mantissa, a.exponent, b.mantissa, b.exponent, a.sign != b.sign); + } + constexpr ExtendedFloat &operator*=(const ExtendedFloat &v) noexcept + { + return *this = *this * v; + } + static constexpr int compareU128(UInt128 a, UInt128 b) noexcept + { + return a == b ? 0 : a < b ? -1 : 1; + } + static constexpr ExtendedFloat divHelper6(UInt128 mantissa, + std::int32_t exponent, + bool sign) noexcept + { + return exponent >= infinityNaNExponent() ? + Infinity(sign) : + exponent <= -128 ? + Zero(sign) : + exponent < 0 ? 
ExtendedFloat(finalRoundHelper(mantissa >> -exponent), 64, sign) : + ExtendedFloat(finalRoundHelper(mantissa), exponent + 64, sign); + } + static constexpr ExtendedFloat divHelper5(UInt128 quotient, + unsigned shift, + int roundExtraBitsCompareValue, + std::int32_t exponent, + bool sign) noexcept + { + return divHelper6( + ((quotient << 2) | UInt128(static_cast(2 - roundExtraBitsCompareValue))) + << (shift - 2), + exponent - shift + 64, + sign); + } + static constexpr ExtendedFloat divHelper4(UInt128::DivModResult mantissa, + std::uint64_t bMantissa, + std::int32_t exponent, + bool sign) noexcept + { + return divHelper5(mantissa.divResult, + clz128(mantissa.divResult), + compareU128(UInt128(bMantissa), mantissa.modResult << 1), + exponent, + sign); + } + static constexpr ExtendedFloat divHelper3(std::uint64_t aMantissa, + std::uint64_t bMantissa, + std::int32_t exponent, + bool sign) noexcept + { + return divHelper4( + UInt128::divmod(UInt128(aMantissa, 0), UInt128(bMantissa)), bMantissa, exponent, sign); + } + static constexpr ExtendedFloat divHelper2(std::uint64_t aMantissa, + std::int32_t aExponent, + unsigned aShift, + std::uint64_t bMantissa, + std::int32_t bExponent, + unsigned bShift, + bool sign) noexcept + { + return divHelper3(aMantissa << aShift, + bMantissa << bShift, + aExponent - aShift - (bExponent - bShift) + exponentBias() - 1, + sign); + } + static constexpr ExtendedFloat divHelper(std::uint64_t aMantissa, + std::int32_t aExponent, + std::uint64_t bMantissa, + std::int32_t bExponent, + bool sign) noexcept + { + return divHelper2( + aMantissa, aExponent, clz64(aMantissa), bMantissa, bExponent, clz64(bMantissa), sign); + } + friend constexpr ExtendedFloat operator/(const ExtendedFloat &a, + const ExtendedFloat &b) noexcept + { + return a.isNaN() ? a : b.isNaN() ? + b : + a.isInfinite() ? + (b.isInfinite() ? NaN() : Infinity(a.sign != b.sign)) : + b.isZero() ? + (a.isZero() ? NaN() : Infinity(a.sign != b.sign)) : + b.isInfinite() || a.isZero() ? 
+ Zero(a.sign != b.sign) : + divHelper( + a.mantissa, a.exponent, b.mantissa, b.exponent, a.sign != b.sign); + } + constexpr ExtendedFloat &operator/=(const ExtendedFloat &v) noexcept + { + return *this = *this / v; + } + static constexpr ExtendedFloat floorCeilHelper2(std::uint64_t mantissa, + std::int32_t exponent) noexcept + { + return exponent >= infinityNaNExponent() ? + Infinity() : + exponent <= -128 ? + Zero() : + exponent < 0 ? + ExtendedFloat(finalRoundHelper(UInt128(mantissa, 0) >> -exponent), 64) : + ExtendedFloat(finalRoundHelper(UInt128(mantissa, 0)), exponent + 64); + } + static constexpr ExtendedFloat floorCeilHelper(UInt128 mantissa, std::int32_t exponent) noexcept + { + return mantissa.high != 0 ? floorCeilHelper2((mantissa >> 1).low, exponent + 1) : + floorCeilHelper2(mantissa.low, exponent); + } + static constexpr ExtendedFloat ceilHelper2(UInt128 mantissa) noexcept + { + return mantissa.low != 0 ? (mantissa.high == ~static_cast(0) ? + TwoToThe64() : + ExtendedFloat(mantissa.high + 1)) : + ExtendedFloat(mantissa.high); + } + static constexpr ExtendedFloat ceilHelper(std::uint64_t mantissa, + std::int32_t exponent) noexcept + { + return exponent < exponentBias() ? + One() : + exponent >= exponentBias() + 63 ? + ExtendedFloat(NormalizedTag{}, mantissa, exponent) : + ceilHelper2(UInt128(mantissa, 0) >> (exponentBias() - exponent + 63)); + } + static constexpr ExtendedFloat floorHelper2(UInt128 mantissa) noexcept + { + return ExtendedFloat(mantissa.high); + } + static constexpr ExtendedFloat floorHelper(std::uint64_t mantissa, + std::int32_t exponent) noexcept + { + return exponent < exponentBias() ? + Zero() : + exponent >= exponentBias() + 63 ? + ExtendedFloat(NormalizedTag{}, mantissa, exponent) : + floorHelper2(UInt128(mantissa, 0) >> (exponentBias() - exponent + 63)); + } + constexpr friend ExtendedFloat floor(const ExtendedFloat &v) noexcept + { + return !v.isFinite() || v.isZero() ? v : v.sign ? 
-ceilHelper(v.mantissa, v.exponent) : + floorHelper(v.mantissa, v.exponent); + } + constexpr friend ExtendedFloat trunc(const ExtendedFloat &v) noexcept + { + return !v.isFinite() || v.isZero() ? v : v.sign ? -floorHelper(v.mantissa, v.exponent) : + floorHelper(v.mantissa, v.exponent); + } + constexpr friend ExtendedFloat ceil(const ExtendedFloat &v) noexcept + { + return !v.isFinite() || v.isZero() ? v : v.sign ? -floorHelper(v.mantissa, v.exponent) : + ceilHelper(v.mantissa, v.exponent); + } + static constexpr ExtendedFloat roundHelper(std::uint64_t mantissa, + std::int32_t exponent) noexcept + { + return exponent < exponentBias() - 2 ? + Zero() : + exponent >= exponentBias() + 63 ? + ExtendedFloat(NormalizedTag{}, mantissa, exponent) : + ExtendedFloat(((UInt128(mantissa, 0) >> (exponentBias() - exponent + 64)) + + UInt128(0x4000000000000000ULL)) + >> 63); + } + constexpr friend ExtendedFloat round(const ExtendedFloat &v) noexcept + { + return !v.isFinite() || v.isZero() ? v : v.sign ? -roundHelper(v.mantissa, v.exponent) : + roundHelper(v.mantissa, v.exponent); + } + explicit constexpr operator std::uint64_t() const noexcept + { + return isNaN() ? 0 : isInfinite() ? + (sign ? 0 : ~static_cast(0)) : + exponent < exponentBias() || sign ? + 0 : + *this >= TwoToThe64() ? + ~static_cast(0) : + (UInt128(mantissa, 0) >> (exponentBias() - exponent + 63)).high; + } + static constexpr std::int64_t toInt64Helper(bool sign, std::uint64_t uint64Value) noexcept + { + return sign ? (uint64Value > 0x8000000000000000ULL ? + -static_cast(0x7FFFFFFFFFFFFFFFULL) - 1 : + -static_cast(uint64Value)) : + uint64Value >= 0x8000000000000000ULL ? + static_cast(0x7FFFFFFFFFFFFFFFULL) : + static_cast(uint64Value); + } + explicit constexpr operator std::int64_t() const noexcept + { + return isNaN() ? 0 : sign ? 
toInt64Helper(true, static_cast(operator-())) : + toInt64Helper(false, static_cast(*this)); + } + static constexpr ExtendedFloat powHelper(const ExtendedFloat &base, + const ExtendedFloat ¤tValue, + std::uint64_t exponent) noexcept + { + return exponent == 0 ? currentValue : exponent == 1 ? + currentValue * base : + exponent == 2 ? + currentValue * (base * base) : + exponent & 1 ? + powHelper(base * base, currentValue * base, exponent >> 1) : + powHelper(base * base, currentValue, exponent >> 1); + } + constexpr friend ExtendedFloat pow(const ExtendedFloat &base, std::uint64_t exponent) noexcept + { + return powHelper(base, One(), exponent); + } + friend ExtendedFloat pow(const ExtendedFloat &base, std::int64_t exponent) noexcept + { + return exponent < 0 ? powHelper(One() / base, One(), -exponent) : + powHelper(base, One(), exponent); + } + constexpr friend int ilogb(const ExtendedFloat &v) noexcept + { + return v.isNaN() ? FP_ILOGBNAN : v.isZero() ? FP_ILOGB0 : v.isInfinite() ? + std::numeric_limits::max() : + static_cast(v.exponent) + - exponentBias() + - clz64(v.mantissa); + } + static constexpr ExtendedFloat scalbnHelper(std::uint64_t mantissa, + std::int64_t exponent, + bool sign) noexcept + { + return exponent >= infinityNaNExponent() ? + Infinity(sign) : + exponent <= -128 ? + Zero(sign) : + exponent < 0 ? + ExtendedFloat(finalRoundHelper(UInt128(mantissa, 0) >> -exponent), 64, sign) : + ExtendedFloat(finalRoundHelper(UInt128(mantissa, 0)), exponent + 64, sign); + } + constexpr friend ExtendedFloat scalbn(const ExtendedFloat &v, std::int64_t exponent) noexcept + { + return !v.isFinite() || v.isZero() ? v : scalbnHelper( + v.mantissa, v.exponent + exponent, v.sign); + } + static constexpr std::uint64_t log2Helper4(UInt128 mantissa) noexcept + { + return ~mantissa.high == 0 + || ((mantissa.high & 1) == 0 && mantissa.low == 0x8000000000000000ULL) ? 
+ mantissa.high : + (mantissa + UInt128(0x8000000000000000ULL)).high; + } + static constexpr UInt128 log2Helper3(UInt128 mantissa, unsigned bitsLeft) noexcept + { + return (bitsLeft > 0 ? + log2Helper2( + log2Helper4(mantissa << (mantissa.high & 0x8000000000000000ULL ? 0 : 1)), + bitsLeft - 1) + >> 1 : + UInt128(0)) + | UInt128(mantissa.high & 0x8000000000000000ULL, 0); + } + static constexpr UInt128 log2Helper2(std::uint64_t mantissa, unsigned bitsLeft) noexcept + { + return log2Helper3(UInt128(mantissa) * UInt128(mantissa), bitsLeft); + } + static constexpr ExtendedFloat log2Helper(const ExtendedFloat &v, unsigned shift) noexcept + { + return ExtendedFloat(finalRoundHelper(log2Helper2(v.mantissa << shift, 67)), + exponentBias() - 1 + 64, + 0) + + ExtendedFloat(static_cast(v.exponent) - exponentBias() - shift); + } + constexpr friend ExtendedFloat log2(const ExtendedFloat &v) noexcept + { + return v.isNaN() ? v : v.isZero() ? Infinity(true) : v.sign ? + NaN() : + v.isInfinite() ? v : log2Helper(v, clz64(v.mantissa)); + } + static constexpr ExtendedFloat Log10Of2() noexcept + { + return ExtendedFloat(NormalizedTag{}, 0x9A209A84FBCFF799ULL, exponentBias() - 2); + } + static constexpr ExtendedFloat LogOf2() noexcept + { + return ExtendedFloat(NormalizedTag{}, 0xB17217F7D1CF79ACULL, exponentBias() - 1); + } + constexpr friend ExtendedFloat log10(const ExtendedFloat &v) noexcept + { + return log2(v) * Log10Of2(); + } + constexpr friend ExtendedFloat log(const ExtendedFloat &v) noexcept + { + return log2(v) * LogOf2(); + } +}; +} +} +} + +#endif /* UTIL_SOFT_FLOAT_H_ */ diff --git a/src/util/variant.h b/src/util/variant.h index f32aa2a..118629c 100644 --- a/src/util/variant.h +++ b/src/util/variant.h @@ -21,8 +21,8 @@ * */ -#ifndef SOURCE_UTIL_VARIANT_H_ -#define SOURCE_UTIL_VARIANT_H_ +#ifndef UTIL_VARIANT_H_ +#define UTIL_VARIANT_H_ #include #include @@ -154,6 +154,11 @@ using variant_alternative_t = typename variant_alternative::type; namespace detail { +struct 
variant_base_construct_tag +{ + explicit variant_base_construct_tag() = default; +}; + template struct variant_identity_type { @@ -236,7 +241,19 @@ public: }; template -union variant_values +constexpr bool variant_is_trivially_destructible() noexcept +{ + bool values[] = { + std::is_trivially_destructible::value..., + }; + for(bool v : values) + if(!v) + return false; + return true; +} + +template +union variant_values_implementation { char value; static constexpr bool is_copy_constructible = true; @@ -247,16 +264,15 @@ union variant_values static constexpr bool is_move_assignable = true; static constexpr bool is_nothrow_copy_assignable = true; static constexpr bool is_nothrow_move_assignable = true; - static constexpr bool is_trivially_destructible = true; static constexpr bool is_swappable = true; static constexpr bool is_nothrow_swappable = true; static constexpr bool is_equals_comparable = true; static constexpr bool is_less_comparable = true; static constexpr bool is_nothrow_equals_comparable = true; static constexpr bool is_nothrow_less_comparable = true; - variant_values() = delete; + variant_values_implementation() = delete; template - constexpr variant_values(in_place_index_t) noexcept : value() + constexpr variant_values_implementation(in_place_index_t) noexcept : value() { } template @@ -264,197 +280,238 @@ union variant_values { return variant_npos; } - void copy_construct(const variant_values &rt, std::size_t index) noexcept + void copy_construct(const variant_values_implementation &rt, std::size_t index) noexcept { } - void move_construct(variant_values &&rt, std::size_t index) noexcept + void move_construct(variant_values_implementation &&rt, std::size_t index) noexcept { } - void copy_assign(const variant_values &rt, std::size_t index) noexcept + void copy_assign(const variant_values_implementation &rt, std::size_t index) noexcept { } - void move_assign(variant_values &&rt, std::size_t index) noexcept + void move_assign(variant_values_implementation 
&&rt, std::size_t index) noexcept { } - void destroy(std::size_t index) noexcept + void destruct(std::size_t index) noexcept { } - void swap(variant_values &rt, std::size_t index) noexcept + void swap(variant_values_implementation &rt, std::size_t index) noexcept { } - bool is_equal(const variant_values &rt, std::size_t index) const noexcept + bool is_equal(const variant_values_implementation &rt, std::size_t index) const noexcept { return true; } - bool is_less(const variant_values &rt, std::size_t index) const noexcept + bool is_less(const variant_values_implementation &rt, std::size_t index) const noexcept { return false; } }; -template -union variant_values -{ - typedef T type_0; - static_assert(!std::is_void::value, "invalid variant member type"); - static_assert(!std::is_reference::value, "invalid variant member type"); - static_assert(!std::is_array::value, "invalid variant member type"); - static_assert(!std::is_const::value, "invalid variant member type"); - static_assert(!std::is_volatile::value, "invalid variant member type"); - static_assert(std::is_object::value, "invalid variant member type"); - T current_value; - variant_values other_values; - static constexpr bool is_copy_constructible = - std::is_copy_constructible::value && variant_values::is_copy_constructible; - static constexpr bool is_move_constructible = - std::is_move_constructible::value && variant_values::is_move_constructible; - static constexpr bool is_nothrow_copy_constructible = - std::is_nothrow_copy_constructible::value - && variant_values::is_nothrow_copy_constructible; - static constexpr bool is_nothrow_move_constructible = - std::is_nothrow_move_constructible::value - && variant_values::is_nothrow_move_constructible; - static constexpr bool is_copy_assignable = std::is_copy_assignable::value - && std::is_copy_constructible::value - && variant_values::is_copy_assignable; - static constexpr bool is_move_assignable = std::is_move_assignable::value - && 
std::is_move_constructible::value - && variant_values::is_move_assignable; - static constexpr bool is_nothrow_copy_assignable = - std::is_nothrow_copy_assignable::value && std::is_nothrow_copy_constructible::value - && variant_values::is_nothrow_copy_assignable; - static constexpr bool is_nothrow_move_assignable = - std::is_nothrow_move_assignable::value && std::is_nothrow_move_constructible::value - && variant_values::is_nothrow_move_assignable; - static constexpr bool is_trivially_destructible = - std::is_trivially_destructible::value - && variant_values::is_trivially_destructible; - static constexpr bool is_swappable = is_swappable_v && std::is_move_constructible::value - && variant_values::is_swappable; - static constexpr bool is_nothrow_swappable = - is_nothrow_swappable_v && std::is_nothrow_move_constructible::value - && variant_values::is_nothrow_swappable; - static constexpr bool is_equals_comparable = - variant_is_equals_comparable::value && variant_values::is_equals_comparable; - static constexpr bool is_less_comparable = - variant_is_less_comparable::value && variant_values::is_less_comparable; - static constexpr bool is_nothrow_equals_comparable = - variant_is_nothrow_equals_comparable::value - && variant_values::is_nothrow_equals_comparable; - static constexpr bool is_nothrow_less_comparable = - variant_is_nothrow_less_comparable::value - && variant_values::is_nothrow_less_comparable; - template ::value>::type> - constexpr variant_values() noexcept(std::is_nothrow_default_constructible::value) - : current_value() - { - } - template ::value>::type> - constexpr variant_values(in_place_index_t<0>, Args &&... args) noexcept( - std::is_nothrow_constructible::value) - : current_value(std::forward(args)...) - { - } - template , - in_place_index_t, - Args...>::value>::type> - constexpr variant_values(in_place_index_t, Args &&... 
args) noexcept( - std::is_nothrow_constructible, - in_place_index_t, - Args...>::value) - : other_values(in_place_index, std::forward(args)...) - { - } - template < - typename U, - typename... Args, - typename = typename std:: - enable_if, Args...>::value>::type> - constexpr variant_values( - in_place_index_t<0>, - std::initializer_list il, - Args &&... args) noexcept(std::is_nothrow_constructible, - Args...>::value) - : current_value(il, std::forward(args)...) - { - } - template - static constexpr std::size_t index_from_type() noexcept - { - std::size_t next = variant_values::template index_from_type(); - if(std::is_same::value && next == variant_npos) - return 0; - if(next == variant_npos) - return variant_npos; - return next + 1; - } - void copy_construct(const variant_values &rt, - std::size_t index) noexcept(is_nothrow_copy_constructible) - { - if(index == 0) - new(const_cast(std::addressof(current_value))) T(rt.current_value); - else - other_values.copy_construct(rt.other_values, index - 1); - } - void move_construct(variant_values &&rt, - std::size_t index) noexcept(is_nothrow_move_constructible) - { - if(index == 0) - new(const_cast(std::addressof(current_value))) T(std::move(rt.current_value)); - else - other_values.move_construct(std::move(rt.other_values), index - 1); - } - void copy_assign(const variant_values &rt, - std::size_t index) noexcept(is_nothrow_copy_assignable) - { - if(index == 0) - current_value = rt.current_value; - else - other_values.copy_assign(rt.other_values, index - 1); - } - void move_assign(variant_values &&rt, std::size_t index) noexcept(is_nothrow_move_assignable) - { - if(index == 0) - current_value = std::move(rt.current_value); - else - other_values.move_assign(std::move(rt.other_values), index - 1); - } - void destruct(std::size_t index) noexcept - { - if(index == 0) - current_value.~T(); - else - other_values.destruct(index - 1); - } - void swap(variant_values &rt, std::size_t index) noexcept(is_nothrow_swappable) - { - 
using std::swap; - if(index == 0) - swap(current_value, rt.current_value); - else - other_values.swap(rt.other_values, index - 1); - } - bool is_equal(const variant_values &rt, std::size_t index) const - noexcept(is_nothrow_equals_comparable) - { - if(index == 0) - return static_cast(current_value == rt.current_value); - return other_values.is_equal(rt.other_values, index - 1); - } - bool is_less(const variant_values &rt, std::size_t index) const - noexcept(is_nothrow_less_comparable) - { - if(index == 0) - return static_cast(current_value < rt.current_value); - return other_values.is_equal(rt.other_values, index - 1); - } -}; +template +using variant_values = + variant_values_implementation(), Types...>; + +#define VULKAN_CPU_UTIL_VARIANT_VALUES(Is_Trivially_Destructible, Destructor) \ + template \ + union variant_values_implementation \ + { \ + typedef T type_0; \ + static_assert(!std::is_void::value, "invalid variant member type"); \ + static_assert(!std::is_reference::value, "invalid variant member type"); \ + static_assert(!std::is_array::value, "invalid variant member type"); \ + static_assert(!std::is_const::value, "invalid variant member type"); \ + static_assert(!std::is_volatile::value, "invalid variant member type"); \ + static_assert(std::is_object::value, "invalid variant member type"); \ + T current_value; \ + variant_values_implementation other_values; \ + static constexpr bool is_copy_constructible = \ + std::is_copy_constructible::value \ + && variant_values_implementation::is_copy_constructible; \ + static constexpr bool is_move_constructible = \ + std::is_move_constructible::value \ + && variant_values_implementation::is_move_constructible; \ + static constexpr bool is_nothrow_copy_constructible = \ + std::is_nothrow_copy_constructible::value \ + && variant_values_implementation::is_nothrow_copy_constructible; \ + static constexpr bool is_nothrow_move_constructible = \ + std::is_nothrow_move_constructible::value \ + && 
variant_values_implementation::is_nothrow_move_constructible; \ + static constexpr bool is_copy_assignable = \ + std::is_copy_assignable::value && std::is_copy_constructible::value \ + && variant_values_implementation::is_copy_assignable; \ + static constexpr bool is_move_assignable = \ + std::is_move_assignable::value && std::is_move_constructible::value \ + && variant_values_implementation::is_move_assignable; \ + static constexpr bool is_nothrow_copy_assignable = \ + std::is_nothrow_copy_assignable::value \ + && std::is_nothrow_copy_constructible::value \ + && variant_values_implementation::is_nothrow_copy_assignable; \ + static constexpr bool is_nothrow_move_assignable = \ + std::is_nothrow_move_assignable::value \ + && std::is_nothrow_move_constructible::value \ + && variant_values_implementation::is_nothrow_move_assignable; \ + static constexpr bool is_swappable = \ + is_swappable_v && std::is_move_constructible::value \ + && variant_values_implementation::is_swappable; \ + static constexpr bool is_nothrow_swappable = \ + is_nothrow_swappable_v && std::is_nothrow_move_constructible::value \ + && variant_values_implementation::is_nothrow_swappable; \ + static constexpr bool is_equals_comparable = \ + variant_is_equals_comparable::value \ + && variant_values_implementation::is_equals_comparable; \ + static constexpr bool is_less_comparable = \ + variant_is_less_comparable::value \ + && variant_values_implementation::is_less_comparable; \ + static constexpr bool is_nothrow_equals_comparable = \ + variant_is_nothrow_equals_comparable::value \ + && variant_values_implementation::is_nothrow_equals_comparable; \ + static constexpr bool is_nothrow_less_comparable = \ + variant_is_nothrow_less_comparable::value \ + && variant_values_implementation::is_nothrow_less_comparable; \ + template < \ + typename T2 = T, \ + typename = typename std::enable_if::value>::type> \ + constexpr variant_values_implementation() noexcept( \ + std::is_nothrow_default_constructible::value) 
\ + : current_value() \ + { \ + } \ + template < \ + typename... Args, \ + typename = typename std::enable_if::value>::type> \ + constexpr variant_values_implementation(in_place_index_t<0>, Args &&... args) noexcept( \ + std::is_nothrow_constructible::value) \ + : current_value(std::forward(args)...) \ + { \ + } \ + template , \ + in_place_index_t, \ + Args...>::value>::type> \ + constexpr variant_values_implementation(in_place_index_t, Args &&... args) noexcept( \ + std::is_nothrow_constructible, \ + in_place_index_t, \ + Args...>::value) \ + : other_values(in_place_index, std::forward(args)...) \ + { \ + } \ + template < \ + typename U, \ + typename... Args, \ + typename = typename std::enable_if, \ + Args...>::value>::type> \ + constexpr variant_values_implementation( \ + in_place_index_t<0>, \ + std::initializer_list il, \ + Args &&... args) noexcept(std::is_nothrow_constructible, \ + Args...>::value) \ + : current_value(il, std::forward(args)...) \ + { \ + } \ + template \ + static constexpr std::size_t index_from_type() noexcept \ + { \ + std::size_t next = \ + variant_values_implementation::template index_from_type(); \ + if(std::is_same::value && next == variant_npos) \ + return 0; \ + if(next == variant_npos) \ + return variant_npos; \ + return next + 1; \ + } \ + void copy_construct(const variant_values_implementation &rt, \ + std::size_t index) noexcept(is_nothrow_copy_constructible) \ + { \ + if(index == 0) \ + new(const_cast(static_cast( \ + std::addressof(current_value)))) T(rt.current_value); \ + else \ + other_values.copy_construct(rt.other_values, index - 1); \ + } \ + void move_construct(variant_values_implementation &&rt, \ + std::size_t index) noexcept(is_nothrow_move_constructible) \ + { \ + if(index == 0) \ + new(const_cast(static_cast( \ + std::addressof(current_value)))) T(std::move(rt.current_value)); \ + else \ + other_values.move_construct(std::move(rt.other_values), index - 1); \ + } \ + void copy_assign(const 
variant_values_implementation &rt, \ + std::size_t index) noexcept(is_nothrow_copy_assignable) \ + { \ + if(index == 0) \ + current_value = rt.current_value; \ + else \ + other_values.copy_assign(rt.other_values, index - 1); \ + } \ + void move_assign(variant_values_implementation &&rt, \ + std::size_t index) noexcept(is_nothrow_move_assignable) \ + { \ + if(index == 0) \ + current_value = std::move(rt.current_value); \ + else \ + other_values.move_assign(std::move(rt.other_values), index - 1); \ + } \ + void destruct(std::size_t index) noexcept \ + { \ + if(index == 0) \ + current_value.~T(); \ + else \ + other_values.destruct(index - 1); \ + } \ + void swap(variant_values_implementation &rt, \ + std::size_t index) noexcept(is_nothrow_swappable) \ + { \ + using std::swap; \ + if(index == 0) \ + swap(current_value, rt.current_value); \ + else \ + other_values.swap(rt.other_values, index - 1); \ + } \ + bool is_equal(const variant_values_implementation &rt, std::size_t index) const \ + noexcept(is_nothrow_equals_comparable) \ + { \ + if(index == 0) \ + return static_cast(current_value == rt.current_value); \ + return other_values.is_equal(rt.other_values, index - 1); \ + } \ + bool is_less(const variant_values_implementation &rt, std::size_t index) const \ + noexcept(is_nothrow_less_comparable) \ + { \ + if(index == 0) \ + return static_cast(current_value < rt.current_value); \ + return other_values.is_equal(rt.other_values, index - 1); \ + } \ + Destructor \ + }; + +VULKAN_CPU_UTIL_VARIANT_VALUES(true, ~variant_values_implementation() = default;) +VULKAN_CPU_UTIL_VARIANT_VALUES(false, ~variant_values_implementation(){}) +#undef VULKAN_CPU_UTIL_VARIANT_VALUES template struct variant_get; @@ -505,103 +562,106 @@ struct variant_get<0, T, Types...> } }; -#define VULKAN_CPU_UTIL_VARIANT_DISPATCH(Const, Ref) \ - template ()( \ - std::declval(), std::declval()...))...>::type> \ - constexpr Return_Type variant_dispatch_helper(Fn &&fn, \ - Const variant_values Ref values, \ 
- std::size_t index, \ - std::index_sequence, \ - Args &&... args) \ - { \ - typedef Return_Type (*Dispatch_Function)( \ - Fn && fn, Const variant_values & values, Args && ... args); \ - const Dispatch_Function dispatch_functions[sizeof...(Types)] = { \ - static_cast( \ - [](Fn &&fn, Const variant_values &values, Args &&... args) \ - -> Return_Type \ - { \ - return std::forward(fn)( \ - variant_get::get( \ - std::forward Ref>(values)), \ - std::forward(args)...); \ - })..., \ - }; \ - if(index < sizeof...(Types)) \ - return dispatch_functions[index]( \ - std::forward(fn), values, std::forward(args)...); \ - throw bad_variant_access(); \ - } \ - \ - template \ - constexpr auto variant_dispatch( \ - Fn &&fn, Const variant_values Ref values, std::size_t index, Args &&... args) \ - ->decltype( \ - variant_dispatch_helper(std::forward(fn), \ - std::forward Ref>(values), \ - index, \ - std::index_sequence_for{}, \ - std::forward(args)...)) \ - { \ - return variant_dispatch_helper(std::forward(fn), \ - std::forward Ref>(values), \ - index, \ - std::index_sequence_for{}, \ - std::forward(args)...); \ - } \ - \ - template ()( \ - std::declval(), std::declval()...))...>::type> \ - constexpr Return_Type variant_dispatch_helper_nothrow( \ - Fn &&fn, \ - Const variant_values Ref values, \ - std::size_t index, \ - std::index_sequence, \ - Args &&... args) \ - { \ - typedef Return_Type (*Dispatch_Function)( \ - Fn && fn, Const variant_values & values, Args && ... args); \ - const Dispatch_Function dispatch_functions[sizeof...(Types)] = { \ - static_cast( \ - [](Fn &&fn, Const variant_values &values, Args &&... 
args) \ - -> Return_Type \ - { \ - return std::forward(fn)( \ - variant_get::get( \ - std::forward Ref>(values)), \ - std::forward(args)...); \ - })..., \ - }; \ - if(index < sizeof...(Types)) \ - return dispatch_functions[index]( \ - std::forward(fn), values, std::forward(args)...); \ - return {}; \ - } \ - \ - template \ - constexpr auto variant_dispatch_nothrow( \ - Fn &&fn, Const variant_values Ref values, std::size_t index, Args &&... args) \ - ->decltype(variant_dispatch_helper_nothrow( \ - std::forward(fn), \ - std::forward Ref>(values), \ - index, \ - std::index_sequence_for{}, \ - std::forward(args)...)) \ - { \ - return variant_dispatch_helper_nothrow( \ - std::forward(fn), \ - std::forward Ref>(values), \ - index, \ - std::index_sequence_for{}, \ - std::forward(args)...); \ +#define VULKAN_CPU_UTIL_VARIANT_DISPATCH(Const, Ref) \ + template \ + constexpr Return_Type variant_dispatch_helper_dispatch_function( \ + Fn &&fn, Const variant_values Ref values, Args &&... args) \ + { \ + return std::forward(fn)(variant_get::get( \ + std::forward Ref>(values)), \ + std::forward(args)...); \ + } \ + \ + template ()( \ + std::declval(), std::declval()...))...>::type> \ + constexpr Return_Type variant_dispatch_helper(Fn &&fn, \ + Const variant_values Ref values, \ + std::size_t index, \ + std::index_sequence, \ + Args &&... args) \ + { \ + typedef Return_Type (*Dispatch_Function)( \ + Fn && fn, Const variant_values Ref values, Args && ... args); \ + const Dispatch_Function dispatch_functions[sizeof...(Types)] = { \ + static_cast( \ + &variant_dispatch_helper_dispatch_function)..., \ + }; \ + if(index < sizeof...(Types)) \ + return dispatch_functions[index]( \ + std::forward(fn), \ + std::forward Ref>(values), \ + std::forward(args)...); \ + throw bad_variant_access(); \ + } \ + \ + template \ + constexpr auto variant_dispatch( \ + Fn &&fn, Const variant_values Ref values, std::size_t index, Args &&... 
args) \ + ->decltype( \ + variant_dispatch_helper(std::forward(fn), \ + std::forward Ref>(values), \ + index, \ + std::index_sequence_for{}, \ + std::forward(args)...)) \ + { \ + return variant_dispatch_helper(std::forward(fn), \ + std::forward Ref>(values), \ + index, \ + std::index_sequence_for{}, \ + std::forward(args)...); \ + } \ + \ + template ()( \ + std::declval(), std::declval()...))...>::type> \ + constexpr Return_Type variant_dispatch_helper_nothrow( \ + Fn &&fn, \ + Const variant_values Ref values, \ + std::size_t index, \ + std::index_sequence, \ + Args &&... args) \ + { \ + typedef Return_Type (*Dispatch_Function)( \ + Fn && fn, Const variant_values Ref values, Args && ... args); \ + const Dispatch_Function dispatch_functions[sizeof...(Types)] = { \ + static_cast( \ + &variant_dispatch_helper_dispatch_function)..., \ + }; \ + if(index < sizeof...(Types)) \ + return dispatch_functions[index]( \ + std::forward(fn), \ + std::forward Ref>(values), \ + std::forward(args)...); \ + return {}; \ + } \ + \ + template \ + constexpr auto variant_dispatch_nothrow( \ + Fn &&fn, Const variant_values Ref values, std::size_t index, Args &&... args) \ + ->decltype(variant_dispatch_helper_nothrow( \ + std::forward(fn), \ + std::forward Ref>(values), \ + index, \ + std::index_sequence_for{}, \ + std::forward(args)...)) \ + { \ + return variant_dispatch_helper_nothrow( \ + std::forward(fn), \ + std::forward Ref>(values), \ + index, \ + std::index_sequence_for{}, \ + std::forward(args)...); \ } VULKAN_CPU_UTIL_VARIANT_DISPATCH(, &) @@ -671,38 +731,150 @@ struct variant_index_type } }; -template -struct variant_base -{ - detail::variant_values values; - detail::variant_index_type index_value; - template - constexpr variant_base(std::size_t index_value, Args &&... 
args) // - noexcept(noexcept(new(std::declval()) - detail::variant_values(std::declval()...))) - : values(std::forward(args)...), index_value(index_value) - { +template +struct variant_base; + +#define VULKAN_CPU_UTIL_VARIANT_BASE_DESTRUCTOR_false \ + ~variant_base() \ + { \ + values.destruct(index_value.get()); \ } - ~variant_base() - { - values.destroy(index_value.get()); + +#define VULKAN_CPU_UTIL_VARIANT_BASE_DESTRUCTOR_true ~variant_base() = default; + +#define VULKAN_CPU_UTIL_VARIANT_BASE_COPY_CONSTRUCTOR_true \ + variant_base(const variant_base &rt) noexcept( \ + detail::variant_values::is_nothrow_copy_constructible) \ + : values(in_place_index), index_value(variant_npos) \ + { \ + values.copy_construct(rt.values, rt.index_value.get()); \ + index_value = rt.index_value; \ } -}; -template -struct variant_base -{ - detail::variant_values values; - detail::variant_index_type index_value; - template - constexpr variant_base(std::size_t index_value, Args &&... args) // - noexcept(noexcept(new(std::declval()) - detail::variant_values(std::declval()...))) - : values(std::forward(args)...), index_value(index_value) - { +#define VULKAN_CPU_UTIL_VARIANT_BASE_COPY_CONSTRUCTOR_false \ + variant_base(const variant_base &rt) = delete; + +#define VULKAN_CPU_UTIL_VARIANT_BASE_MOVE_CONSTRUCTOR_true \ + variant_base(variant_base &&rt) noexcept( \ + detail::variant_values::is_nothrow_move_constructible) \ + : values(in_place_index), index_value(variant_npos) \ + { \ + values.move_construct(std::move(rt.values), rt.index_value.get()); \ + index_value = rt.index_value; \ } - ~variant_base() = default; -}; + +#define VULKAN_CPU_UTIL_VARIANT_BASE_MOVE_CONSTRUCTOR_false \ + variant_base(variant_base &&rt) = delete; + +#define VULKAN_CPU_UTIL_VARIANT_BASE_COPY_ASSIGN_OP_true \ + variant_base &operator=(const variant_base &rt) noexcept( \ + detail::variant_values::is_nothrow_copy_assignable) \ + { \ + if(index_value.get() == rt.index_value.get()) \ + { \ + 
values.copy_assign(rt.values, index_value.get()); \ + } \ + else \ + { \ + values.destruct(index_value.get()); \ + index_value.set(variant_npos); /* in case copy_construct throws*/ \ + values.copy_construct(rt, rt.index_value.get()); \ + index_value = rt.index_value; \ + } \ + return *this; \ + } + +#define VULKAN_CPU_UTIL_VARIANT_BASE_COPY_ASSIGN_OP_false \ + variant_base &operator=(const variant_base &rt) = delete; + +#define VULKAN_CPU_UTIL_VARIANT_BASE_MOVE_ASSIGN_OP_true \ + variant_base &operator=(variant_base &&rt) noexcept( \ + detail::variant_values::is_nothrow_move_assignable) \ + { \ + if(index_value.get() == rt.index_value.get()) \ + { \ + values.move_assign(std::move(rt.values), index_value.get()); \ + } \ + else \ + { \ + values.destruct(index_value.get()); \ + index_value.set(variant_npos); /* in case move_construct throws*/ \ + values.move_construct(std::move(rt), rt.index_value.get()); \ + index_value = rt.index_value; \ + } \ + return *this; \ + } + +#define VULKAN_CPU_UTIL_VARIANT_BASE_MOVE_ASSIGN_OP_false \ + variant_base &operator=(variant_base &&rt) = delete; + +#define VULKAN_CPU_UTIL_VARIANT_BASE0(Is_Trivially_Destructible, \ + Is_Copy_Constructible, \ + Is_Move_Constructible, \ + Is_Copy_Assignable, \ + Is_Move_Assignable) \ + template \ + struct variant_base \ + { \ + detail::variant_values values; \ + detail::variant_index_type index_value; \ + template \ + constexpr variant_base( \ + variant_base_construct_tag, \ + std::size_t index_value, \ + Args &&... 
args) noexcept(noexcept(new(std::declval()) \ + detail::variant_values( \ + std::declval()...))) \ + : values(std::forward(args)...), index_value(index_value) \ + { \ + } \ + VULKAN_CPU_UTIL_VARIANT_BASE_DESTRUCTOR_##Is_Trivially_Destructible \ + VULKAN_CPU_UTIL_VARIANT_BASE_COPY_CONSTRUCTOR_##Is_Copy_Constructible \ + VULKAN_CPU_UTIL_VARIANT_BASE_MOVE_CONSTRUCTOR_##Is_Move_Constructible \ + VULKAN_CPU_UTIL_VARIANT_BASE_COPY_ASSIGN_OP_##Is_Copy_Assignable \ + VULKAN_CPU_UTIL_VARIANT_BASE_MOVE_ASSIGN_OP_##Is_Move_Assignable \ + }; + +#define VULKAN_CPU_UTIL_VARIANT_BASE1( \ + Is_Copy_Constructible, Is_Move_Constructible, Is_Copy_Assignable, Is_Move_Assignable) \ + VULKAN_CPU_UTIL_VARIANT_BASE0(false, \ + Is_Copy_Constructible, \ + Is_Move_Constructible, \ + Is_Copy_Assignable, \ + Is_Move_Assignable) \ + VULKAN_CPU_UTIL_VARIANT_BASE0(true, \ + Is_Copy_Constructible, \ + Is_Move_Constructible, \ + Is_Copy_Assignable, \ + Is_Move_Assignable) + +#define VULKAN_CPU_UTIL_VARIANT_BASE2( \ + Is_Move_Constructible, Is_Copy_Assignable, Is_Move_Assignable) \ + VULKAN_CPU_UTIL_VARIANT_BASE1( \ + false, Is_Move_Constructible, Is_Copy_Assignable, Is_Move_Assignable) \ + VULKAN_CPU_UTIL_VARIANT_BASE1( \ + true, Is_Move_Constructible, Is_Copy_Assignable, Is_Move_Assignable) + +#define VULKAN_CPU_UTIL_VARIANT_BASE3(Is_Copy_Assignable, Is_Move_Assignable) \ + VULKAN_CPU_UTIL_VARIANT_BASE2(false, Is_Copy_Assignable, Is_Move_Assignable) \ + VULKAN_CPU_UTIL_VARIANT_BASE2(true, Is_Copy_Assignable, Is_Move_Assignable) + +#define VULKAN_CPU_UTIL_VARIANT_BASE4(Is_Move_Assignable) \ + VULKAN_CPU_UTIL_VARIANT_BASE3(false, Is_Move_Assignable) \ + VULKAN_CPU_UTIL_VARIANT_BASE3(true, Is_Move_Assignable) + +VULKAN_CPU_UTIL_VARIANT_BASE4(false) +VULKAN_CPU_UTIL_VARIANT_BASE4(true) template struct variant_is_in_place_index @@ -743,49 +915,36 @@ typename std::common_type()(std::declval())) template typename std::common_type()(std::declval()))...>::type variant_dispatch(Fn &&fn, const variant &&v, 
Args &&... args); + +template +using variant_base_t = variant_base(), + detail::variant_values::is_copy_constructible, + detail::variant_values::is_move_constructible, + detail::variant_values::is_copy_assignable, + detail::variant_values::is_move_assignable, + Types...>; } template -class variant - : private detail::variant_base::is_trivially_destructible, - Types...> +class variant : private detail::variant_base_t { static_assert(sizeof...(Types) > 0, "empty variant is not permitted"); private: typedef typename detail::variant_values::type_0 type_0; - typedef detail::variant_base::is_trivially_destructible, - Types...> base; + typedef detail::variant_base_t base; private: using base::values; using base::index_value; public: - template < - typename = typename std::enable_if::value>::value> + template ::value>::type> constexpr variant() noexcept(std::is_nothrow_default_constructible::value) - : base(0) - { - } - template < - typename = - typename std::enable_if::is_copy_constructible>::type> - variant(const variant &rt) noexcept( - detail::variant_values::is_nothrow_copy_constructible) - : base(variant_npos, in_place_index) + : base(detail::variant_base_construct_tag{}, 0) { - values.copy_construct(rt.values, rt.index_value.get()); - index_value = rt.index_value; - } - template < - typename = - typename std::enable_if::is_move_constructible>::type> - variant(variant &&rt) noexcept(detail::variant_values::is_nothrow_move_constructible) - : base(variant_npos, in_place_index) - { - values.move_construct(std::move(rt.values), rt.index_value.get()); - index_value = rt.index_value; } template < typename T, @@ -798,7 +957,10 @@ public: T>::value>::type> constexpr variant(T &&value) noexcept( std::is_nothrow_constructible>, T>::value) - : base(Index, in_place_index, std::forward(value)) + : base(detail::variant_base_construct_tag{}, + Index, + in_place_index, + std::forward(value)) { } template ::value>::type> constexpr explicit variant(in_place_type_t, Args &&... 
args) noexcept( std::is_nothrow_constructible::value) - : base(Index, in_place_index, std::forward(args)...) + : base(detail::variant_base_construct_tag{}, + Index, + in_place_index, + std::forward(args)...) { } template < @@ -825,7 +990,11 @@ public: Args &&... args) noexcept(std::is_nothrow_constructible, Args...>::value) - : base(Index, in_place_index, il, std::forward(args)...) + : base(detail::variant_base_construct_tag{}, + Index, + in_place_index, + il, + std::forward(args)...) { } template , Args &&... args) noexcept( std::is_nothrow_constructible>, Args...>::value) - : base(Index, in_place_index, std::forward(args)...) + : base(detail::variant_base_construct_tag{}, + Index, + in_place_index, + std::forward(args)...) { } template >, std::initializer_list, Args...>::value) - : base(Index, in_place_index, il, std::forward(args)...) - { - } - template < - typename = - typename std::enable_if::is_copy_assignable>::type> - variant &operator=(const variant &rt) noexcept( - detail::variant_values::is_nothrow_copy_assignable) - { - if(index_value.get() == rt.index_value.get()) - { - values.copy_assign(rt.values, index_value.get()); - } - else - { - values.destruct(index_value.get()); - index_value.set(variant_npos); // in case copy_construct throws - values.copy_construct(rt, rt.index_value.get()); - index_value = rt.index_value; - } - return *this; - } - template < - typename = - typename std::enable_if::is_move_assignable>::type> - variant &operator=(variant &&rt) noexcept( - detail::variant_values::is_nothrow_move_assignable) + : base(detail::variant_base_construct_tag{}, + Index, + in_place_index, + il, + std::forward(args)...) 
{ - if(index_value.get() == rt.index_value.get()) - { - values.move_assign(std::move(rt.values), index_value.get()); - } - else - { - values.destruct(index_value.get()); - index_value.set(variant_npos); // in case move_construct throws - values.move_construct(std::move(rt), rt.index_value.get()); - index_value = rt.index_value; - } - return *this; } template < typename T, @@ -917,7 +1055,7 @@ public: values.destruct(index_value.get()); index_value.set(variant_npos); // in case construction throws auto &value = detail::variant_get::get(values); - new(const_cast(std::addressof(value))) + new(const_cast(static_cast(std::addressof(value)))) variant_alternative_t>(std::forward(new_value)); index_value.set(Index); } @@ -956,7 +1094,7 @@ public: values.destruct(index_value.get()); index_value.set(variant_npos); // in case construction throws auto &value = detail::variant_get::get(values); - new(const_cast(std::addressof(value))) + new(const_cast(static_cast(std::addressof(value)))) variant_alternative_t>(std::forward(args)...); index_value.set(Index); } @@ -974,7 +1112,7 @@ public: values.destruct(index_value.get()); index_value.set(variant_npos); // in case construction throws auto &value = detail::variant_get::get(values); - new(const_cast(std::addressof(value))) + new(const_cast(static_cast(std::addressof(value)))) variant_alternative_t>(il, std::forward(args)...); index_value.set(Index); } @@ -986,8 +1124,10 @@ public: { return index_value.get(); } - template < - typename = typename std::enable_if::is_swappable>::type> + template ::is_swappable>::type> void swap(variant &rt) noexcept(detail::variant_values::is_nothrow_swappable) { if(index_value.get() == rt.index_value.get()) @@ -1017,7 +1157,8 @@ public: operator<(const variant &l, const variant &r) noexcept( detail::variant_values::is_nothrow_less_comparable); template - friend typename std::common_type()(std::declval()))...>::type + friend + typename std::common_type()(std::declval()))...>::type 
detail::variant_dispatch(Fn &&fn, variant &v, Args &&... args); template friend typename std::common_type::type; } } -#endif /* SOURCE_UTIL_VOID_T_H_ */ +#endif /* UTIL_VOID_T_H_ */