From: Jacob Lifshay Date: Fri, 23 Jun 2017 09:15:54 +0000 (-0700) Subject: added text conversion utilities X-Git-Tag: gsoc-2017~87 X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=4139e4d067fbcaee71e46c83080a3974b9026ee0;p=kazan.git added text conversion utilities --- diff --git a/src/util/filesystem.h b/src/util/filesystem.h new file mode 100644 index 0000000..52f9997 --- /dev/null +++ b/src/util/filesystem.h @@ -0,0 +1,869 @@ +/* + * Copyright 2017 Jacob Lifshay + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + */ +#ifndef UTIL_FILESYSTEM_H_ +#define UTIL_FILESYSTEM_H_ + +#include +#include +#include +#include +#include +#include +#include "bit_intrinsics.h" +#include "string_view.h" +#include "void_t.h" +#include +#include +#include "optional.h" +#include "text.h" +#include + +/* NOTE(review): the #error directive below stops compilation on purpose; this header is unfinished work in progress. */ #error finish + +namespace vulkan_cpu +{ +namespace util +{ +namespace filesystem +{ +namespace detail +{ +/* Names the two supported path flavours; default_path_traits_kind below is windows when _WIN32 is defined and posix otherwise. */ enum class Path_traits_kind +{ + posix, + windows, +}; + +#ifdef _WIN32 +constexpr Path_traits_kind default_path_traits_kind = Path_traits_kind::windows; +#else +constexpr Path_traits_kind default_path_traits_kind = Path_traits_kind::posix; +#endif + +/* Primary Path_traits: char as value_type with '/' as preferred_separator; the explicit specialization that follows uses wchar_t and a backslash. */ template +struct Path_traits +{ + typedef char value_type; + static constexpr value_type preferred_separator = '/'; +}; + +template <> +struct Path_traits +{ + typedef wchar_t value_type; + static constexpr value_type preferred_separator = L'\\'; +}; + +/* Kind recorded for a parsed path: a single root name, root directory or file name, or multiple_parts (see basic_path::parse). */ enum class Path_kind +{ + root_name, + root_dir, + file_name, + multiple_parts, +}; + +/* Trait whose value is false in the primary template and true (with a Char_type typedef) in the explicit specializations below for char, wchar_t, char16_t and char32_t. */ template +struct Path_is_convertable_char_type +{ + static constexpr bool value = false; +}; + +template +struct Path_is_convertable_char_type : public Path_is_convertable_char_type +{ +}; + +template <> +struct Path_is_convertable_char_type +{ + static constexpr bool value = true; + typedef char Char_type; +}; + +template <> +struct Path_is_convertable_char_type +{ + static constexpr bool value = true; + typedef wchar_t Char_type; +}; + +template <> +struct Path_is_convertable_char_type +{ + static constexpr bool value = true; + typedef char16_t Char_type; +}; + +template <> +struct Path_is_convertable_char_type +{ + static constexpr bool value = true; + typedef char32_t Char_type; +}; + +template +struct Path_is_convertable_iterator_type +{ + static constexpr bool value = false; +}; + +template +struct Path_is_convertable_iterator_type::value_type>::value>:: + type> +{ + static constexpr bool value = true; + typedef typename Path_is_convertable_char_type< + typename 
std::iterator_traits::value_type>::Char_type Char_type; +}; + +struct Path_iterator_sentinel +{ +}; + +/* Adapts a single iterator so that the end of the sequence is detected by the current element comparing equal to value_type(); a default-constructed adaptor or a Path_iterator_sentinel stands for that end. */ template +class Path_convert_single_iterator_adaptor +{ +private: + typedef std::iterator_traits Traits; + optional base_iterator; + +public: + typedef typename Traits::value_type value_type; + typedef typename Traits::pointer pointer; + typedef typename Traits::reference reference; + typedef typename Traits::difference_type difference_type; + typedef std::input_iterator_tag iterator_category; + constexpr Path_convert_single_iterator_adaptor() noexcept : base_iterator() + { + } + constexpr explicit Path_convert_single_iterator_adaptor(Iterator iterator) + : base_iterator(std::move(iterator)) + { + } + bool operator==(const Path_convert_single_iterator_adaptor &rt) const + { + if(base_iterator) + { + assert(!rt.base_iterator); + return *base_iterator == value_type(); + } + if(rt.base_iterator) + return *rt.base_iterator == value_type(); + return true; + } + bool operator!=(const Path_convert_single_iterator_adaptor &rt) const + { + return !operator==(rt); + } + bool operator==(Path_iterator_sentinel) const + { + if(base_iterator) + return *base_iterator == value_type(); + return true; + } + bool operator!=(Path_iterator_sentinel) const + { + return !operator==(Path_iterator_sentinel()); + } + Path_convert_single_iterator_adaptor &operator++() + { + if(base_iterator) + ++(*base_iterator); + return *this; + } + Path_convert_single_iterator_adaptor operator++(int) + { + if(base_iterator) + return Path_convert_single_iterator_adaptor((*base_iterator)++); + return {}; + } + reference operator*() const + { + return **base_iterator; + } + pointer operator->() const + { + return std::addressof(operator*()); + } +}; + +template +struct Iterator_and_sentinel +{ + Iterator iterator; + Sentinel sentinel; + Iterator_and_sentinel(Iterator iterator, Sentinel sentinel) + : iterator(std::move(iterator)), sentinel(std::move(sentinel)) + { + } +}; + +/* Input iterator that decodes one code point at a time from the stored iterator/sentinel pair and exposes the units of its encoding one by one; it assigns itself the default-constructed (end) state when decode returns eof. */ template ::Char_type> +class 
Path_convert_iterator +{ +private: + typedef decltype(text::Decode_encode_functions::encode( + char32_t(), text::Convert_options())) Encode_result; + static_assert(std::is_same::value, ""); + +public: + typedef Dest_char_type value_type; + typedef const Dest_char_type *pointer; + typedef const Dest_char_type &reference; + typedef std::ptrdiff_t difference_type; + typedef std::input_iterator_tag iterator_category; + +private: + Encode_result encode_result; + std::size_t encode_result_index; + util::optional> iterator_and_sentinel; + void convert_next() + { + std::char_traits::int_type ch = + text::Decode_encode_functions::decode(iterator_and_sentinel->iterator, + iterator_and_sentinel->sentinel, + text::Convert_options()); + if(ch == std::char_traits::eof()) + *this = Path_convert_iterator(); + else + { + encode_result = text::Decode_encode_functions::encode(ch); + encode_result_index = 0; + } + } + +public: + constexpr Path_convert_iterator() noexcept : encode_result(), + encode_result_index(), + iterator_and_sentinel() + { + } + Path_convert_iterator(Iterator iterator, Sentinel sentinel) + : encode_result(), + encode_result_index(), + iterator_and_sentinel(in_place, std::move(iterator), std::move(sentinel)) + { + convert_next(); + } + Path_convert_iterator &operator++() + { + if(++encode_result_index >= encode_result.size()) + convert_next(); + return *this; + } + Path_convert_iterator operator++(int) + { + auto retval = *this; + operator++(); + return retval; + } + /* NOTE(review): operator* and operator-> return char32_t although value_type, pointer and reference are declared in terms of Dest_char_type; confirm which is intended. */ const char32_t &operator*() const noexcept + { + return encode_result[encode_result_index]; + } + const char32_t *operator->() const noexcept + { + return &encode_result[encode_result_index]; + } + bool operator==(const Path_convert_iterator &rt) const noexcept + { + return iterator_and_sentinel.has_value() == rt.iterator_and_sentinel.has_value(); + } + bool operator!=(const Path_convert_iterator &rt) const noexcept + { + return !operator==(rt); + } + bool operator==(Path_iterator_sentinel) const 
noexcept + { + return !iterator_and_sentinel; + } + bool operator!=(Path_iterator_sentinel) const noexcept + { + return !operator==(Path_iterator_sentinel()); + } +}; + +/* Path_convert_range: the primary template reports is_convertible = false; the specializations below provide to_string, either through Path_convert_iterator or by copying the range directly. */ template +struct Path_convert_range +{ + static constexpr bool is_convertible = false; +}; + +template +struct Path_convert_range::value>::type> +{ + static constexpr bool is_convertible = true; + template + static std::basic_string to_string(Iterator iterator, Sentinel sentinel) + { + typedef Path_convert_iterator Convert_iterator; + return std::basic_string(Convert_iterator(iterator, sentinel), + Convert_iterator()); + } +}; + +template +struct Path_convert_range::Char_type, + Iterator, + void> +{ + static constexpr bool is_convertible = true; + typedef typename Path_is_convertable_iterator_type::Char_type Char_type; + static std::basic_string to_string(Iterator iterator, Iterator sentinel) + { + return std::basic_string(iterator, sentinel); + } + template + static std::basic_string to_string(Iterator iterator, Sentinel sentinel) + { + std::basic_string retval; + while(iterator != sentinel) + retval += *iterator++; + return retval; + } +}; + +template +struct Path_convert_source +{ + static constexpr bool is_convertible = false; +}; + +template +struct Path_convert_source, + typename std:: + enable_if:: + const_iterator>:: + is_convertible>::type> +{ + typedef Path_convert_range:: + const_iterator> Convert_range; + static constexpr bool is_convertible = true; + static std::basic_string to_string( + const std::basic_string &source) + { + return Convert_range::to_string(source.begin(), source.end()); + } +}; + +template +struct + Path_convert_source, + typename std:: + enable_if:: + const_iterator>::is_convertible>::type> +{ + typedef Path_convert_range::const_iterator> + Convert_range; + static constexpr bool is_convertible = true; + static std::basic_string to_string( + const basic_string_view &source) + { + return Convert_range::to_string(source.begin(), source.end()); + } +}; + +template 
+struct Path_convert_source, + typename std:: + enable_if:: + const_iterator>::is_convertible>:: + type> +{ + static constexpr bool is_convertible = true; + static std::basic_string to_string(const std::basic_string &source) + { + return source; + } +}; + +template +struct Path_convert_source::Char_type>> +{ + static constexpr bool is_convertible = true; + typedef Path_convert_range> + Convert_range; + static std::basic_string to_string(Iterator iterator) + { + return Convert_range::to_string(Path_convert_single_iterator_adaptor(iterator), + Path_iterator_sentinel()); + } +}; + +template +struct Path_convert_source +{ + static constexpr bool is_convertible = true; + static std::basic_string to_string(const Char_type *source) + { + return source; + } +}; + +#error finish + +template ::value_type, + Char_type Preferred_separator = Path_traits::preferred_separator> +class basic_path +{ +public: + typedef Char_type value_type; + typedef std::basic_string string_type; + enum format + { + native_format, + generic_format, + auto_format + }; + static constexpr Char_type preferred_separator = Preferred_separator; + +private: + typedef basic_string_view string_view_type; + /* Manually managed growable array of basic_path: storage comes from allocate/deallocate, elements are created with placement new in construct and destroyed explicitly in destruct. */ class Parts + { + private: + std::size_t allocated_count; + std::size_t used_count; + basic_path *values; + + private: + static basic_path *allocate(std::size_t count); + template + static void construct(basic_path &value, Args &&... 
args) + { + ::new(static_cast(&value)) basic_path(std::forward(args)...); + } + static void destruct(basic_path &value) noexcept + { + value.~basic_path(); + } + static void deallocate(basic_path *values, std::size_t count) noexcept; + void reallocate(std::size_t new_allocated_count) + { + assert(new_allocated_count >= used_count); + if(used_count == 0) + { + deallocate(values, allocated_count); + values = nullptr; + allocated_count = 0; // set now in case allocate throws + values = allocate(new_allocated_count); + allocated_count = new_allocated_count; + } + else + { + Parts new_parts; + new_parts.reserve(new_allocated_count); + for(std::size_t i = 0; i < used_count; i++) + new_parts.push_back(std::move(values[i])); + swap(new_parts); + } + } + /* Presumably rounds count up to a power of two, assuming clz64/clz32 from bit_intrinsics.h count leading zero bits; 0 and values with the top bit set are returned unchanged. */ static constexpr std::uint64_t get_expanded_count_64(std::uint64_t count) noexcept + { + constexpr std::uint64_t high_bit = 1ULL << 63; + if(count == 0 || count >= high_bit) + return count; + return 1ULL << (64 - clz64(count - 1)); + } + static constexpr std::uint32_t get_expanded_count_32(std::uint32_t count) noexcept + { + constexpr std::uint32_t high_bit = 1UL << 31; + if(count == 0 || count >= high_bit) + return count; + return 1UL << (32 - clz32(count - 1)); + } + static constexpr std::size_t get_expanded_count(std::size_t count) noexcept + { + constexpr bool is_size_t_uint32_t = std::is_same::value, + is_size_t_uint64_t = std::is_same::value; + static_assert(is_size_t_uint32_t || is_size_t_uint64_t, ""); + if(is_size_t_uint32_t) + return get_expanded_count_32(static_cast(count)); + return static_cast(get_expanded_count_64(count)); + } + + public: + constexpr Parts() noexcept : allocated_count(0), used_count(0), values(nullptr) + { + } + /* NOTE(review): the loop below calls push_back on *this rather than on new_parts, and the final swap then exchanges the filled *this with the still-empty new_parts, so the copy appears to end up empty; verify. */ Parts(const Parts &rt) : Parts() + { + Parts new_parts; + new_parts.reserve(rt.used_count); + for(std::size_t i = 0; i < rt.used_count; i++) + push_back(rt.values[i]); + swap(new_parts); + } + Parts(Parts &&rt) noexcept : Parts() + { + swap(rt); + } + Parts &operator=(Parts &&rt) 
noexcept + { + Parts(std::move(rt)).swap(*this); + return *this; + } + /* Copy assignment: goes through a temporary copy when the current capacity is too small; otherwise trims, assigns over the existing elements and appends the remaining ones in place. */ Parts &operator=(const Parts &rt) + { + if(this == &rt) + return *this; + if(allocated_count < rt.used_count) + { + Parts(rt).swap(*this); + return *this; + } + while(used_count > rt.used_count) + pop_back(); + for(std::size_t i = 0; i < used_count; i++) + values[i] = rt[i]; + while(used_count < rt.used_count) + push_back(rt[used_count]); + return *this; + } + ~Parts() noexcept + { + while(used_count > 0) + destruct(values[--used_count]); + deallocate(values, allocated_count); + } + void swap(Parts &rt) noexcept + { + using std::swap; + swap(allocated_count, rt.allocated_count); + swap(used_count, rt.used_count); + swap(values, rt.values); + } + void reserve(std::size_t new_allocated_count) + { + if(new_allocated_count > allocated_count) + reallocate(new_allocated_count); + } + bool empty() const noexcept + { + return used_count == 0; + } + std::size_t size() const noexcept + { + return used_count; + } + std::size_t capacity() const noexcept + { + return allocated_count; + } + typedef basic_path *iterator; + typedef const basic_path *const_iterator; + iterator begin() noexcept + { + return values; + } + iterator end() noexcept + { + return values + used_count; + } + const_iterator begin() const noexcept + { + return values; + } + const_iterator end() const noexcept + { + return values + used_count; + } + const_iterator cbegin() const noexcept + { + return values; + } + const_iterator cend() const noexcept + { + return values + used_count; + } + template + void emplace_back(Args &&... 
args) + { + if(used_count >= allocated_count) + reallocate(get_expanded_count(used_count + 1)); + construct(values[used_count], std::forward(args)...); + used_count++; + } + void push_back(const basic_path &v) + { + emplace_back(v); + } + /* NOTE(review): v is passed on as an lvalue here, so this rvalue overload copies instead of moving. */ void push_back(basic_path &&v) + { + emplace_back(v); + } + void pop_back() noexcept + { + assert(used_count > 0); + destruct(values[--used_count]); + } + void clear() noexcept + { + while(used_count > 0) + pop_back(); + } + basic_path &operator[](std::size_t index) noexcept + { + assert(index < used_count); + return values[index]; + } + const basic_path &operator[](std::size_t index) const noexcept + { + assert(index < used_count); + return values[index]; + } + }; + +private: + Parts parts; + string_type value; + Path_kind kind; + +private: + static constexpr bool is_ascii_letter(Char_type v) noexcept + { + auto ch = static_cast(v); + if(static_cast(ch) != v) + return false; + if(ch >= 'a' && ch <= 'z') + return true; + if(ch >= 'A' && ch <= 'Z') + return true; + return false; + } + static constexpr bool is_separator(Char_type v) noexcept + { + return v == static_cast('/') || v == preferred_separator; + } + /* Walks value and calls callback(begin, end, kind) once per recognised part: an optional root name (windows traits only: a drive letter plus colon, or a name introduced by two separators), an optional root directory made of a run of separators, then file names split at runs of separators. The fmt argument is not read in the body. */ template + static void parse(string_view_type value, Fn callback, format fmt = auto_format) noexcept( + noexcept(callback(typename string_view_type::iterator(), + typename string_view_type::iterator(), + Path_kind()))) + { + constexpr Char_type dot = '.'; + constexpr Char_type colon = ':'; + typedef typename std::char_traits::int_type Int_type; + constexpr Int_type eof = std::char_traits::eof(); + auto char_iter = value.begin(); + auto peek = [&]() -> Int_type + { + if(char_iter == value.end()) + return eof; + return std::char_traits::to_int_type(*char_iter); + }; + auto get = [&]() -> Int_type + { + if(char_iter == value.end()) + return eof; + return std::char_traits::to_int_type(*char_iter++); + }; + if(value.empty()) + return; + if(Traits_kind == Path_traits_kind::windows && value.size() >= 2 + && is_ascii_letter(value[0]) 
+ && value[1] == colon) + { + char_iter += 2; + callback(value.begin(), char_iter, Path_kind::root_name); + } + else if(Traits_kind == Path_traits_kind::windows && value.size() >= 2 + && is_separator(value[0]) + && is_separator(value[1])) + { + while(peek() != eof && is_separator(peek())) + get(); + while(peek() != eof && !is_separator(peek())) + get(); + callback(value.begin(), char_iter, Path_kind::root_name); + } + if(peek() != eof && is_separator(peek())) + { + auto start_iter = char_iter; + do + { + get(); + } while(peek() != eof && is_separator(peek())); + callback(start_iter, char_iter, Path_kind::root_dir); + } + if(peek() != eof && !is_separator(peek())) + { + auto start_iter = char_iter; + do + { + get(); + } while(peek() != eof && !is_separator(peek())); + callback(start_iter, char_iter, Path_kind::file_name); + } + while(peek() != eof) + { + do + { + get(); + } while(peek() != eof && is_separator(peek())); + auto start_iter = char_iter; + while(peek() != eof && !is_separator(peek())) + get(); + callback(start_iter, char_iter, Path_kind::file_name); + } + } + /* Runs the static parse twice over value: the first pass only counts parts; with exactly one part the path takes that part's kind and keeps no sub-parts, otherwise kind becomes multiple_parts and the second pass stores each part's text and kind in parts. */ void parse(format fmt = auto_format) + { + auto last_part_kind = Path_kind::multiple_parts; + std::size_t part_count = 0; + parse(value, + [&]([[gnu::unused]] typename string_view_type::iterator part_string_begin, + [[gnu::unused]] typename string_view_type::iterator part_string_end, + Path_kind part_kind) noexcept + { + last_part_kind = part_kind; + part_count++; + }, + fmt); + if(part_count == 1) + { + kind = last_part_kind; + parts.clear(); + return; + } + else + { + kind = Path_kind::multiple_parts; + } + while(parts.size() > part_count) + parts.pop_back(); + parts.reserve(part_count); + std::size_t part_index = 0; + parse(value, + [&](typename string_view_type::iterator part_string_begin, + typename string_view_type::iterator part_string_end, + Path_kind part_kind) noexcept + { + if(part_index >= parts.size()) + parts.emplace_back(); + parts[part_index].value.assign(part_string_begin, 
 part_string_end); + parts[part_index].kind = part_kind; + part_index++; + }, + fmt); + } + static void convert_source(string_type &output_value, const string_type &source) + { + output_value = source; + } + template + static void convert_source(string_type &output_value, + const std::basic_string &source) + { + convert_source(output_value, source.begin(), source.end()); + } + template + static void convert_source(string_type &output_value, + const basic_string_view &source) + { + convert_source(output_value, source.begin(), source.end()); + } + template + static void convert_source(string_type &output_value, const Char_type2 *source) + { + convert_source(output_value, basic_string_view(source)); + } + /* NOTE(review): the template header below is left open and runs straight into the public section; this part of the class is unfinished. */ template < + + public : basic_path() noexcept : parts(), + value(), + kind(Path_kind::multiple_parts) + { + } + basic_path(const basic_path &) = default; + basic_path(basic_path &&) noexcept = default; + basic_path(string_type &&source, format fmt = auto_format) + : parts(), value(std::move(source)), kind() + { + parse(fmt); + } + template + basic_path(const Source &source, format fmt = auto_format) + : basic_path() + { + convert_source(value, source); + parse(fmt); + } + template + basic_path(Input_iterator first, Input_iterator last, format fmt = auto_format) + : basic_path() + { + convert_source(value, first, last); + parse(fmt); + } +}; + +template +constexpr Char_type basic_path::preferred_separator; + +/* Out-of-class definitions of Parts::allocate and Parts::deallocate: a count of 0 yields nullptr and skips deallocation respectively. */ template +basic_path + *basic_path::Parts::allocate(std::size_t count) +{ + if(count == 0) + return nullptr; + return std::allocator::allocate(count); +} + +template +void basic_path::Parts::deallocate( + basic_path *values, std::size_t count) noexcept +{ + if(count != 0) + std::allocator::deallocate(values, count); +} +} +} +} +} + +#endif /* UTIL_FILESYSTEM_H_ */ diff --git a/src/util/text.h b/src/util/text.h new file mode 100644 index 0000000..3ea1245 --- /dev/null +++ b/src/util/text.h @@ -0,0 +1,573 @@ +/* + * Copyright 2012-2017 Jacob Lifshay + * + * 
Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ +/* translated from + * https://github.com/programmerjake/hashlife-voxels/blob/5dda3bc240e1e89f43606316d1c3202221e3b06b/util/text.h + */ + +#ifndef UTIL_TEXT_H_ +#define UTIL_TEXT_H_ + +#include +#include +#include +#include +#include +#include +#include +#include "string_view.h" + +namespace vulkan_cpu +{ +namespace util +{ +namespace text +{ +constexpr char32_t replacement_character = U'\uFFFD'; + +template +typename std::char_traits::int_type decode_utf8( + Input_iterator &iter, + Sentinel sentinel, + bool allow_surrogate_code_points = true, + bool allow_2_byte_null = false, + typename std::char_traits::int_type error_value = + replacement_character) noexcept(noexcept(++iter) && noexcept(static_cast(*iter)) + && noexcept(iter == sentinel ? 
0 : 0)) +{ + if(iter == sentinel) + return error_value; + auto byte0 = static_cast(static_cast(*iter)); + ++iter; + if(byte0 < 0x80) + return byte0; + if(allow_2_byte_null && byte0 == 0xC0) + { + if(iter == sentinel) + return error_value; + auto byte1 = static_cast(static_cast(*iter)); + ++iter; + if(byte1 != 0x80) + return error_value; + return 0; + } + if(byte0 > 0xF4 || byte0 < 0xC2) + return error_value; + if(iter == sentinel) + return error_value; + auto byte1 = static_cast(static_cast(*iter)); + if(byte1 < 0x80 || byte1 >= 0xC0) + return error_value; + if(byte0 < 0xE0) + { + ++iter; + return (static_cast(byte0 & 0x1F) << 6) | (byte1 & 0x3F); + } + if(byte0 == 0xE0 && byte1 < 0xA0) + return error_value; + if(byte0 == 0xF0 && byte1 < 0x90) + return error_value; + if(byte0 == 0xF4 && byte1 >= 0x90) + return error_value; + if(!allow_surrogate_code_points && byte0 == 0xED && byte1 >= 0xA0) + return error_value; + if(iter == sentinel) + return error_value; + ++iter; + auto byte2 = static_cast(static_cast(*iter)); + ++iter; + if(byte2 < 0x80 || byte2 >= 0xC0) + return error_value; + if(byte0 < 0xF0) + return (static_cast(byte0 & 0xF) << 12) + | (static_cast(byte1 & 0x3F) << 6) | (byte2 & 0x3F); + if(iter == sentinel) + return error_value; + auto byte3 = static_cast(static_cast(*iter)); + ++iter; + if(byte3 < 0x80 || byte3 >= 0xC0) + return error_value; + return (static_cast(byte0 & 0x7) << 18) + | (static_cast(byte1 & 0x3F) << 12) + | (static_cast(byte2 & 0x3F) << 6) | (byte3 & 0x3F); +} + +template +struct Encoded_character final +{ + static constexpr std::size_t max_Chars = N; + typedef T Char_type; + static_assert(max_Chars != 0, ""); + Char_type chars[max_Chars]; + std::size_t used; + Char_type &front() + { + return chars[0]; + } + constexpr const Char_type &front() const + { + return chars[0]; + } + Char_type &back() + { + return chars[0]; + } + constexpr const Char_type &back() const + { + return chars[0]; + } + typedef const Char_type *const_iterator; + 
typedef Char_type *iterator; + constexpr const_iterator begin() const + { + return &chars[0]; + } + constexpr const_iterator end() const + { + return begin() + used; + } + constexpr const_iterator cbegin() const + { + return &chars[0]; + } + constexpr const_iterator cend() const + { + return begin() + used; + } + iterator begin() + { + return &chars[0]; + } + iterator end() + { + return begin() + used; + } + constexpr std::size_t capacity() const + { + return max_Chars; + } + constexpr std::size_t size() const + { + return used; + } + constexpr const Char_type &operator[](std::size_t index) const + { + return (assert(index < used), chars[index]); + } + Char_type &operator[](std::size_t index) + { + assert(index < used); + return chars[index]; + } + constexpr Encoded_character() : chars(), used(0) + { + } + +private: + static constexpr Char_type implicit_conversion_helper(Char_type ch) noexcept + { + return ch; + } + +public: + template + constexpr Encoded_character(Args &&... args) + : chars{implicit_conversion_helper(std::forward(args))...}, used(sizeof...(args)) + { + static_assert(sizeof...(args) <= max_Chars, ""); + } + template + operator std::basic_string() const + { + return std::basic_string(begin(), end()); + } + template + friend std::basic_string operator+( + std::basic_string a, const Encoded_character &b) + { + a.append(b.begin(), b.end()); + return a; + } + template + friend std::basic_string operator+( + const Encoded_character &a, std::basic_string b) + { + b.insert(b.begin(), a.begin(), a.end()); + return b; + } + template + friend std::basic_string operator+(const Encoded_character &a, + const Encoded_character &b) + { + std::basic_string retval; + retval.reserve(a.size() + b.size()); + retval.append(a.begin(), a.end()); + retval.append(b.begin(), b.end()); + return retval; + } + template + friend std::basic_ostream &operator<<( + std::basic_ostream &os, const Encoded_character &a) + { + os << static_cast>(a); + return os; + } +}; + 
+Encoded_character encode_utf8(char32_t ch, bool use_2_byte_null = false) noexcept +{ + assert(ch < 0x10FFFFUL && ch >= 0); + if(use_2_byte_null && ch == 0) + return Encoded_character(0xC0U, 0x80U); + if(ch < 0x80) + return Encoded_character(ch); + if(ch < 0x800) + return Encoded_character(0xC0 | (ch >> 6), 0x80 | (ch & 0x3F)); + if(ch < 0x10000UL) + return Encoded_character( + 0xE0 | (ch >> 12), 0x80 | ((ch >> 6) & 0x3F), 0x80 | (ch & 0x3F)); + return Encoded_character(0xF0 | (ch >> 18), + 0x80 | ((ch >> 12) & 0x3F), + 0x80 | ((ch >> 6) & 0x3F), + 0x80 | (ch & 0x3F)); +} + +template +typename std::char_traits::int_type decode_utf16( + Input_iterator &iter, + Sentinel sentinel, + bool allow_unpaired_surrogate_code_units = true, + typename std::char_traits::int_type error_value = + replacement_character) noexcept(noexcept(++iter) && noexcept(static_cast(*iter)) + && noexcept(iter == sentinel ? 0 : 0)) +{ + if(iter == sentinel) + return error_value; + auto unit0 = static_cast(static_cast(*iter)); + ++iter; + if(unit0 >= 0xD800U && unit0 < 0xDC00U) + { + if(iter == sentinel) + return allow_unpaired_surrogate_code_units ? unit0 : error_value; + auto unit1 = static_cast(static_cast(*iter)); + if(unit1 < 0xDC00U || unit1 >= 0xE000U) + return allow_unpaired_surrogate_code_units ? unit0 : error_value; + ++iter; + return 0x10000UL + ((unit0 & 0x3FF) << 10) + (unit1 & 0x3FF); + } + return unit0; +} + +Encoded_character encode_utf16(char32_t ch) noexcept +{ + assert(ch < 0x10FFFFUL && ch >= 0); + if(ch < 0x10000UL) + return Encoded_character(ch); + return Encoded_character(0xD800U | ((ch - 0x10000UL) >> 10), + 0xDC00U | ((ch - 0x10000UL) & 0x3FF)); +} + +template +typename std::char_traits::int_type decode_utf32( + Input_iterator &iter, + Sentinel sentinel, + bool allow_Surrogate_Code_Units = true, + typename std::char_traits::int_type error_value = + replacement_character) noexcept(noexcept(++iter) && noexcept(static_cast(*iter)) + && noexcept(iter == sentinel ? 
0 : 0)) +{ + if(iter == sentinel) + return error_value; + auto retval = static_cast(static_cast(*iter)); + ++iter; + if(retval > 0x10FFFFUL) + return error_value; + if(!allow_Surrogate_Code_Units && retval >= 0xD800U && retval < 0xE000U) + return error_value; + return retval; +} + +Encoded_character encode_utf32(char32_t ch) noexcept +{ + return Encoded_character(ch); +} + +static_assert(std::numeric_limits::radix == 2, ""); +static_assert(std::numeric_limits::digits + + static_cast(std::is_signed::value) + >= 16, + ""); + +constexpr bool is_wide_character_utf16 = std::numeric_limits::digits <= 16; + +Encoded_character encode_wide(char32_t ch) noexcept +{ + if(is_wide_character_utf16) + { + auto result = encode_utf16(ch); + Encoded_character retval; + retval.used = result.used; + for(std::size_t i = 0; i < result.size(); i++) + { + retval[i] = static_cast(result[i]); + } + return retval; + } + return Encoded_character(static_cast(ch)); +} + +template +typename std::char_traits::int_type decode_wide( + Input_iterator &iter, + Sentinel sentinel, + bool allow_unpaired_surrogate_code_units = true, + typename std::char_traits::int_type error_value = + replacement_character) noexcept(noexcept(++iter) && noexcept(static_cast(*iter)) + && noexcept(iter == sentinel ? 
0 : 0)) +{ + struct Iterator_wrapper + { + Input_iterator &iter; + Iterator_wrapper(Input_iterator &iter) : iter(iter) + { + } + void operator++() + { + ++iter; + } + wchar_t operator*() + { + return static_cast(*iter); + } + bool operator==(Sentinel &sentinel) + { + return iter == sentinel; + } + }; + Iterator_wrapper iterator_wrapper(iter); + if(is_wide_character_utf16) + return decode_utf16(iterator_wrapper, + std::move(sentinel), + allow_unpaired_surrogate_code_units, + error_value); + return decode_utf32( + iterator_wrapper, std::move(sentinel), allow_unpaired_surrogate_code_units, error_value); +} + +struct Convert_options final +{ + typename std::char_traits::int_type error_value = replacement_character; + bool allow_unpaired_surrogate_code_points = true; + bool allow_2_byte_null = false; + bool use_2_byte_null = false; + constexpr Convert_options() + { + } + constexpr Convert_options(typename std::char_traits::int_type error_value, + bool allow_unpaired_surrogate_code_points, + bool allow_2_byte_null, + bool use_2_byte_null) + : error_value(error_value), + allow_unpaired_surrogate_code_points(allow_unpaired_surrogate_code_points), + allow_2_byte_null(allow_2_byte_null), + use_2_byte_null(use_2_byte_null) + { + } + static constexpr Convert_options strict( + typename std::char_traits::int_type error_value = replacement_character) + { + return Convert_options(error_value, false, false, false); + } + static constexpr Convert_options java( + typename std::char_traits::int_type error_value = replacement_character) + { + return Convert_options(error_value, true, true, true); + } +}; + +template +struct Decode_encode_functions +{ + template + typename std::char_traits::int_type decode( + Input_iterator &iter, Sentinel sentinel, const Convert_options &convert_options) = delete; + Encoded_character encode( + char32_t ch, const Convert_options &convert_options) noexcept = delete; +}; + +template <> +struct Decode_encode_functions +{ + template + typename 
std::char_traits::int_type decode( + Input_iterator &iter, + Sentinel sentinel, + const Convert_options + &convert_options) noexcept(noexcept(decode_utf8(std::declval(), + std::declval()))) + { + return decode_utf8(iter, + std::move(sentinel), + convert_options.allow_unpaired_surrogate_code_points, + convert_options.allow_2_byte_null, + convert_options.error_value); + } + Encoded_character encode(char32_t ch, const Convert_options &convert_options) noexcept + { + return encode_utf8(ch, convert_options.use_2_byte_null); + } +}; + +template <> +struct Decode_encode_functions +{ + template + typename std::char_traits::int_type decode( + Input_iterator &iter, + Sentinel sentinel, + const Convert_options + &convert_options) noexcept(noexcept(decode_utf16(std::declval(), + std::declval()))) + { + return decode_utf16(iter, + std::move(sentinel), + convert_options.allow_unpaired_surrogate_code_points, + convert_options.error_value); + } + Encoded_character encode(char32_t ch, + const Convert_options &convert_options) noexcept + { + return encode_utf16(ch); + } +}; + +template <> +struct Decode_encode_functions +{ + template + typename std::char_traits::int_type decode( + Input_iterator &iter, + Sentinel sentinel, + const Convert_options + &convert_options) noexcept(noexcept(decode_utf32(std::declval(), + std::declval()))) + { + return decode_utf32(iter, + std::move(sentinel), + convert_options.allow_unpaired_surrogate_code_points, + convert_options.error_value); + } + Encoded_character encode(char32_t ch, + const Convert_options &convert_options) noexcept + { + return encode_utf32(ch); + } +}; + +template <> +struct Decode_encode_functions +{ + template + typename std::char_traits::int_type decode( + Input_iterator &iter, + Sentinel sentinel, + const Convert_options + &convert_options) noexcept(noexcept(decode_wide(std::declval(), + std::declval()))) + { + return decode_wide(iter, + std::move(sentinel), + convert_options.allow_unpaired_surrogate_code_points, + 
convert_options.error_value); + } + Encoded_character encode(char32_t ch, + const Convert_options &convert_options) noexcept + { + return encode_wide(ch); + } +}; + +namespace detail +{ +template +struct String_cast_helper; + +template +struct String_cast_helper, + basic_string_view> +{ + static std::basic_string run( + basic_string_view source, + const Convert_options &convert_options) + { + std::basic_string retval; + for(auto iter = source.begin(); iter != source.end();) + { + retval = std::move(retval) + Decode_encode_functions::encode( + Decode_encode_functions::decode( + iter, source.end(), convert_options), + convert_options); + } + return retval; + } +}; + +template +struct String_cast_helper, + basic_string_view> +{ + static std::basic_string run( + basic_string_view source, const Convert_options &) + { + return std::basic_string(source.begin(), + source.end()); + } +}; +} + +template +Target string_cast(basic_string_view source, + const Convert_options &convert_options) +{ + return detail::String_cast_helper>:: + run(source, convert_options); +} + +template +Target string_cast(basic_string_view source) +{ + return detail::String_cast_helper>:: + run(source, Convert_options()); +} +} +} +} +} + +#endif /* UTIL_TEXT_H_ */