From 3eda32cb9b534f89f37c7155cdd8be2d9a5de7a4 Mon Sep 17 00:00:00 2001 From: Jonathan Wakely Date: Sat, 30 Nov 2019 01:03:36 +0000 Subject: [PATCH] libstdc++: Fix experimental::filesystem::u8path(const Source&) for Windows This function failed to compile when called with a std::string. Also, constructing a path with a char8_t string did not correctly treat the string as already UTF-8 encoded. * include/bits/fs_path.h (u8path(InputIterator, InputIterator)) (u8path(const Source&)) [_GLIBCXX_FILESYSTEM_IS_WINDOWS]: Simplify conditions. * include/experimental/bits/fs_path.h [_GLIBCXX_FILESYSTEM_IS_WINDOWS] (__u8path(const Source&, char)): Add overloads for std::string and types convertible to std::string. (_Cvt::_S_wconvert): Add a new overload for char8_t strings and use codecvt_utf8_utf16 to do the correct conversion. From-SVN: r278869 --- libstdc++-v3/ChangeLog | 11 +++ libstdc++-v3/include/bits/fs_path.h | 28 ++----- .../include/experimental/bits/fs_path.h | 78 ++++++++++++------- 3 files changed, 67 insertions(+), 50 deletions(-) diff --git a/libstdc++-v3/ChangeLog b/libstdc++-v3/ChangeLog index ded6e5c139b..54c5f85ab83 100644 --- a/libstdc++-v3/ChangeLog +++ b/libstdc++-v3/ChangeLog @@ -1,3 +1,14 @@ +2019-11-30 Jonathan Wakely + + * include/bits/fs_path.h (u8path(InputIterator, InputIterator)) + (u8path(const Source&)) [_GLIBCXX_FILESYSTEM_IS_WINDOWS]: Simplify + conditions. + * include/experimental/bits/fs_path.h [_GLIBCXX_FILESYSTEM_IS_WINDOWS] + (__u8path(const Source&, char)): Add overloads for std::string and + types convertible to std::string. + (_Cvt::_S_wconvert): Add a new overload for char8_t strings and use + codecvt_utf8_utf16 to do the correct conversion. 
+ 2019-11-29 Jonathan Wakely * include/bits/fs_path.h (path::operator/=): Change template-head to diff --git a/libstdc++-v3/include/bits/fs_path.h b/libstdc++-v3/include/bits/fs_path.h index b129372447b..20ec42da57d 100644 --- a/libstdc++-v3/include/bits/fs_path.h +++ b/libstdc++-v3/include/bits/fs_path.h @@ -691,14 +691,8 @@ namespace __detail u8path(_InputIterator __first, _InputIterator __last) { #ifdef _GLIBCXX_FILESYSTEM_IS_WINDOWS -#ifdef _GLIBCXX_USE_CHAR8_T - if constexpr (is_same_v<_CharT, char8_t>) - { - return path{ __first, __last }; - } - else + if constexpr (is_same_v<_CharT, char>) { -#endif // XXX This assumes native wide encoding is UTF-16. std::codecvt_utf8_utf16<path::value_type> __cvt; path::string_type __tmp; @@ -710,16 +704,16 @@ namespace __detail else { const std::string __u8str{__first, __last}; - const char* const __ptr = __u8str.data(); - if (__str_codecvt_in_all(__ptr, __ptr + __u8str.size(), __tmp, __cvt)) + const char* const __p = __u8str.data(); + if (__str_codecvt_in_all(__p, __p + __u8str.size(), __tmp, __cvt)) return path{ __tmp }; } _GLIBCXX_THROW_OR_ABORT(filesystem_error( "Cannot convert character sequence", std::make_error_code(errc::illegal_byte_sequence))); -#ifdef _GLIBCXX_USE_CHAR8_T } -#endif + else + return path{ __first, __last }; #else // This assumes native normal encoding is UTF-8. 
return path{ __first, __last }; @@ -737,14 +731,8 @@ namespace __detail u8path(const _Source& __source) { #ifdef _GLIBCXX_FILESYSTEM_IS_WINDOWS -#ifdef _GLIBCXX_USE_CHAR8_T - if constexpr (is_same_v<_CharT, char8_t>) - { - return path{ __source }; - } - else + if constexpr (is_same_v<_CharT, char>) { -#endif if constexpr (is_convertible_v<const _Source&, std::string_view>) { const std::string_view __s = __source; @@ -755,9 +743,9 @@ namespace __detail std::string __s = path::_S_string_from_iter(__source); return filesystem::u8path(__s.data(), __s.data() + __s.size()); } -#ifdef _GLIBCXX_USE_CHAR8_T } -#endif + else + return path{ __source }; #else return path{ __source }; #endif diff --git a/libstdc++-v3/include/experimental/bits/fs_path.h b/libstdc++-v3/include/experimental/bits/fs_path.h index 91202e5b008..5ce012eec81 100644 --- a/libstdc++-v3/include/experimental/bits/fs_path.h +++ b/libstdc++-v3/include/experimental/bits/fs_path.h @@ -644,8 +644,22 @@ namespace __detail /// Create a path from a UTF-8-encoded sequence of char #ifdef _GLIBCXX_FILESYSTEM_IS_WINDOWS + inline path + __u8path(const string& __s, char) + { + return filesystem::u8path(__s.data(), __s.data() + __s.size()); + } + template<typename _Source> - inline path + inline __enable_if_t<is_convertible<const _Source&, string>::value, path> + __u8path(const _Source& __source, char) + { + std::string __s = __source; + return filesystem::u8path(__s.data(), __s.data() + __s.size()); + } + + template<typename _Source> + inline __enable_if_t<!is_convertible<const _Source&, string>::value, path> __u8path(const _Source& __source, char) { std::string __s = path::_S_string_from_iter(__source); @@ -733,8 +747,21 @@ namespace __detail struct path::_Cvt { #ifdef _GLIBCXX_FILESYSTEM_IS_WINDOWS +#ifdef _GLIBCXX_USE_CHAR8_T + static string_type + _S_wconvert(const char8_t* __f, const char8_t* __l, const char8_t*) + { + const char* __f2 = (const char*)__f; + const char* __l2 = (const char*)__l; + std::wstring __wstr; + std::codecvt_utf8_utf16<wchar_t> __wcvt; + if (__str_codecvt_in_all(__f2, __l2, __wstr, __wcvt)) + return __wstr; + } +#endif + static string_type - 
_S_wconvert(const char* __f, const char* __l, true_type) + _S_wconvert(const char* __f, const char* __l, const char*) { using _Cvt = std::codecvt<wchar_t, char, mbstate_t>; const auto& __cvt = std::use_facet<_Cvt>(std::locale{}); @@ -747,36 +774,29 @@ namespace __detail } static string_type - _S_wconvert(const _CharT* __f, const _CharT* __l, false_type) + _S_wconvert(const _CharT* __f, const _CharT* __l, const void*) { -#ifdef _GLIBCXX_USE_CHAR8_T - if constexpr (is_same<_CharT, char8_t>::value) - return _S_wconvert((const char*)__f, (const char*)__l, true_type()); - else -#endif + struct _UCvt : std::codecvt<_CharT, char, std::mbstate_t> + { } __cvt; + std::string __str; + if (__str_codecvt_out_all(__f, __l, __str, __cvt)) { - struct _UCvt : std::codecvt<_CharT, char, std::mbstate_t> - { } __cvt; - std::string __str; - if (__str_codecvt_out_all(__f, __l, __str, __cvt)) - { - const char* __f2 = __str.data(); - const char* __l2 = __f2 + __str.size(); - std::codecvt_utf8_utf16<wchar_t> __wcvt; - std::wstring __wstr; - if (__str_codecvt_in_all(__f2, __l2, __wstr, __wcvt)) - return __wstr; - } - _GLIBCXX_THROW_OR_ABORT(filesystem_error( - "Cannot convert character sequence", - std::make_error_code(errc::illegal_byte_sequence))); + const char* __f2 = __str.data(); + const char* __l2 = __f2 + __str.size(); + std::codecvt_utf8_utf16<wchar_t> __wcvt; + std::wstring __wstr; + if (__str_codecvt_in_all(__f2, __l2, __wstr, __wcvt)) + return __wstr; } + _GLIBCXX_THROW_OR_ABORT(filesystem_error( + "Cannot convert character sequence", + std::make_error_code(errc::illegal_byte_sequence))); } static string_type _S_convert(const _CharT* __f, const _CharT* __l) { - return _S_wconvert(__f, __l, is_same<_CharT, char>{}); + return _S_wconvert(__f, __l, (const _CharT*)nullptr); } #else static string_type @@ -786,19 +806,17 @@ namespace __detail if constexpr (is_same<_CharT, char8_t>::value) return string_type(__f, __l); else - { #endif + { struct _UCvt : std::codecvt<_CharT, char, std::mbstate_t> { } __cvt; std::string __str; if 
(__str_codecvt_out_all(__f, __l, __str, __cvt)) return __str; -#ifdef _GLIBCXX_USE_CHAR8_T + _GLIBCXX_THROW_OR_ABORT(filesystem_error( + "Cannot convert character sequence", + std::make_error_code(errc::illegal_byte_sequence))); } -#endif - _GLIBCXX_THROW_OR_ABORT(filesystem_error( - "Cannot convert character sequence", - std::make_error_code(errc::illegal_byte_sequence))); } #endif -- 2.30.2