From: Jonathan Wakely Date: Mon, 17 Jun 2019 14:19:04 +0000 (+0100) Subject: PR libstdc++/90281 Fix string conversions for filesystem::path X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=26b1320ee5e2e9e107e092162d1c82b682504534;p=gcc.git PR libstdc++/90281 Fix string conversions for filesystem::path Fix several bugs in the encoding conversions for filesystem::path that prevent conversion of Unicode characters outside the Basic Multilingual Plane, and prevent returning basic_string specializations with alternative allocator types. The std::codecvt_utf8 class template is not suitable for UTF-16 conversions because it uses UCS-2 instead. For conversions between UTF-8 and UTF-16 either std::codecvt or codecvt_utf8_utf16 must be used. The __str_codecvt_in and __str_codecvt_out utilities do not return false on a partial conversion (e.g. for invalid or incomplete Unicode input). Add new helpers that treat partial conversions as errors, and use them for all filesystem::path conversions. PR libstdc++/90281 Fix string conversions for filesystem::path * include/bits/fs_path.h (u8path) [_GLIBCXX_FILESYSTEM_IS_WINDOWS]: Use codecvt_utf8_utf16 instead of codecvt_utf8. Use __str_codecvt_in_all to fail for partial conversions and throw on error. [!_GLIBCXX_FILESYSTEM_IS_WINDOWS && _GLIBCXX_USE_CHAR8_T] (path::_Cvt): Add explicit specialization. [_GLIBCXX_FILESYSTEM_IS_WINDOWS] (path::_Cvt::_S_wconvert): Remove overloads. [_GLIBCXX_FILESYSTEM_IS_WINDOWS] (path::_Cvt::_S_convert): Use if-constexpr instead of dispatching to _S_wconvert. Use codecvt instead of codecvt_utf8. Use __str_codecvt_in_all and __str_codecvt_out_all. [!_GLIBCXX_FILESYSTEM_IS_WINDOWS] (path::_Cvt::_S_convert): Use codecvt instead of codecvt_utf8. Use __str_codecvt_out_all. (path::_S_str_convert) [_GLIBCXX_FILESYSTEM_IS_WINDOWS]: Use codecvt_utf8_utf16 instead of codecvt_utf8. Construct return values with allocator. Use __str_codecvt_out_all. Fallthrough to POSIX code after converting to UTF-8. 
(path::_S_str_convert): Use codecvt instead of codecvt_utf8. Use __str_codecvt_in_all. (path::string): Fix initialization of string types with different allocators. (path::u8string) [_GLIBCXX_FILESYSTEM_IS_WINDOWS]: Use codecvt_utf8_utf16 instead of codecvt_utf8. Use __str_codecvt_out_all. * include/bits/locale_conv.h (__do_str_codecvt): Reorder static and runtime conditions. (__str_codecvt_out_all, __str_codecvt_in_all): New functions that return false for partial conversions. * include/experimental/bits/fs_path.h (u8path): [_GLIBCXX_FILESYSTEM_IS_WINDOWS]: Implement correctly for mingw. [_GLIBCXX_FILESYSTEM_IS_WINDOWS] (path::_Cvt::_S_wconvert): Add missing handling for char8_t. Use codecvt and codecvt_utf8_utf16 instead of codecvt_utf8. Use __str_codecvt_in_all and __str_codecvt_out_all. [!_GLIBCXX_FILESYSTEM_IS_WINDOWS] (path::_Cvt::_S_convert): Use codecvt instead of codecvt_utf8. Use __str_codecvt_out_all. (path::string) [_GLIBCXX_FILESYSTEM_IS_WINDOWS]: Use codecvt_utf8_utf16 instead of codecvt_utf8. Construct return values with allocator. Use __str_codecvt_out_all and __str_codecvt_in_all. (path::string) [!_GLIBCXX_FILESYSTEM_IS_WINDOWS]: Use __str_codecvt_in_all. (path::u8string) [_GLIBCXX_FILESYSTEM_IS_WINDOWS]: Use codecvt_utf8_utf16 instead of codecvt_utf8. Use __str_codecvt_out_all. * src/c++17/fs_path.cc (path::_S_convert_loc): Use __str_codecvt_in_all. * src/filesystem/path.cc (path::_S_convert_loc): Likewise. * testsuite/27_io/filesystem/path/construct/90281.cc: New test. * testsuite/27_io/filesystem/path/factory/u8path.cc: New test. * testsuite/27_io/filesystem/path/native/string.cc: Test with empty strings and with Unicode characters outside the basic multilingual plane. * testsuite/27_io/filesystem/path/native/alloc.cc: New test. * testsuite/experimental/filesystem/path/construct/90281.cc: New test. * testsuite/experimental/filesystem/path/factory/u8path.cc: New test. * testsuite/experimental/filesystem/path/native/alloc.cc: New test. 
* testsuite/experimental/filesystem/path/native/string.cc: Test with empty strings and with Unicode characters outside the basic multilingual plane. From-SVN: r272385 --- diff --git a/libstdc++-v3/ChangeLog b/libstdc++-v3/ChangeLog index 1bca26b8b8e..f05970ede20 100644 --- a/libstdc++-v3/ChangeLog +++ b/libstdc++-v3/ChangeLog @@ -1,3 +1,65 @@ +2019-06-17 Jonathan Wakely + + PR libstdc++/90281 Fix string conversions for filesystem::path + * include/bits/fs_path.h (u8path) [_GLIBCXX_FILESYSTEM_IS_WINDOWS]: + Use codecvt_utf8_utf16 instead of codecvt_utf8. Use + __str_codecvt_in_all to fail for partial conversions and throw on + error. + [!_GLIBCXX_FILESYSTEM_IS_WINDOWS && _GLIBCXX_USE_CHAR8_T] + (path::_Cvt): Add explicit specialization. + [_GLIBCXX_FILESYSTEM_IS_WINDOWS] (path::_Cvt::_S_wconvert): Remove + overloads. + [_GLIBCXX_FILESYSTEM_IS_WINDOWS] (path::_Cvt::_S_convert): Use + if-constexpr instead of dispatching to _S_wconvert. Use codecvt + instead of codecvt_utf8. Use __str_codecvt_in_all and + __str_codecvt_out_all. + [!_GLIBCXX_FILESYSTEM_IS_WINDOWS] (path::_Cvt::_S_convert): Use + codecvt instead of codecvt_utf8. Use __str_codecvt_out_all. + (path::_S_str_convert) [_GLIBCXX_FILESYSTEM_IS_WINDOWS]: Use + codecvt_utf8_utf16 instead of codecvt_utf8. Construct return values + with allocator. Use __str_codecvt_out_all. Fallthrough to POSIX code + after converting to UTF-8. + (path::_S_str_convert): Use codecvt instead of codecvt_utf8. Use + __str_codecvt_in_all. + (path::string): Fix initialization of string types with different + allocators. + (path::u8string) [_GLIBCXX_FILESYSTEM_IS_WINDOWS]: Use + codecvt_utf8_utf16 instead of codecvt_utf8. Use __str_codecvt_out_all. + * include/bits/locale_conv.h (__do_str_codecvt): Reorder static and + runtime conditions. + (__str_codecvt_out_all, __str_codecvt_in_all): New functions that + return false for partial conversions. 
+ * include/experimental/bits/fs_path.h (u8path): + [_GLIBCXX_FILESYSTEM_IS_WINDOWS]: Implement correctly for mingw. + [_GLIBCXX_FILESYSTEM_IS_WINDOWS] (path::_Cvt::_S_wconvert): Add + missing handling for char8_t. Use codecvt and codecvt_utf8_utf16 + instead of codecvt_utf8. Use __str_codecvt_in_all and + __str_codecvt_out_all. + [!_GLIBCXX_FILESYSTEM_IS_WINDOWS] (path::_Cvt::_S_convert): Use + codecvt instead of codecvt_utf8. Use __str_codecvt_out_all. + (path::string) [_GLIBCXX_FILESYSTEM_IS_WINDOWS]: Use + codecvt_utf8_utf16 instead of codecvt_utf8. Construct return values + with allocator. Use __str_codecvt_out_all and __str_codecvt_in_all. + (path::string) [!_GLIBCXX_FILESYSTEM_IS_WINDOWS]: Use + __str_codecvt_in_all. + (path::u8string) [_GLIBCXX_FILESYSTEM_IS_WINDOWS]: Use + codecvt_utf8_utf16 instead of codecvt_utf8. Use __str_codecvt_out_all. + * src/c++17/fs_path.cc (path::_S_convert_loc): Use + __str_codecvt_in_all. + * src/filesystem/path.cc (path::_S_convert_loc): Likewise. + * testsuite/27_io/filesystem/path/construct/90281.cc: New test. + * testsuite/27_io/filesystem/path/factory/u8path.cc: New test. + * testsuite/27_io/filesystem/path/native/string.cc: Test with empty + strings and with Unicode characters outside the basic multilingual + plane. + * testsuite/27_io/filesystem/path/native/alloc.cc: New test. + * testsuite/experimental/filesystem/path/construct/90281.cc: New test. + * testsuite/experimental/filesystem/path/factory/u8path.cc: New test. + * testsuite/experimental/filesystem/path/native/alloc.cc: New test. + * testsuite/experimental/filesystem/path/native/string.cc: Test with + empty strings and with Unicode characters outside the basic + multilingual plane. 
+ 2019-06-17 François Dumont Jonathan Wakely diff --git a/libstdc++-v3/include/bits/fs_path.h b/libstdc++-v3/include/bits/fs_path.h index cec35614b42..0a8ab0de2ff 100644 --- a/libstdc++-v3/include/bits/fs_path.h +++ b/libstdc++-v3/include/bits/fs_path.h @@ -628,22 +628,26 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11 -> decltype(filesystem::path(__first, __last, std::locale::classic())) { #ifdef _GLIBCXX_FILESYSTEM_IS_WINDOWS - codecvt_utf8 __cvt; + // XXX This assumes native wide encoding is UTF-16. + std::codecvt_utf8_utf16 __cvt; path::string_type __tmp; if constexpr (is_pointer_v<_InputIterator>) { - if (__str_codecvt_in(__first, __last, __tmp, __cvt)) + if (__str_codecvt_in_all(__first, __last, __tmp, __cvt)) return path{ __tmp }; } else { const std::string __u8str{__first, __last}; const char* const __ptr = __u8str.data(); - if (__str_codecvt_in(__ptr, __ptr + __u8str.size(), __tmp, __cvt)) + if (__str_codecvt_in_all(__ptr, __ptr + __u8str.size(), __tmp, __cvt)) return path{ __tmp }; } - return {}; + _GLIBCXX_THROW_OR_ABORT(filesystem_error( + "Cannot convert character sequence", + std::make_error_code(errc::illegal_byte_sequence))); #else + // This assumes native normal encoding is UTF-8. 
return path{ __first, __last }; #endif } @@ -723,72 +727,68 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11 { return string_type{__first, __last}; } }; +#if !defined _GLIBCXX_FILESYSTEM_IS_WINDOWS && defined _GLIBCXX_USE_CHAR8_T + // For POSIX converting from char8_t to char is also 'noconv' + template<> + struct path::_Cvt + { + template + static string_type + _S_convert(_Iter __first, _Iter __last) + { return string_type(__first, __last); } + }; +#endif + template struct path::_Cvt { -#ifdef _GLIBCXX_FILESYSTEM_IS_WINDOWS static string_type - _S_wconvert(const char* __f, const char* __l, true_type) + _S_convert(const _CharT* __f, const _CharT* __l) { - using _Cvt = std::codecvt; - const auto& __cvt = std::use_facet<_Cvt>(std::locale{}); +#ifdef _GLIBCXX_FILESYSTEM_IS_WINDOWS std::wstring __wstr; - if (__str_codecvt_in(__f, __l, __wstr, __cvt)) - return __wstr; - _GLIBCXX_THROW_OR_ABORT(filesystem_error( - "Cannot convert character sequence", - std::make_error_code(errc::illegal_byte_sequence))); - } - - static string_type - _S_wconvert(const _CharT* __f, const _CharT* __l, false_type) - { - std::codecvt_utf8<_CharT> __cvt; - std::string __str; - if (__str_codecvt_out(__f, __l, __str, __cvt)) + if constexpr (is_same_v<_CharT, char>) { - const char* __f2 = __str.data(); - const char* __l2 = __f2 + __str.size(); - std::codecvt_utf8 __wcvt; - std::wstring __wstr; - if (__str_codecvt_in(__f2, __l2, __wstr, __wcvt)) + struct _UCvt : std::codecvt + { } __cvt; + if (__str_codecvt_in_all(__f, __l, __wstr, __cvt)) return __wstr; } - _GLIBCXX_THROW_OR_ABORT(filesystem_error( - "Cannot convert character sequence", - std::make_error_code(errc::illegal_byte_sequence))); - } - - static string_type - _S_convert(const _CharT* __f, const _CharT* __l) - { - return _S_wconvert(__f, __l, is_same<_CharT, char>{}); - } -#else - static string_type - _S_convert(const _CharT* __f, const _CharT* __l) - { #ifdef _GLIBCXX_USE_CHAR8_T - if constexpr (is_same_v<_CharT, char8_t>) + else if constexpr 
(is_same_v<_CharT, char8_t>) { - string_type __str(__f, __l); - return __str; + const char* __f2 = (const char*)__f; + const char* __l2 = (const char*)__l; + std::codecvt_utf8_utf16 __wcvt; + if (__str_codecvt_in_all(__f2, __l2, __wstr, __wcvt)) + return __wstr; } - else - { #endif - std::codecvt_utf8<_CharT> __cvt; + else // char16_t or char32_t + { + struct _UCvt : std::codecvt<_CharT, char, std::mbstate_t> + { } __cvt; std::string __str; - if (__str_codecvt_out(__f, __l, __str, __cvt)) - return __str; -#ifdef _GLIBCXX_USE_CHAR8_T + if (__str_codecvt_out_all(__f, __l, __str, __cvt)) + { + const char* __f2 = __str.data(); + const char* __l2 = __f2 + __str.size(); + std::codecvt_utf8_utf16 __wcvt; + if (__str_codecvt_in_all(__f2, __l2, __wstr, __wcvt)) + return __wstr; + } } +#else // ! windows + struct _UCvt : std::codecvt<_CharT, char, std::mbstate_t> + { } __cvt; + std::string __str; + if (__str_codecvt_out_all(__f, __l, __str, __cvt)) + return __str; #endif _GLIBCXX_THROW_OR_ABORT(filesystem_error( "Cannot convert character sequence", std::make_error_code(errc::illegal_byte_sequence))); } -#endif static string_type _S_convert(_CharT* __f, _CharT* __l) @@ -971,61 +971,51 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11 std::basic_string<_CharT, _Traits, _Allocator> path::_S_str_convert(const string_type& __str, const _Allocator& __a) { - if (__str.size() == 0) - return std::basic_string<_CharT, _Traits, _Allocator>(__a); + static_assert(!is_same_v<_CharT, value_type>); - const value_type* __first = __str.data(); - const value_type* __last = __first + __str.size(); + using _WString = basic_string<_CharT, _Traits, _Allocator>; + + if (__str.size() == 0) + return _WString(__a); #ifdef _GLIBCXX_FILESYSTEM_IS_WINDOWS + // First convert native string from UTF-16 to UTF-8. + // XXX This assumes that the execution wide-character set is UTF-16. 
+ std::codecvt_utf8_utf16 __cvt; + using _CharAlloc = __alloc_rebind<_Allocator, char>; using _String = basic_string, _CharAlloc>; - using _WString = basic_string<_CharT, _Traits, _Allocator>; - - // use codecvt_utf8 to convert native string to UTF-8 - codecvt_utf8 __cvt; _String __u8str{_CharAlloc{__a}}; - if (__str_codecvt_out(__first, __last, __u8str, __cvt)) - { - if constexpr (is_same_v<_CharT, char>) - return __u8str; -#ifdef _GLIBCXX_USE_CHAR8_T - else if constexpr (is_same_v<_CharT, char8_t>) - { - const char* __f = __u8str.data(); - const char* __l = __f + __u8str.size(); - _WString __wstr(__f, __l); - return __wstr; - } -#endif - else - { - _WString __wstr; - // use codecvt_utf8<_CharT> to convert UTF-8 to wide string - codecvt_utf8<_CharT> __cvt; - const char* __f = __u8str.data(); - const char* __l = __f + __u8str.size(); - if (__str_codecvt_in(__f, __l, __wstr, __cvt)) - return __wstr; - } - } + const value_type* __wfirst = __str.data(); + const value_type* __wlast = __wfirst + __str.size(); + if (__str_codecvt_out_all(__wfirst, __wlast, __u8str, __cvt)) { + if constexpr (is_same_v<_CharT, char>) + return __u8str; // XXX assumes native ordinary encoding is UTF-8. + else { + + const char* __first = __u8str.data(); + const char* __last = __first + __u8str.size(); #else + const value_type* __first = __str.data(); + const value_type* __last = __first + __str.size(); +#endif + + // Convert UTF-8 string to requested format. #ifdef _GLIBCXX_USE_CHAR8_T if constexpr (is_same_v<_CharT, char8_t>) - { - basic_string<_CharT, _Traits, _Allocator> __wstr{__first, __last, __a}; - return __wstr; - } + return _WString(__first, __last, __a); else - { #endif - codecvt_utf8<_CharT> __cvt; - basic_string<_CharT, _Traits, _Allocator> __wstr{__a}; - if (__str_codecvt_in(__first, __last, __wstr, __cvt)) + { + // Convert UTF-8 to wide string. 
+ _WString __wstr(__a); + struct _UCvt : std::codecvt<_CharT, char, std::mbstate_t> { } __cvt; + if (__str_codecvt_in_all(__first, __last, __wstr, __cvt)) return __wstr; -#ifdef _GLIBCXX_USE_CHAR8_T } -#endif + +#ifdef _GLIBCXX_FILESYSTEM_IS_WINDOWS + } } #endif _GLIBCXX_THROW_OR_ABORT(filesystem_error( "Cannot convert character sequence", @@ -1038,7 +1028,7 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11 path::string(const _Allocator& __a) const { if constexpr (is_same_v<_CharT, value_type>) - return { _M_pathname, __a }; + return { _M_pathname.c_str(), _M_pathname.length(), __a }; else return _S_str_convert<_CharT, _Traits>(_M_pathname, __a); } @@ -1060,11 +1050,11 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11 { #ifdef _GLIBCXX_FILESYSTEM_IS_WINDOWS std::string __str; - // convert from native encoding to UTF-8 - codecvt_utf8 __cvt; + // convert from native wide encoding (assumed to be UTF-16) to UTF-8 + std::codecvt_utf8_utf16 __cvt; const value_type* __first = _M_pathname.data(); const value_type* __last = __first + _M_pathname.size(); - if (__str_codecvt_out(__first, __last, __str, __cvt)) + if (__str_codecvt_out_all(__first, __last, __str, __cvt)) return __str; _GLIBCXX_THROW_OR_ABORT(filesystem_error( "Cannot convert character sequence", diff --git a/libstdc++-v3/include/bits/locale_conv.h b/libstdc++-v3/include/bits/locale_conv.h index 4cb9c39ebb4..b29954590e0 100644 --- a/libstdc++-v3/include/bits/locale_conv.h +++ b/libstdc++-v3/include/bits/locale_conv.h @@ -86,23 +86,19 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION return false; } - if (__result == codecvt_base::noconv) - { - // The codecvt facet will only return noconv when the types are - // the same, so avoid instantiating basic_string::assign otherwise - if _GLIBCXX17_CONSTEXPR (is_same()) - { - __outstr.assign(__first, __last); - __count = __last - __first; - } - } - else - { - __outstr.resize(__outchars); - __count = __next - __first; - } + // The codecvt facet will only return noconv when the types are + // the same, so avoid 
instantiating basic_string::assign otherwise + if _GLIBCXX17_CONSTEXPR (is_same()) + if (__result == codecvt_base::noconv) + { + __outstr.assign(__first, __last); + __count = __last - __first; + return true; + } + __outstr.resize(__outchars); + __count = __next - __first; return true; } @@ -124,6 +120,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION __count, __fn); } + // As above, but with no __count parameter template inline bool __str_codecvt_in(const char* __first, const char* __last, @@ -135,6 +132,19 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION return __str_codecvt_in(__first, __last, __outstr, __cvt, __state, __n); } + // As above, but returns false for partial conversion + template + inline bool + __str_codecvt_in_all(const char* __first, const char* __last, + basic_string<_CharT, _Traits, _Alloc>& __outstr, + const codecvt<_CharT, char, _State>& __cvt) + { + _State __state = {}; + size_t __n; + return __str_codecvt_in(__first, __last, __outstr, __cvt, __state, __n) + && (__n == (__last - __first)); + } + // Convert wide character string to narrow. template inline bool @@ -153,6 +163,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION __count, __fn); } + // As above, but with no __count parameter template inline bool __str_codecvt_out(const _CharT* __first, const _CharT* __last, @@ -164,6 +175,19 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION return __str_codecvt_out(__first, __last, __outstr, __cvt, __state, __n); } + // As above, but returns false for partial conversions + template + inline bool + __str_codecvt_out_all(const _CharT* __first, const _CharT* __last, + basic_string& __outstr, + const codecvt<_CharT, char, _State>& __cvt) + { + _State __state = {}; + size_t __n; + return __str_codecvt_out(__first, __last, __outstr, __cvt, __state, __n) + && (__n == (__last - __first)); + } + #ifdef _GLIBCXX_USE_CHAR8_T // Convert wide character string to narrow. 
diff --git a/libstdc++-v3/include/experimental/bits/fs_path.h b/libstdc++-v3/include/experimental/bits/fs_path.h index 9a68a272e34..a3655f616f2 100644 --- a/libstdc++-v3/include/experimental/bits/fs_path.h +++ b/libstdc++-v3/include/experimental/bits/fs_path.h @@ -580,28 +580,38 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11 } /// Create a path from a UTF-8-encoded sequence of char - // TODO constrain with _Path and __value_type_is_char - template + // TODO constrain with _Path and __value_type_is_char + template inline path - u8path(const _Source& __source) + u8path(_InputIterator __first, _InputIterator __last) { #ifdef _GLIBCXX_FILESYSTEM_IS_WINDOWS - return path{ path::string_type{__source} }; + // XXX This assumes native wide encoding is UTF-16. + std::codecvt_utf8_utf16 __cvt; + path::string_type __tmp; + const std::string __u8str{__first, __last}; + const char* const __ptr = __u8str.data(); + if (__str_codecvt_in_all(__ptr, __ptr + __u8str.size(), __tmp, __cvt)) + return path{ __tmp }; + _GLIBCXX_THROW_OR_ABORT(filesystem_error( + "Cannot convert character sequence", + std::make_error_code(errc::illegal_byte_sequence))); #else - return path{ __source }; + return path{ __first, __last }; #endif } /// Create a path from a UTF-8-encoded sequence of char - // TODO constrain with _Path and __value_type_is_char - template + // TODO constrain with _Path and __value_type_is_char + template inline path - u8path(_InputIterator __first, _InputIterator __last) + u8path(const _Source& __source) { #ifdef _GLIBCXX_FILESYSTEM_IS_WINDOWS - return path{ path::string_type{__first, __last} }; + std::string __s = path::_S_string_from_iter(__source); + return filesystem::u8path(__s.data(), __s.data() + __s.size()); #else - return path{ __first, __last }; + return path{ __source }; #endif } @@ -668,7 +678,7 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11 using _Cvt = std::codecvt; const auto& __cvt = std::use_facet<_Cvt>(std::locale{}); std::wstring __wstr; - if (__str_codecvt_in(__f, __l, __wstr, __cvt)) + 
if (__str_codecvt_in_all(__f, __l, __wstr, __cvt)) return __wstr; _GLIBCXX_THROW_OR_ABORT(filesystem_error( "Cannot convert character sequence", @@ -678,20 +688,28 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11 static string_type _S_wconvert(const _CharT* __f, const _CharT* __l, false_type) { - std::codecvt_utf8<_CharT> __cvt; - std::string __str; - if (__str_codecvt_out(__f, __l, __str, __cvt)) +#ifdef _GLIBCXX_USE_CHAR8_T + if constexpr (is_same<_CharT, char8_t>::value) + return _S_wconvert((const char*)__f, (const char*)__l, true_type()); + else +#endif { - const char* __f2 = __str.data(); - const char* __l2 = __f2 + __str.size(); - std::codecvt_utf8 __wcvt; - std::wstring __wstr; - if (__str_codecvt_in(__f2, __l2, __wstr, __wcvt)) - return __wstr; + struct _UCvt : std::codecvt<_CharT, char, std::mbstate_t> + { } __cvt; + std::string __str; + if (__str_codecvt_out_all(__f, __l, __str, __cvt)) + { + const char* __f2 = __str.data(); + const char* __l2 = __f2 + __str.size(); + std::codecvt_utf8_utf16 __wcvt; + std::wstring __wstr; + if (__str_codecvt_in_all(__f2, __l2, __wstr, __wcvt)) + return __wstr; + } + _GLIBCXX_THROW_OR_ABORT(filesystem_error( + "Cannot convert character sequence", + std::make_error_code(errc::illegal_byte_sequence))); } - _GLIBCXX_THROW_OR_ABORT(filesystem_error( - "Cannot convert character sequence", - std::make_error_code(errc::illegal_byte_sequence))); } static string_type @@ -705,16 +723,14 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11 { #ifdef _GLIBCXX_USE_CHAR8_T if constexpr (is_same<_CharT, char8_t>::value) - { - string_type __str(__f, __l); - return __str; - } + return string_type(__f, __l); else { #endif - std::codecvt_utf8<_CharT> __cvt; + struct _UCvt : std::codecvt<_CharT, char, std::mbstate_t> + { } __cvt; std::string __str; - if (__str_codecvt_out(__f, __l, __str, __cvt)) + if (__str_codecvt_out_all(__f, __l, __str, __cvt)) return __str; #ifdef _GLIBCXX_USE_CHAR8_T } @@ -887,18 +903,20 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11 if (is_same<_CharT, 
value_type>::value) return { _M_pathname.begin(), _M_pathname.end(), __a }; + using _WString = basic_string<_CharT, _Traits, _Allocator>; + const value_type* __first = _M_pathname.data(); const value_type* __last = __first + _M_pathname.size(); #ifdef _GLIBCXX_FILESYSTEM_IS_WINDOWS using _CharAlloc = __alloc_rebind<_Allocator, char>; using _String = basic_string, _CharAlloc>; - using _WString = basic_string<_CharT, _Traits, _Allocator>; - // use codecvt_utf8 to convert native string to UTF-8 - codecvt_utf8 __cvt; + // First convert native string from UTF-16 to UTF-8. + // XXX This assumes that the execution wide-character set is UTF-16. + codecvt_utf8_utf16 __cvt; _String __u8str{_CharAlloc{__a}}; - if (__str_codecvt_out(__first, __last, __u8str, __cvt)) + if (__str_codecvt_out_all(__first, __last, __u8str, __cvt)) { struct { @@ -916,41 +934,35 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11 return std::__addressof(__to); } else - { #endif - // use codecvt_utf8<_CharT> to convert UTF-8 to wide string - codecvt_utf8<_CharT> __cvt; + { + // Convert UTF-8 to wide string. 
+ struct _UCvt : std::codecvt<_CharT, char, std::mbstate_t> + { } __cvt; const char* __f = __from.data(); const char* __l = __f + __from.size(); - if (__str_codecvt_in(__f, __l, __to, __cvt)) + if (__str_codecvt_in_all(__f, __l, __to, __cvt)) return std::__addressof(__to); -#ifdef _GLIBCXX_USE_CHAR8_T } -#endif return nullptr; } } __dispatch; - _WString __wstr; + _WString __wstr(__a); if (auto* __p = __dispatch(__u8str, __wstr, is_same<_CharT, char>{})) return *__p; } #else #ifdef _GLIBCXX_USE_CHAR8_T if constexpr (is_same<_CharT, char8_t>::value) - { - basic_string<_CharT, _Traits, _Allocator> __wstr{__first, __last, __a}; - return __wstr; - } + return _WString(__first, __last, __a); else - { #endif - codecvt_utf8<_CharT> __cvt; - basic_string<_CharT, _Traits, _Allocator> __wstr{__a}; - if (__str_codecvt_in(__first, __last, __wstr, __cvt)) + { + struct _UCvt : std::codecvt<_CharT, char, std::mbstate_t> { } __cvt; + _WString __wstr(__a); + if (__str_codecvt_in_all(__first, __last, __wstr, __cvt)) return __wstr; -#ifdef _GLIBCXX_USE_CHAR8_T } -#endif #endif _GLIBCXX_THROW_OR_ABORT(filesystem_error( "Cannot convert character sequence", @@ -974,11 +986,11 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11 { #ifdef _GLIBCXX_FILESYSTEM_IS_WINDOWS std::string __str; - // convert from native encoding to UTF-8 - codecvt_utf8 __cvt; + // convert from native wide encoding (assumed to be UTF-16) to UTF-8 + std::codecvt_utf8_utf16 __cvt; const value_type* __first = _M_pathname.data(); const value_type* __last = __first + _M_pathname.size(); - if (__str_codecvt_out(__first, __last, __str, __cvt)) + if (__str_codecvt_out_all(__first, __last, __str, __cvt)) return __str; _GLIBCXX_THROW_OR_ABORT(filesystem_error( "Cannot convert character sequence", diff --git a/libstdc++-v3/src/c++17/fs_path.cc b/libstdc++-v3/src/c++17/fs_path.cc index c438ddc61fd..82ac736f82a 100644 --- a/libstdc++-v3/src/c++17/fs_path.cc +++ b/libstdc++-v3/src/c++17/fs_path.cc @@ -1894,7 +1894,7 @@ path::_S_convert_loc(const 
char* __first, const char* __last, #if _GLIBCXX_USE_WCHAR_T auto& __cvt = std::use_facet>(__loc); basic_string __ws; - if (!__str_codecvt_in(__first, __last, __ws, __cvt)) + if (!__str_codecvt_in_all(__first, __last, __ws, __cvt)) _GLIBCXX_THROW_OR_ABORT(filesystem_error( "Cannot convert character sequence", std::make_error_code(errc::illegal_byte_sequence))); diff --git a/libstdc++-v3/src/filesystem/path.cc b/libstdc++-v3/src/filesystem/path.cc index dfc3bd53c00..edf7c67c01b 100644 --- a/libstdc++-v3/src/filesystem/path.cc +++ b/libstdc++-v3/src/filesystem/path.cc @@ -500,7 +500,7 @@ path::_S_convert_loc(const char* __first, const char* __last, #if _GLIBCXX_USE_WCHAR_T auto& __cvt = std::use_facet>(__loc); basic_string __ws; - if (!__str_codecvt_in(__first, __last, __ws, __cvt)) + if (!__str_codecvt_in_all(__first, __last, __ws, __cvt)) _GLIBCXX_THROW_OR_ABORT(filesystem_error( "Cannot convert character sequence", std::make_error_code(errc::illegal_byte_sequence))); diff --git a/libstdc++-v3/testsuite/27_io/filesystem/path/construct/90281.cc b/libstdc++-v3/testsuite/27_io/filesystem/path/construct/90281.cc new file mode 100644 index 00000000000..e0d10e56e8c --- /dev/null +++ b/libstdc++-v3/testsuite/27_io/filesystem/path/construct/90281.cc @@ -0,0 +1,53 @@ +// Copyright (C) 2019 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the +// terms of the GNU General Public License as published by the +// Free Software Foundation; either version 3, or (at your option) +// any later version. + +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. 
+ +// You should have received a copy of the GNU General Public License along +// with this library; see the file COPYING3. If not see +// . + +// { dg-options "-std=gnu++17" } +// { dg-do run { target c++17 } } + +#include +#include + +namespace fs = std::filesystem; + +template +const C* code_units() +{ + if constexpr (std::is_same_v) + return "\xf0\x9d\x84\x9e"; + else + return L"\xD834\xDD1E"; +} + +// PR libstdc++/90281 +void +test01() +{ + const fs::path::string_type expected = code_units(); + + fs::path p8 = fs::u8path(u8"\U0001D11E"); + VERIFY( p8.native() == expected ); + fs::path p16(u"\U0001D11E"); + VERIFY( p16.native() == expected ); + fs::path p32(U"\U0001D11E"); + VERIFY( p32.native() == expected ); +} + +int +main() +{ + test01(); +} diff --git a/libstdc++-v3/testsuite/27_io/filesystem/path/factory/u8path.cc b/libstdc++-v3/testsuite/27_io/filesystem/path/factory/u8path.cc new file mode 100644 index 00000000000..aff722b5867 --- /dev/null +++ b/libstdc++-v3/testsuite/27_io/filesystem/path/factory/u8path.cc @@ -0,0 +1,67 @@ +// Copyright (C) 2019 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the +// terms of the GNU General Public License as published by the +// Free Software Foundation; either version 3, or (at your option) +// any later version. + +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License along +// with this library; see the file COPYING3. If not see +// . 
+ +// { dg-options "-std=gnu++17" } +// { dg-do run { target c++17 } } + +#include <filesystem> +#include <testsuite_hooks.h> + +namespace fs = std::filesystem; + +void +test01() +{ + fs::path p = fs::u8path(""); + VERIFY( p.empty() ); + + p = fs::u8path("filename"); + VERIFY( p.u8string() == u8"filename" ); + + p = fs::u8path("\xf0\x9d\x84\x9e"); + VERIFY( p.u8string() == u8"\U0001D11E" ); +} + +void +test02() +{ + // These calls to u8path are undefined, because they fail to meet the + // requirement that the input is valid UTF-8 data. For Windows u8path + // will fail. For POSIX constructing an invalid path appears to work, + // but will fail when converted to a different encoding. + + try { + auto p = fs::u8path("\xf0\x9d"); // truncated 4-byte UTF-8 sequence (only 2 bytes) + p.u16string(); + VERIFY( false ); + } catch(const fs::filesystem_error&) { + } + + try { + auto p = fs::u8path("\xf0"); // incomplete multibyte character + p.u16string(); + VERIFY( false ); + } catch(const fs::filesystem_error&) { + } +} + +int +main() +{ + test01(); + test02(); +} diff --git a/libstdc++-v3/testsuite/27_io/filesystem/path/native/alloc.cc b/libstdc++-v3/testsuite/27_io/filesystem/path/native/alloc.cc new file mode 100644 index 00000000000..bdb52a20e14 --- /dev/null +++ b/libstdc++-v3/testsuite/27_io/filesystem/path/native/alloc.cc @@ -0,0 +1,92 @@ +// Copyright (C) 2016-2019 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the +// terms of the GNU General Public License as published by the +// Free Software Foundation; either version 3, or (at your option) +// any later version. + +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. 
+ +// You should have received a copy of the GNU General Public License along +// with this library; see the file COPYING3. If not see +// . + +// { dg-options "-std=gnu++17" } +// { dg-do run { target c++17 } } + +#include +#include +#include +#include + +template + using alloc = __gnu_test::uneq_allocator; + +void +test01() +{ + using namespace std::filesystem; + path p; + + auto str = p.string(alloc(1)); + VERIFY( str == "" ); + VERIFY( str.get_allocator() == alloc(1) ); + +#ifdef _GLIBCXX_USE_CHAR8_T + auto str8 = p.string(alloc(1)); + VERIFY( str8 == u8"" ); + VERIFY( str8.get_allocator() == alloc(1) ); +#endif + + auto strw = p.string(alloc(2)); + VERIFY( strw == L"" ); + VERIFY( strw.get_allocator() == alloc(2) ); + + auto str16 = p.string(alloc(3)); + VERIFY( str16 == u"" ); + VERIFY( str16.get_allocator() == alloc(3) ); + + auto str32 = p.string(alloc(4)); + VERIFY( str32 == U"" ); + VERIFY( str32.get_allocator() == alloc(4) ); +} + +void +test02() +{ + using namespace std::filesystem; + path p = "abcdefghijklmnopqrstuvwxyz"; + + auto str = p.string(alloc(1)); + VERIFY( str == "abcdefghijklmnopqrstuvwxyz" ); + VERIFY( str.get_allocator() == alloc(1) ); + +#ifdef _GLIBCXX_USE_CHAR8_T + auto str8 = p.string(alloc(1)); + VERIFY( str8 == u8"abcdefghijklmnopqrstuvwxyz" ); + VERIFY( str8.get_allocator() == alloc(1) ); +#endif + + auto strw = p.string(alloc(2)); + VERIFY( strw == L"abcdefghijklmnopqrstuvwxyz" ); + VERIFY( strw.get_allocator() == alloc(2) ); + + auto str16 = p.string(alloc(3)); + VERIFY( str16 == u"abcdefghijklmnopqrstuvwxyz" ); + VERIFY( str16.get_allocator() == alloc(3) ); + + auto str32 = p.string(alloc(4)); + VERIFY( str32 == U"abcdefghijklmnopqrstuvwxyz" ); + VERIFY( str32.get_allocator() == alloc(4) ); +} + +int +main() +{ + test01(); + test02(); +} diff --git a/libstdc++-v3/testsuite/27_io/filesystem/path/native/string.cc b/libstdc++-v3/testsuite/27_io/filesystem/path/native/string.cc index 4d45c7e15df..2ed58e379ef 100644 --- 
a/libstdc++-v3/testsuite/27_io/filesystem/path/native/string.cc +++ b/libstdc++-v3/testsuite/27_io/filesystem/path/native/string.cc @@ -62,9 +62,36 @@ test02() VERIFY( str32 == p.u32string() ); } +void +test03() +{ + std::filesystem::path p; + auto str8 = p.u8string(); + VERIFY( str8 == u8"" ); + auto str16 = p.u16string(); + VERIFY( str16 == u"" ); + auto str32 = p.u32string(); + VERIFY( str32 == U"" ); +} + +void +test04() +{ + // PR libstdc++/90281 + auto p = std::filesystem::u8path("\xf0\x9d\x84\x9e"); + auto str8 = p.u8string(); + VERIFY( str8 == u8"\U0001D11E" ); + auto str16 = p.u16string(); + VERIFY( str16 == u"\U0001D11E" ); + auto str32 = p.u32string(); + VERIFY( str32 == U"\U0001D11E" ); +} + int main() { test01(); test02(); + test03(); + test04(); } diff --git a/libstdc++-v3/testsuite/experimental/filesystem/path/construct/90281.cc b/libstdc++-v3/testsuite/experimental/filesystem/path/construct/90281.cc new file mode 100644 index 00000000000..3640b00ec53 --- /dev/null +++ b/libstdc++-v3/testsuite/experimental/filesystem/path/construct/90281.cc @@ -0,0 +1,55 @@ +// Copyright (C) 2019 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the +// terms of the GNU General Public License as published by the +// Free Software Foundation; either version 3, or (at your option) +// any later version. + +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License along +// with this library; see the file COPYING3. If not see +// . 
+ +// { dg-options "-lstdc++fs" } +// { dg-do run { target c++11 } } +// { dg-require-filesystem-ts "" } + +#include +#include + +namespace fs = std::experimental::filesystem; + +template::value> +typename std::enable_if::type +code_units() +{ return "\xf0\x9d\x84\x9e"; } + +template::value> +typename std::enable_if::type +code_units() +{ return L"\xD834\xDD1E"; } + +// PR libstdc++/90281 +void +test01() +{ + const fs::path::string_type expected = code_units(); + + fs::path p8 = fs::u8path(u8"\U0001D11E"); + VERIFY( p8.native() == expected ); + fs::path p16(u"\U0001D11E"); + VERIFY( p16.native() == expected ); + fs::path p32(U"\U0001D11E"); + VERIFY( p32.native() == expected ); +} + +int +main() +{ + test01(); +} diff --git a/libstdc++-v3/testsuite/experimental/filesystem/path/factory/u8path.cc b/libstdc++-v3/testsuite/experimental/filesystem/path/factory/u8path.cc new file mode 100644 index 00000000000..bdeb3946a15 --- /dev/null +++ b/libstdc++-v3/testsuite/experimental/filesystem/path/factory/u8path.cc @@ -0,0 +1,68 @@ +// Copyright (C) 2019 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the +// terms of the GNU General Public License as published by the +// Free Software Foundation; either version 3, or (at your option) +// any later version. + +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License along +// with this library; see the file COPYING3. If not see +// . 
+ +// { dg-options "-lstdc++fs" } +// { dg-do run { target c++11 } } +// { dg-require-filesystem-ts "" } + +#include +#include + +namespace fs = std::experimental::filesystem; + +void +test01() +{ + fs::path p = fs::u8path(""); + VERIFY( p.empty() ); + + p = fs::u8path("filename"); + VERIFY( p.u8string() == u8"filename" ); + + p = fs::u8path("\xf0\x9d\x84\x9e"); + VERIFY( p.u8string() == u8"\U0001D11E" ); +} + +void +test02() +{ + // These calls to u8path are undefined, because they fail to meet the + // requirement that the input is valid UTF-8 data. For Windows u8path + // will fail. For POSIX constructing an invalid path appears to work, + // but will fail when converted to a different encoding. + + try { + auto p = fs::u8path("\xf0\x9d"); // incomplete surrogate pair + p.u16string(); + VERIFY( false ); + } catch(const fs::filesystem_error&) { + } + + try { + auto p = fs::u8path("\xf0"); // incomplete multibyte character + p.u16string(); + VERIFY( false ); + } catch(const fs::filesystem_error&) { + } +} + +int +main() +{ + test01(); + test02(); +} diff --git a/libstdc++-v3/testsuite/experimental/filesystem/path/native/alloc.cc b/libstdc++-v3/testsuite/experimental/filesystem/path/native/alloc.cc new file mode 100644 index 00000000000..ef9ea67928f --- /dev/null +++ b/libstdc++-v3/testsuite/experimental/filesystem/path/native/alloc.cc @@ -0,0 +1,93 @@ +// Copyright (C) 2016-2019 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the +// terms of the GNU General Public License as published by the +// Free Software Foundation; either version 3, or (at your option) +// any later version. + +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. 
+ +// You should have received a copy of the GNU General Public License along +// with this library; see the file COPYING3. If not see +// . + +// { dg-options "-lstdc++fs" } +// { dg-do run { target c++11 } } +// { dg-require-filesystem-ts "" } + +#include +#include +#include +#include + +template + using alloc = __gnu_test::uneq_allocator; + +void +test01() +{ + using namespace std::experimental::filesystem; + path p; + + auto str = p.string(alloc(1)); + VERIFY( str == "" ); + VERIFY( str.get_allocator() == alloc(1) ); + +#ifdef _GLIBCXX_USE_CHAR8_T + auto str8 = p.string(alloc(1)); + VERIFY( str8 == u8"" ); + VERIFY( str8.get_allocator() == alloc(1) ); +#endif + + auto strw = p.string(alloc(2)); + VERIFY( strw == L"" ); + VERIFY( strw.get_allocator() == alloc(2) ); + + auto str16 = p.string(alloc(3)); + VERIFY( str16 == u"" ); + VERIFY( str16.get_allocator() == alloc(3) ); + + auto str32 = p.string(alloc(4)); + VERIFY( str32 == U"" ); + VERIFY( str32.get_allocator() == alloc(4) ); +} + +void +test02() +{ + using namespace std::experimental::filesystem; + path p = "abcdefghijklmnopqrstuvwxyz"; + + auto str = p.string(alloc(1)); + VERIFY( str == "abcdefghijklmnopqrstuvwxyz" ); + VERIFY( str.get_allocator() == alloc(1) ); + +#ifdef _GLIBCXX_USE_CHAR8_T + auto str8 = p.string(alloc(1)); + VERIFY( str8 == u8"abcdefghijklmnopqrstuvwxyz" ); + VERIFY( str8.get_allocator() == alloc(1) ); +#endif + + auto strw = p.string(alloc(2)); + VERIFY( strw == L"abcdefghijklmnopqrstuvwxyz" ); + VERIFY( strw.get_allocator() == alloc(2) ); + + auto str16 = p.string(alloc(3)); + VERIFY( str16 == u"abcdefghijklmnopqrstuvwxyz" ); + VERIFY( str16.get_allocator() == alloc(3) ); + + auto str32 = p.string(alloc(4)); + VERIFY( str32 == U"abcdefghijklmnopqrstuvwxyz" ); + VERIFY( str32.get_allocator() == alloc(4) ); +} + +int +main() +{ + test01(); + test02(); +} diff --git a/libstdc++-v3/testsuite/experimental/filesystem/path/native/string.cc 
b/libstdc++-v3/testsuite/experimental/filesystem/path/native/string.cc index d6ee7fe9101..b78ba2b1dbf 100644 --- a/libstdc++-v3/testsuite/experimental/filesystem/path/native/string.cc +++ b/libstdc++-v3/testsuite/experimental/filesystem/path/native/string.cc @@ -63,9 +63,36 @@ test02() VERIFY( str32 == p.u32string() ); } +void +test03() +{ + std::experimental::filesystem::path p; + auto str8 = p.u8string(); + VERIFY( str8 == u8"" ); + auto str16 = p.u16string(); + VERIFY( str16 == u"" ); + auto str32 = p.u32string(); + VERIFY( str32 == U"" ); +} + +void +test04() +{ + // PR libstdc++/90281 + auto p = std::experimental::filesystem::u8path("\xf0\x9d\x84\x9e"); + auto str8 = p.u8string(); + VERIFY( str8 == u8"\U0001D11E" ); + auto str16 = p.u16string(); + VERIFY( str16 == u"\U0001D11E" ); + auto str32 = p.u32string(); + VERIFY( str32 == U"\U0001D11E" ); +} + int main() { test01(); test02(); + test03(); + test04(); }