+2019-06-17 Jonathan Wakely <jwakely@redhat.com>
+
+ PR libstdc++/90281 Fix string conversions for filesystem::path
+ * include/bits/fs_path.h (u8path) [_GLIBCXX_FILESYSTEM_IS_WINDOWS]:
+ Use codecvt_utf8_utf16 instead of codecvt_utf8. Use
+ __str_codecvt_in_all to fail for partial conversions and throw on
+ error.
+ [!_GLIBCXX_FILESYSTEM_IS_WINDOWS && _GLIBCXX_USE_CHAR8_T]
+ (path::_Cvt<char8_t>): Add explicit specialization.
+ [_GLIBCXX_FILESYSTEM_IS_WINDOWS] (path::_Cvt::_S_wconvert): Remove
+ overloads.
+ [_GLIBCXX_FILESYSTEM_IS_WINDOWS] (path::_Cvt::_S_convert): Use
+ if-constexpr instead of dispatching to _S_wconvert. Use codecvt
+ instead of codecvt_utf8. Use __str_codecvt_in_all and
+ __str_codecvt_out_all.
+ [!_GLIBCXX_FILESYSTEM_IS_WINDOWS] (path::_Cvt::_S_convert): Use
+ codecvt instead of codecvt_utf8. Use __str_codecvt_out_all.
+ (path::_S_str_convert) [_GLIBCXX_FILESYSTEM_IS_WINDOWS]: Use
+ codecvt_utf8_utf16 instead of codecvt_utf8. Construct return values
+ with allocator. Use __str_codecvt_out_all. Fallthrough to POSIX code
+ after converting to UTF-8.
+ (path::_S_str_convert): Use codecvt instead of codecvt_utf8. Use
+ __str_codecvt_in_all.
+ (path::string): Fix initialization of string types with different
+ allocators.
+ (path::u8string) [_GLIBCXX_FILESYSTEM_IS_WINDOWS]: Use
+ codecvt_utf8_utf16 instead of codecvt_utf8. Use __str_codecvt_out_all.
+ * include/bits/locale_conv.h (__do_str_codecvt): Reorder static and
+ runtime conditions.
+ (__str_codecvt_out_all, __str_codecvt_in_all): New functions that
+ return false for partial conversions.
+ * include/experimental/bits/fs_path.h (u8path)
+ [_GLIBCXX_FILESYSTEM_IS_WINDOWS]: Implement correctly for mingw.
+ [_GLIBCXX_FILESYSTEM_IS_WINDOWS] (path::_Cvt::_S_wconvert): Add
+ missing handling for char8_t. Use codecvt and codecvt_utf8_utf16
+ instead of codecvt_utf8. Use __str_codecvt_in_all and
+ __str_codecvt_out_all.
+ [!_GLIBCXX_FILESYSTEM_IS_WINDOWS] (path::_Cvt::_S_convert): Use
+ codecvt instead of codecvt_utf8. Use __str_codecvt_out_all.
+ (path::string) [_GLIBCXX_FILESYSTEM_IS_WINDOWS]: Use
+ codecvt_utf8_utf16 instead of codecvt_utf8. Construct return values
+ with allocator. Use __str_codecvt_out_all and __str_codecvt_in_all.
+ (path::string) [!_GLIBCXX_FILESYSTEM_IS_WINDOWS]: Use
+ __str_codecvt_in_all.
+ (path::u8string) [_GLIBCXX_FILESYSTEM_IS_WINDOWS]: Use
+ codecvt_utf8_utf16 instead of codecvt_utf8. Use __str_codecvt_out_all.
+ * src/c++17/fs_path.cc (path::_S_convert_loc): Use
+ __str_codecvt_in_all.
+ * src/filesystem/path.cc (path::_S_convert_loc): Likewise.
+ * testsuite/27_io/filesystem/path/construct/90281.cc: New test.
+ * testsuite/27_io/filesystem/path/factory/u8path.cc: New test.
+ * testsuite/27_io/filesystem/path/native/string.cc: Test with empty
+ strings and with Unicode characters outside the basic multilingual
+ plane.
+ * testsuite/27_io/filesystem/path/native/alloc.cc: New test.
+ * testsuite/experimental/filesystem/path/construct/90281.cc: New test.
+ * testsuite/experimental/filesystem/path/factory/u8path.cc: New test.
+ * testsuite/experimental/filesystem/path/native/alloc.cc: New test.
+ * testsuite/experimental/filesystem/path/native/string.cc: Test with
+ empty strings and with Unicode characters outside the basic
+ multilingual plane.
+
2019-06-17 François Dumont <fdumont@gcc.gnu.org>
Jonathan Wakely <jwakely@redhat.com>
-> decltype(filesystem::path(__first, __last, std::locale::classic()))
{
#ifdef _GLIBCXX_FILESYSTEM_IS_WINDOWS
- codecvt_utf8<path::value_type> __cvt;
+ // XXX This assumes native wide encoding is UTF-16.
+ std::codecvt_utf8_utf16<path::value_type> __cvt;
path::string_type __tmp;
if constexpr (is_pointer_v<_InputIterator>)
{
- if (__str_codecvt_in(__first, __last, __tmp, __cvt))
+ if (__str_codecvt_in_all(__first, __last, __tmp, __cvt))
return path{ __tmp };
}
else
{
const std::string __u8str{__first, __last};
const char* const __ptr = __u8str.data();
- if (__str_codecvt_in(__ptr, __ptr + __u8str.size(), __tmp, __cvt))
+ if (__str_codecvt_in_all(__ptr, __ptr + __u8str.size(), __tmp, __cvt))
return path{ __tmp };
}
- return {};
+ _GLIBCXX_THROW_OR_ABORT(filesystem_error(
+ "Cannot convert character sequence",
+ std::make_error_code(errc::illegal_byte_sequence)));
#else
+ // This assumes native ordinary encoding is UTF-8.
return path{ __first, __last };
#endif
}
{ return string_type{__first, __last}; }
};
+#if !defined _GLIBCXX_FILESYSTEM_IS_WINDOWS && defined _GLIBCXX_USE_CHAR8_T
+ // For POSIX converting from char8_t to char is also 'noconv', so this
+ // specialization builds the native string directly from the elements of
+ // [__first, __last) instead of going through a codecvt facet.
+ template<>
+ struct path::_Cvt<char8_t>
+ {
+ template<typename _Iter>
+ static string_type
+ _S_convert(_Iter __first, _Iter __last)
+ { return string_type(__first, __last); }
+ };
+#endif
+
template<typename _CharT>
struct path::_Cvt
{
-#ifdef _GLIBCXX_FILESYSTEM_IS_WINDOWS
static string_type
- _S_wconvert(const char* __f, const char* __l, true_type)
+ _S_convert(const _CharT* __f, const _CharT* __l)
{
- using _Cvt = std::codecvt<wchar_t, char, mbstate_t>;
- const auto& __cvt = std::use_facet<_Cvt>(std::locale{});
+#ifdef _GLIBCXX_FILESYSTEM_IS_WINDOWS
std::wstring __wstr;
- if (__str_codecvt_in(__f, __l, __wstr, __cvt))
- return __wstr;
- _GLIBCXX_THROW_OR_ABORT(filesystem_error(
- "Cannot convert character sequence",
- std::make_error_code(errc::illegal_byte_sequence)));
- }
-
- static string_type
- _S_wconvert(const _CharT* __f, const _CharT* __l, false_type)
- {
- std::codecvt_utf8<_CharT> __cvt;
- std::string __str;
- if (__str_codecvt_out(__f, __l, __str, __cvt))
+ if constexpr (is_same_v<_CharT, char>)
{
- const char* __f2 = __str.data();
- const char* __l2 = __f2 + __str.size();
- std::codecvt_utf8<wchar_t> __wcvt;
- std::wstring __wstr;
- if (__str_codecvt_in(__f2, __l2, __wstr, __wcvt))
+ struct _UCvt : std::codecvt<wchar_t, char, std::mbstate_t>
+ { } __cvt;
+ if (__str_codecvt_in_all(__f, __l, __wstr, __cvt))
return __wstr;
}
- _GLIBCXX_THROW_OR_ABORT(filesystem_error(
- "Cannot convert character sequence",
- std::make_error_code(errc::illegal_byte_sequence)));
- }
-
- static string_type
- _S_convert(const _CharT* __f, const _CharT* __l)
- {
- return _S_wconvert(__f, __l, is_same<_CharT, char>{});
- }
-#else
- static string_type
- _S_convert(const _CharT* __f, const _CharT* __l)
- {
#ifdef _GLIBCXX_USE_CHAR8_T
- if constexpr (is_same_v<_CharT, char8_t>)
+ else if constexpr (is_same_v<_CharT, char8_t>)
{
- string_type __str(__f, __l);
- return __str;
+ const char* __f2 = (const char*)__f;
+ const char* __l2 = (const char*)__l;
+ std::codecvt_utf8_utf16<wchar_t> __wcvt;
+ if (__str_codecvt_in_all(__f2, __l2, __wstr, __wcvt))
+ return __wstr;
}
- else
- {
#endif
- std::codecvt_utf8<_CharT> __cvt;
+ else // char16_t or char32_t
+ {
+ struct _UCvt : std::codecvt<_CharT, char, std::mbstate_t>
+ { } __cvt;
std::string __str;
- if (__str_codecvt_out(__f, __l, __str, __cvt))
- return __str;
-#ifdef _GLIBCXX_USE_CHAR8_T
+ if (__str_codecvt_out_all(__f, __l, __str, __cvt))
+ {
+ const char* __f2 = __str.data();
+ const char* __l2 = __f2 + __str.size();
+ std::codecvt_utf8_utf16<wchar_t> __wcvt;
+ if (__str_codecvt_in_all(__f2, __l2, __wstr, __wcvt))
+ return __wstr;
+ }
}
+#else // ! windows
+ struct _UCvt : std::codecvt<_CharT, char, std::mbstate_t>
+ { } __cvt;
+ std::string __str;
+ if (__str_codecvt_out_all(__f, __l, __str, __cvt))
+ return __str;
#endif
_GLIBCXX_THROW_OR_ABORT(filesystem_error(
"Cannot convert character sequence",
std::make_error_code(errc::illegal_byte_sequence)));
}
-#endif
static string_type
_S_convert(_CharT* __f, _CharT* __l)
std::basic_string<_CharT, _Traits, _Allocator>
path::_S_str_convert(const string_type& __str, const _Allocator& __a)
{
- if (__str.size() == 0)
- return std::basic_string<_CharT, _Traits, _Allocator>(__a);
+ static_assert(!is_same_v<_CharT, value_type>);
- const value_type* __first = __str.data();
- const value_type* __last = __first + __str.size();
+ using _WString = basic_string<_CharT, _Traits, _Allocator>;
+
+ if (__str.size() == 0)
+ return _WString(__a);
#ifdef _GLIBCXX_FILESYSTEM_IS_WINDOWS
+ // First convert native string from UTF-16 to UTF-8.
+ // XXX This assumes that the execution wide-character set is UTF-16.
+ std::codecvt_utf8_utf16<value_type> __cvt;
+
using _CharAlloc = __alloc_rebind<_Allocator, char>;
using _String = basic_string<char, char_traits<char>, _CharAlloc>;
- using _WString = basic_string<_CharT, _Traits, _Allocator>;
-
- // use codecvt_utf8<wchar_t> to convert native string to UTF-8
- codecvt_utf8<value_type> __cvt;
_String __u8str{_CharAlloc{__a}};
- if (__str_codecvt_out(__first, __last, __u8str, __cvt))
- {
- if constexpr (is_same_v<_CharT, char>)
- return __u8str;
-#ifdef _GLIBCXX_USE_CHAR8_T
- else if constexpr (is_same_v<_CharT, char8_t>)
- {
- const char* __f = __u8str.data();
- const char* __l = __f + __u8str.size();
- _WString __wstr(__f, __l);
- return __wstr;
- }
-#endif
- else
- {
- _WString __wstr;
- // use codecvt_utf8<_CharT> to convert UTF-8 to wide string
- codecvt_utf8<_CharT> __cvt;
- const char* __f = __u8str.data();
- const char* __l = __f + __u8str.size();
- if (__str_codecvt_in(__f, __l, __wstr, __cvt))
- return __wstr;
- }
- }
+ const value_type* __wfirst = __str.data();
+ const value_type* __wlast = __wfirst + __str.size();
+ if (__str_codecvt_out_all(__wfirst, __wlast, __u8str, __cvt)) {
+ if constexpr (is_same_v<_CharT, char>)
+ return __u8str; // XXX assumes native ordinary encoding is UTF-8.
+ else {
+
+ const char* __first = __u8str.data();
+ const char* __last = __first + __u8str.size();
#else
+ const value_type* __first = __str.data();
+ const value_type* __last = __first + __str.size();
+#endif
+
+ // Convert UTF-8 string to requested format.
#ifdef _GLIBCXX_USE_CHAR8_T
if constexpr (is_same_v<_CharT, char8_t>)
- {
- basic_string<_CharT, _Traits, _Allocator> __wstr{__first, __last, __a};
- return __wstr;
- }
+ return _WString(__first, __last, __a);
else
- {
#endif
- codecvt_utf8<_CharT> __cvt;
- basic_string<_CharT, _Traits, _Allocator> __wstr{__a};
- if (__str_codecvt_in(__first, __last, __wstr, __cvt))
+ {
+ // Convert UTF-8 to wide string.
+ _WString __wstr(__a);
+ struct _UCvt : std::codecvt<_CharT, char, std::mbstate_t> { } __cvt;
+ if (__str_codecvt_in_all(__first, __last, __wstr, __cvt))
return __wstr;
-#ifdef _GLIBCXX_USE_CHAR8_T
}
-#endif
+
+#ifdef _GLIBCXX_FILESYSTEM_IS_WINDOWS
+ } }
#endif
_GLIBCXX_THROW_OR_ABORT(filesystem_error(
"Cannot convert character sequence",
path::string(const _Allocator& __a) const
{
if constexpr (is_same_v<_CharT, value_type>)
- return { _M_pathname, __a };
+ return { _M_pathname.c_str(), _M_pathname.length(), __a };
else
return _S_str_convert<_CharT, _Traits>(_M_pathname, __a);
}
{
#ifdef _GLIBCXX_FILESYSTEM_IS_WINDOWS
std::string __str;
- // convert from native encoding to UTF-8
- codecvt_utf8<value_type> __cvt;
+ // convert from native wide encoding (assumed to be UTF-16) to UTF-8
+ std::codecvt_utf8_utf16<value_type> __cvt;
const value_type* __first = _M_pathname.data();
const value_type* __last = __first + _M_pathname.size();
- if (__str_codecvt_out(__first, __last, __str, __cvt))
+ if (__str_codecvt_out_all(__first, __last, __str, __cvt))
return __str;
_GLIBCXX_THROW_OR_ABORT(filesystem_error(
"Cannot convert character sequence",
return false;
}
- if (__result == codecvt_base::noconv)
- {
- // The codecvt facet will only return noconv when the types are
- // the same, so avoid instantiating basic_string::assign otherwise
- if _GLIBCXX17_CONSTEXPR (is_same<typename _Codecvt::intern_type,
- typename _Codecvt::extern_type>())
- {
- __outstr.assign(__first, __last);
- __count = __last - __first;
- }
- }
- else
- {
- __outstr.resize(__outchars);
- __count = __next - __first;
- }
+ // The codecvt facet will only return noconv when the types are
+ // the same, so avoid instantiating basic_string::assign otherwise
+ if _GLIBCXX17_CONSTEXPR (is_same<typename _Codecvt::intern_type,
+ typename _Codecvt::extern_type>())
+ if (__result == codecvt_base::noconv)
+ {
+ __outstr.assign(__first, __last);
+ __count = __last - __first;
+ return true;
+ }
+ __outstr.resize(__outchars);
+ __count = __next - __first;
return true;
}
__count, __fn);
}
+ // As above, but with no __count parameter
template<typename _CharT, typename _Traits, typename _Alloc, typename _State>
inline bool
__str_codecvt_in(const char* __first, const char* __last,
return __str_codecvt_in(__first, __last, __outstr, __cvt, __state, __n);
}
+ // Convert the narrow range [__first, __last) into __outstr using __cvt,
+ // starting from a value-initialized conversion state. Returns true only
+ // if __str_codecvt_in succeeds and the count it reports equals the length
+ // of the input range, so a partial conversion yields false.
+ template<typename _CharT, typename _Traits, typename _Alloc, typename _State>
+ inline bool
+ __str_codecvt_in_all(const char* __first, const char* __last,
+ basic_string<_CharT, _Traits, _Alloc>& __outstr,
+ const codecvt<_CharT, char, _State>& __cvt)
+ {
+ _State __state = {};
+ size_t __n;
+ // __last - __first is a signed ptrdiff_t; cast it so the comparison
+ // with the unsigned count is not a mixed-sign comparison.
+ return __str_codecvt_in(__first, __last, __outstr, __cvt, __state, __n)
+ && (__n == static_cast<size_t>(__last - __first));
+ }
+
// Convert wide character string to narrow.
template<typename _CharT, typename _Traits, typename _Alloc, typename _State>
inline bool
__count, __fn);
}
+ // As above, but with no __count parameter
template<typename _CharT, typename _Traits, typename _Alloc, typename _State>
inline bool
__str_codecvt_out(const _CharT* __first, const _CharT* __last,
return __str_codecvt_out(__first, __last, __outstr, __cvt, __state, __n);
}
+ // Convert the range [__first, __last) of _CharT into the narrow string
+ // __outstr using __cvt, starting from a value-initialized conversion
+ // state. Returns true only if __str_codecvt_out succeeds and the count it
+ // reports equals the length of the input range, so a partial conversion
+ // yields false.
+ template<typename _CharT, typename _Traits, typename _Alloc, typename _State>
+ inline bool
+ __str_codecvt_out_all(const _CharT* __first, const _CharT* __last,
+ basic_string<char, _Traits, _Alloc>& __outstr,
+ const codecvt<_CharT, char, _State>& __cvt)
+ {
+ _State __state = {};
+ size_t __n;
+ // __last - __first is a signed ptrdiff_t; cast it so the comparison
+ // with the unsigned count is not a mixed-sign comparison.
+ return __str_codecvt_out(__first, __last, __outstr, __cvt, __state, __n)
+ && (__n == static_cast<size_t>(__last - __first));
+ }
+
#ifdef _GLIBCXX_USE_CHAR8_T
// Convert wide character string to narrow.
}
/// Create a path from a UTF-8-encoded sequence of char
- // TODO constrain with _Path<Source> and __value_type_is_char
- template<typename _Source>
+ // TODO constrain with _Path<InputIterator, InputIterator> and __value_type_is_char
+ template<typename _InputIterator>
inline path
- u8path(const _Source& __source)
+ u8path(_InputIterator __first, _InputIterator __last)
{
#ifdef _GLIBCXX_FILESYSTEM_IS_WINDOWS
- return path{ path::string_type{__source} };
+ // XXX This assumes native wide encoding is UTF-16.
+ std::codecvt_utf8_utf16<path::value_type> __cvt;
+ path::string_type __tmp;
+ const std::string __u8str{__first, __last};
+ const char* const __ptr = __u8str.data();
+ if (__str_codecvt_in_all(__ptr, __ptr + __u8str.size(), __tmp, __cvt))
+ return path{ __tmp };
+ _GLIBCXX_THROW_OR_ABORT(filesystem_error(
+ "Cannot convert character sequence",
+ std::make_error_code(errc::illegal_byte_sequence)));
#else
- return path{ __source };
+ return path{ __first, __last };
#endif
}
/// Create a path from a UTF-8-encoded sequence of char
- // TODO constrain with _Path<InputIterator, InputIterator> and __value_type_is_char
- template<typename _InputIterator>
+ // TODO constrain with _Path<Source> and __value_type_is_char
+ template<typename _Source>
inline path
- u8path(_InputIterator __first, _InputIterator __last)
+ u8path(const _Source& __source)
{
#ifdef _GLIBCXX_FILESYSTEM_IS_WINDOWS
- return path{ path::string_type{__first, __last} };
+ std::string __s = path::_S_string_from_iter(__source);
+ return filesystem::u8path(__s.data(), __s.data() + __s.size());
#else
- return path{ __first, __last };
+ return path{ __source };
#endif
}
using _Cvt = std::codecvt<wchar_t, char, mbstate_t>;
const auto& __cvt = std::use_facet<_Cvt>(std::locale{});
std::wstring __wstr;
- if (__str_codecvt_in(__f, __l, __wstr, __cvt))
+ if (__str_codecvt_in_all(__f, __l, __wstr, __cvt))
return __wstr;
_GLIBCXX_THROW_OR_ABORT(filesystem_error(
"Cannot convert character sequence",
static string_type
_S_wconvert(const _CharT* __f, const _CharT* __l, false_type)
{
- std::codecvt_utf8<_CharT> __cvt;
- std::string __str;
- if (__str_codecvt_out(__f, __l, __str, __cvt))
+#ifdef _GLIBCXX_USE_CHAR8_T
+ if constexpr (is_same<_CharT, char8_t>::value)
+ return _S_wconvert((const char*)__f, (const char*)__l, true_type());
+ else
+#endif
{
- const char* __f2 = __str.data();
- const char* __l2 = __f2 + __str.size();
- std::codecvt_utf8<wchar_t> __wcvt;
- std::wstring __wstr;
- if (__str_codecvt_in(__f2, __l2, __wstr, __wcvt))
- return __wstr;
+ struct _UCvt : std::codecvt<_CharT, char, std::mbstate_t>
+ { } __cvt;
+ std::string __str;
+ if (__str_codecvt_out_all(__f, __l, __str, __cvt))
+ {
+ const char* __f2 = __str.data();
+ const char* __l2 = __f2 + __str.size();
+ std::codecvt_utf8_utf16<wchar_t> __wcvt;
+ std::wstring __wstr;
+ if (__str_codecvt_in_all(__f2, __l2, __wstr, __wcvt))
+ return __wstr;
+ }
+ _GLIBCXX_THROW_OR_ABORT(filesystem_error(
+ "Cannot convert character sequence",
+ std::make_error_code(errc::illegal_byte_sequence)));
}
- _GLIBCXX_THROW_OR_ABORT(filesystem_error(
- "Cannot convert character sequence",
- std::make_error_code(errc::illegal_byte_sequence)));
}
static string_type
{
#ifdef _GLIBCXX_USE_CHAR8_T
if constexpr (is_same<_CharT, char8_t>::value)
- {
- string_type __str(__f, __l);
- return __str;
- }
+ return string_type(__f, __l);
else
{
#endif
- std::codecvt_utf8<_CharT> __cvt;
+ struct _UCvt : std::codecvt<_CharT, char, std::mbstate_t>
+ { } __cvt;
std::string __str;
- if (__str_codecvt_out(__f, __l, __str, __cvt))
+ if (__str_codecvt_out_all(__f, __l, __str, __cvt))
return __str;
#ifdef _GLIBCXX_USE_CHAR8_T
}
if (is_same<_CharT, value_type>::value)
return { _M_pathname.begin(), _M_pathname.end(), __a };
+ using _WString = basic_string<_CharT, _Traits, _Allocator>;
+
const value_type* __first = _M_pathname.data();
const value_type* __last = __first + _M_pathname.size();
#ifdef _GLIBCXX_FILESYSTEM_IS_WINDOWS
using _CharAlloc = __alloc_rebind<_Allocator, char>;
using _String = basic_string<char, char_traits<char>, _CharAlloc>;
- using _WString = basic_string<_CharT, _Traits, _Allocator>;
- // use codecvt_utf8<wchar_t> to convert native string to UTF-8
- codecvt_utf8<value_type> __cvt;
+ // First convert native string from UTF-16 to UTF-8.
+ // XXX This assumes that the execution wide-character set is UTF-16.
+ codecvt_utf8_utf16<value_type> __cvt;
_String __u8str{_CharAlloc{__a}};
- if (__str_codecvt_out(__first, __last, __u8str, __cvt))
+ if (__str_codecvt_out_all(__first, __last, __u8str, __cvt))
{
struct
{
return std::__addressof(__to);
}
else
- {
#endif
- // use codecvt_utf8<_CharT> to convert UTF-8 to wide string
- codecvt_utf8<_CharT> __cvt;
+ {
+ // Convert UTF-8 to wide string.
+ struct _UCvt : std::codecvt<_CharT, char, std::mbstate_t>
+ { } __cvt;
const char* __f = __from.data();
const char* __l = __f + __from.size();
- if (__str_codecvt_in(__f, __l, __to, __cvt))
+ if (__str_codecvt_in_all(__f, __l, __to, __cvt))
return std::__addressof(__to);
-#ifdef _GLIBCXX_USE_CHAR8_T
}
-#endif
return nullptr;
}
} __dispatch;
- _WString __wstr;
+ _WString __wstr(__a);
if (auto* __p = __dispatch(__u8str, __wstr, is_same<_CharT, char>{}))
return *__p;
}
#else
#ifdef _GLIBCXX_USE_CHAR8_T
if constexpr (is_same<_CharT, char8_t>::value)
- {
- basic_string<_CharT, _Traits, _Allocator> __wstr{__first, __last, __a};
- return __wstr;
- }
+ return _WString(__first, __last, __a);
else
- {
#endif
- codecvt_utf8<_CharT> __cvt;
- basic_string<_CharT, _Traits, _Allocator> __wstr{__a};
- if (__str_codecvt_in(__first, __last, __wstr, __cvt))
+ {
+ struct _UCvt : std::codecvt<_CharT, char, std::mbstate_t> { } __cvt;
+ _WString __wstr(__a);
+ if (__str_codecvt_in_all(__first, __last, __wstr, __cvt))
return __wstr;
-#ifdef _GLIBCXX_USE_CHAR8_T
}
-#endif
#endif
_GLIBCXX_THROW_OR_ABORT(filesystem_error(
"Cannot convert character sequence",
{
#ifdef _GLIBCXX_FILESYSTEM_IS_WINDOWS
std::string __str;
- // convert from native encoding to UTF-8
- codecvt_utf8<value_type> __cvt;
+ // convert from native wide encoding (assumed to be UTF-16) to UTF-8
+ std::codecvt_utf8_utf16<value_type> __cvt;
const value_type* __first = _M_pathname.data();
const value_type* __last = __first + _M_pathname.size();
- if (__str_codecvt_out(__first, __last, __str, __cvt))
+ if (__str_codecvt_out_all(__first, __last, __str, __cvt))
return __str;
_GLIBCXX_THROW_OR_ABORT(filesystem_error(
"Cannot convert character sequence",
#if _GLIBCXX_USE_WCHAR_T
auto& __cvt = std::use_facet<codecvt<wchar_t, char, mbstate_t>>(__loc);
basic_string<wchar_t> __ws;
- if (!__str_codecvt_in(__first, __last, __ws, __cvt))
+ if (!__str_codecvt_in_all(__first, __last, __ws, __cvt))
_GLIBCXX_THROW_OR_ABORT(filesystem_error(
"Cannot convert character sequence",
std::make_error_code(errc::illegal_byte_sequence)));
#if _GLIBCXX_USE_WCHAR_T
auto& __cvt = std::use_facet<codecvt<wchar_t, char, mbstate_t>>(__loc);
basic_string<wchar_t> __ws;
- if (!__str_codecvt_in(__first, __last, __ws, __cvt))
+ if (!__str_codecvt_in_all(__first, __last, __ws, __cvt))
_GLIBCXX_THROW_OR_ABORT(filesystem_error(
"Cannot convert character sequence",
std::make_error_code(errc::illegal_byte_sequence)));
--- /dev/null
+// Copyright (C) 2019 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library. This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License along
+// with this library; see the file COPYING3. If not see
+// <http://www.gnu.org/licenses/>.
+
+// { dg-options "-std=gnu++17" }
+// { dg-do run { target c++17 } }
+
+#include <filesystem>
+#include <testsuite_hooks.h>
+
+namespace fs = std::filesystem;
+
+// Returns the expected native code units for U+1D11E: the four-byte UTF-8
+// sequence F0 9D 84 9E when C is char, otherwise a wide literal holding
+// the UTF-16 surrogate pair D834 DD1E.
+template<typename C = fs::path::value_type>
+const C* code_units()
+{
+ if constexpr (std::is_same_v<C, char>)
+ return "\xf0\x9d\x84\x9e";
+ else
+ return L"\xD834\xDD1E";
+}
+
+// PR libstdc++/90281
+// A path built from the UTF-8 (via u8path), UTF-16 and UTF-32 spellings of
+// U+1D11E must in each case have a native() string equal to code_units().
+void
+test01()
+{
+ const fs::path::string_type expected = code_units();
+
+ fs::path p8 = fs::u8path(u8"\U0001D11E");
+ VERIFY( p8.native() == expected );
+ fs::path p16(u"\U0001D11E");
+ VERIFY( p16.native() == expected );
+ fs::path p32(U"\U0001D11E");
+ VERIFY( p32.native() == expected );
+}
+
+// Test driver: runs test01.
+int
+main()
+{
+ test01();
+}
--- /dev/null
+// Copyright (C) 2019 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library. This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License along
+// with this library; see the file COPYING3. If not see
+// <http://www.gnu.org/licenses/>.
+
+// { dg-options "-std=gnu++17" }
+// { dg-do run { target c++17 } }
+
+#include <filesystem>
+#include <testsuite_hooks.h>
+
+namespace fs = std::filesystem;
+
+// Checks u8path with valid input: an empty string gives an empty path, and
+// both an ASCII name and the UTF-8 bytes of U+1D11E convert back to the
+// same text through u8string().
+void
+test01()
+{
+ fs::path p = fs::u8path("");
+ VERIFY( p.empty() );
+
+ p = fs::u8path("filename");
+ VERIFY( p.u8string() == u8"filename" );
+
+ p = fs::u8path("\xf0\x9d\x84\x9e");
+ VERIFY( p.u8string() == u8"\U0001D11E" );
+}
+
+// Checks that malformed UTF-8 results in a filesystem_error, thrown either
+// by u8path itself or by the following u16string() conversion. Reaching
+// VERIFY( false ) means neither call threw.
+void
+test02()
+{
+ // These calls to u8path are undefined, because they fail to meet the
+ // requirement that the input is valid UTF-8 data. For Windows u8path
+ // will fail. For POSIX constructing an invalid path appears to work,
+ // but will fail when converted to a different encoding.
+
+ try {
+ auto p = fs::u8path("\xf0\x9d"); // truncated 4-byte UTF-8 sequence (2 of 4 bytes)
+ p.u16string();
+ VERIFY( false );
+ } catch(const fs::filesystem_error&) {
+ }
+
+ try {
+ auto p = fs::u8path("\xf0"); // lead byte of a 4-byte sequence, no continuation bytes
+ p.u16string();
+ VERIFY( false );
+ } catch(const fs::filesystem_error&) {
+ }
+}
+
+// Test driver: runs test01 then test02.
+int
+main()
+{
+ test01();
+ test02();
+}
--- /dev/null
+// Copyright (C) 2016-2019 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library. This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License along
+// with this library; see the file COPYING3. If not see
+// <http://www.gnu.org/licenses/>.
+
+// { dg-options "-std=gnu++17" }
+// { dg-do run { target c++17 } }
+
+#include <filesystem>
+#include <string>
+#include <testsuite_hooks.h>
+#include <testsuite_allocator.h>
+
+// Allocator type used by the tests below; instances are constructed from an
+// integer and compared with ==.
+// NOTE(review): presumably instances built from different integers compare
+// unequal -- confirm against testsuite_allocator.h.
+template<typename C>
+ using alloc = __gnu_test::uneq_allocator<C>;
+
+// For an empty path, path::string<C>(a) must return an empty string whose
+// get_allocator() compares equal to the allocator that was passed in, for
+// each character type (char8_t only when _GLIBCXX_USE_CHAR8_T is defined).
+void
+test01()
+{
+ using namespace std::filesystem;
+ path p;
+
+ auto str = p.string<char>(alloc<char>(1));
+ VERIFY( str == "" );
+ VERIFY( str.get_allocator() == alloc<char>(1) );
+
+#ifdef _GLIBCXX_USE_CHAR8_T
+ auto str8 = p.string<char8_t>(alloc<char8_t>(1));
+ VERIFY( str8 == u8"" );
+ VERIFY( str8.get_allocator() == alloc<char8_t>(1) );
+#endif
+
+ auto strw = p.string<wchar_t>(alloc<wchar_t>(2));
+ VERIFY( strw == L"" );
+ VERIFY( strw.get_allocator() == alloc<wchar_t>(2) );
+
+ auto str16 = p.string<char16_t>(alloc<char16_t>(3));
+ VERIFY( str16 == u"" );
+ VERIFY( str16.get_allocator() == alloc<char16_t>(3) );
+
+ auto str32 = p.string<char32_t>(alloc<char32_t>(4));
+ VERIFY( str32 == U"" );
+ VERIFY( str32.get_allocator() == alloc<char32_t>(4) );
+}
+
+// Same checks as for the empty path, but with a 26-character ASCII path:
+// path::string<C>(a) must return the same text in each character type and
+// the result's get_allocator() must compare equal to the allocator passed.
+void
+test02()
+{
+ using namespace std::filesystem;
+ path p = "abcdefghijklmnopqrstuvwxyz";
+
+ auto str = p.string<char>(alloc<char>(1));
+ VERIFY( str == "abcdefghijklmnopqrstuvwxyz" );
+ VERIFY( str.get_allocator() == alloc<char>(1) );
+
+#ifdef _GLIBCXX_USE_CHAR8_T
+ auto str8 = p.string<char8_t>(alloc<char8_t>(1));
+ VERIFY( str8 == u8"abcdefghijklmnopqrstuvwxyz" );
+ VERIFY( str8.get_allocator() == alloc<char8_t>(1) );
+#endif
+
+ auto strw = p.string<wchar_t>(alloc<wchar_t>(2));
+ VERIFY( strw == L"abcdefghijklmnopqrstuvwxyz" );
+ VERIFY( strw.get_allocator() == alloc<wchar_t>(2) );
+
+ auto str16 = p.string<char16_t>(alloc<char16_t>(3));
+ VERIFY( str16 == u"abcdefghijklmnopqrstuvwxyz" );
+ VERIFY( str16.get_allocator() == alloc<char16_t>(3) );
+
+ auto str32 = p.string<char32_t>(alloc<char32_t>(4));
+ VERIFY( str32 == U"abcdefghijklmnopqrstuvwxyz" );
+ VERIFY( str32.get_allocator() == alloc<char32_t>(4) );
+}
+
+// Test driver: runs test01 then test02.
+int
+main()
+{
+ test01();
+ test02();
+}
VERIFY( str32 == p.u32string() );
}
+// A default-constructed path must convert to empty UTF-8, UTF-16 and
+// UTF-32 strings.
+void
+test03()
+{
+ std::filesystem::path p;
+ auto str8 = p.u8string();
+ VERIFY( str8 == u8"" );
+ auto str16 = p.u16string();
+ VERIFY( str16 == u"" );
+ auto str32 = p.u32string();
+ VERIFY( str32 == U"" );
+}
+
+// A path created by u8path from the UTF-8 bytes of U+1D11E (a character
+// outside the basic multilingual plane) must convert to that same
+// character through u8string(), u16string() and u32string().
+void
+test04()
+{
+ // PR libstdc++/90281
+ auto p = std::filesystem::u8path("\xf0\x9d\x84\x9e");
+ auto str8 = p.u8string();
+ VERIFY( str8 == u8"\U0001D11E" );
+ auto str16 = p.u16string();
+ VERIFY( str16 == u"\U0001D11E" );
+ auto str32 = p.u32string();
+ VERIFY( str32 == U"\U0001D11E" );
+}
+
int
main()
{
test01();
test02();
+ test03();
+ test04();
}
--- /dev/null
+// Copyright (C) 2019 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library. This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License along
+// with this library; see the file COPYING3. If not see
+// <http://www.gnu.org/licenses/>.
+
+// { dg-options "-lstdc++fs" }
+// { dg-do run { target c++11 } }
+// { dg-require-filesystem-ts "" }
+
+#include <experimental/filesystem>
+#include <testsuite_hooks.h>
+
+namespace fs = std::experimental::filesystem;
+
+// Overload enabled when path::value_type is char: returns the four-byte
+// UTF-8 sequence F0 9D 84 9E for U+1D11E.
+template<bool B = std::is_same<fs::path::value_type, char>::value>
+typename std::enable_if<B, const char*>::type
+code_units()
+{ return "\xf0\x9d\x84\x9e"; }
+
+// Overload enabled when path::value_type is wchar_t: returns a wide literal
+// holding the UTF-16 surrogate pair D834 DD1E for U+1D11E.
+template<bool B = std::is_same<fs::path::value_type, wchar_t>::value>
+typename std::enable_if<B, const wchar_t*>::type
+code_units()
+{ return L"\xD834\xDD1E"; }
+
+// PR libstdc++/90281
+// A path built from the UTF-8 (via u8path), UTF-16 and UTF-32 spellings of
+// U+1D11E must in each case have a native() string equal to code_units().
+void
+test01()
+{
+ const fs::path::string_type expected = code_units();
+
+ fs::path p8 = fs::u8path(u8"\U0001D11E");
+ VERIFY( p8.native() == expected );
+ fs::path p16(u"\U0001D11E");
+ VERIFY( p16.native() == expected );
+ fs::path p32(U"\U0001D11E");
+ VERIFY( p32.native() == expected );
+}
+
+// Test driver: runs test01.
+int
+main()
+{
+ test01();
+}
--- /dev/null
+// Copyright (C) 2019 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library. This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License along
+// with this library; see the file COPYING3. If not see
+// <http://www.gnu.org/licenses/>.
+
+// { dg-options "-lstdc++fs" }
+// { dg-do run { target c++11 } }
+// { dg-require-filesystem-ts "" }
+
+#include <experimental/filesystem>
+#include <testsuite_hooks.h>
+
+namespace fs = std::experimental::filesystem;
+
+// Checks u8path with valid input: an empty string gives an empty path, and
+// both an ASCII name and the UTF-8 bytes of U+1D11E convert back to the
+// same text through u8string().
+void
+test01()
+{
+ fs::path p = fs::u8path("");
+ VERIFY( p.empty() );
+
+ p = fs::u8path("filename");
+ VERIFY( p.u8string() == u8"filename" );
+
+ p = fs::u8path("\xf0\x9d\x84\x9e");
+ VERIFY( p.u8string() == u8"\U0001D11E" );
+}
+
+// Checks that malformed UTF-8 results in a filesystem_error, thrown either
+// by u8path itself or by the following u16string() conversion. Reaching
+// VERIFY( false ) means neither call threw.
+void
+test02()
+{
+ // These calls to u8path are undefined, because they fail to meet the
+ // requirement that the input is valid UTF-8 data. For Windows u8path
+ // will fail. For POSIX constructing an invalid path appears to work,
+ // but will fail when converted to a different encoding.
+
+ try {
+ auto p = fs::u8path("\xf0\x9d"); // truncated 4-byte UTF-8 sequence (2 of 4 bytes)
+ p.u16string();
+ VERIFY( false );
+ } catch(const fs::filesystem_error&) {
+ }
+
+ try {
+ auto p = fs::u8path("\xf0"); // lead byte of a 4-byte sequence, no continuation bytes
+ p.u16string();
+ VERIFY( false );
+ } catch(const fs::filesystem_error&) {
+ }
+}
+
+// Test driver: runs test01 then test02.
+int
+main()
+{
+ test01();
+ test02();
+}
--- /dev/null
+// Copyright (C) 2016-2019 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library. This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License along
+// with this library; see the file COPYING3. If not see
+// <http://www.gnu.org/licenses/>.
+
+// { dg-options "-lstdc++fs" }
+// { dg-do run { target c++11 } }
+// { dg-require-filesystem-ts "" }
+
+#include <experimental/filesystem>
+#include <string>
+#include <testsuite_hooks.h>
+#include <testsuite_allocator.h>
+
+// Allocator type used by the tests below; instances are constructed from an
+// integer and compared with ==.
+// NOTE(review): presumably instances built from different integers compare
+// unequal -- confirm against testsuite_allocator.h.
+template<typename C>
+ using alloc = __gnu_test::uneq_allocator<C>;
+
+// For an empty path, path::string<C>(a) must return an empty string whose
+// get_allocator() compares equal to the allocator that was passed in, for
+// each character type (char8_t only when _GLIBCXX_USE_CHAR8_T is defined).
+void
+test01()
+{
+ using namespace std::experimental::filesystem;
+ path p;
+
+ auto str = p.string<char>(alloc<char>(1));
+ VERIFY( str == "" );
+ VERIFY( str.get_allocator() == alloc<char>(1) );
+
+#ifdef _GLIBCXX_USE_CHAR8_T
+ auto str8 = p.string<char8_t>(alloc<char8_t>(1));
+ VERIFY( str8 == u8"" );
+ VERIFY( str8.get_allocator() == alloc<char8_t>(1) );
+#endif
+
+ auto strw = p.string<wchar_t>(alloc<wchar_t>(2));
+ VERIFY( strw == L"" );
+ VERIFY( strw.get_allocator() == alloc<wchar_t>(2) );
+
+ auto str16 = p.string<char16_t>(alloc<char16_t>(3));
+ VERIFY( str16 == u"" );
+ VERIFY( str16.get_allocator() == alloc<char16_t>(3) );
+
+ auto str32 = p.string<char32_t>(alloc<char32_t>(4));
+ VERIFY( str32 == U"" );
+ VERIFY( str32.get_allocator() == alloc<char32_t>(4) );
+}
+
+// Same checks as for the empty path, but with a 26-character ASCII path:
+// path::string<C>(a) must return the same text in each character type and
+// the result's get_allocator() must compare equal to the allocator passed.
+void
+test02()
+{
+ using namespace std::experimental::filesystem;
+ path p = "abcdefghijklmnopqrstuvwxyz";
+
+ auto str = p.string<char>(alloc<char>(1));
+ VERIFY( str == "abcdefghijklmnopqrstuvwxyz" );
+ VERIFY( str.get_allocator() == alloc<char>(1) );
+
+#ifdef _GLIBCXX_USE_CHAR8_T
+ auto str8 = p.string<char8_t>(alloc<char8_t>(1));
+ VERIFY( str8 == u8"abcdefghijklmnopqrstuvwxyz" );
+ VERIFY( str8.get_allocator() == alloc<char8_t>(1) );
+#endif
+
+ auto strw = p.string<wchar_t>(alloc<wchar_t>(2));
+ VERIFY( strw == L"abcdefghijklmnopqrstuvwxyz" );
+ VERIFY( strw.get_allocator() == alloc<wchar_t>(2) );
+
+ auto str16 = p.string<char16_t>(alloc<char16_t>(3));
+ VERIFY( str16 == u"abcdefghijklmnopqrstuvwxyz" );
+ VERIFY( str16.get_allocator() == alloc<char16_t>(3) );
+
+ auto str32 = p.string<char32_t>(alloc<char32_t>(4));
+ VERIFY( str32 == U"abcdefghijklmnopqrstuvwxyz" );
+ VERIFY( str32.get_allocator() == alloc<char32_t>(4) );
+}
+
+// Test driver: runs test01 then test02.
+int
+main()
+{
+ test01();
+ test02();
+}
VERIFY( str32 == p.u32string() );
}
+// A default-constructed path must convert to empty UTF-8, UTF-16 and
+// UTF-32 strings.
+void
+test03()
+{
+ std::experimental::filesystem::path p;
+ auto str8 = p.u8string();
+ VERIFY( str8 == u8"" );
+ auto str16 = p.u16string();
+ VERIFY( str16 == u"" );
+ auto str32 = p.u32string();
+ VERIFY( str32 == U"" );
+}
+
+// A path created by u8path from the UTF-8 bytes of U+1D11E (a character
+// outside the basic multilingual plane) must convert to that same
+// character through u8string(), u16string() and u32string().
+void
+test04()
+{
+ // PR libstdc++/90281
+ auto p = std::experimental::filesystem::u8path("\xf0\x9d\x84\x9e");
+ auto str8 = p.u8string();
+ VERIFY( str8 == u8"\U0001D11E" );
+ auto str16 = p.u16string();
+ VERIFY( str16 == u"\U0001D11E" );
+ auto str32 = p.u32string();
+ VERIFY( str32 == U"\U0001D11E" );
+}
+
int
main()
{
test01();
test02();
+ test03();
+ test04();
}