From 4f87bb8d6e8dec21a07f1fba641a78a127281349 Mon Sep 17 00:00:00 2001 From: Jonathan Wakely Date: Thu, 13 Dec 2018 20:33:55 +0000 Subject: [PATCH] PR libstdc++/71044 optimize std::filesystem::path construction This new implementation has a smaller footprint than the previous implementation, due to replacing std::vector<_Cmpt> with a custom pimpl type that only needs a single pointer. The _M_type enumeration is also combined with the pimpl type, by using a tagged pointer, reducing sizeof(path) further still. Construction and modification of paths is now done more efficiently, by splitting the input into a stack-based buffer of string_view objects instead of a dynamically-allocated vector containing strings. Once the final size is known only a single allocation is needed to reserve space for it. The append and concat operations no longer require constructing temporary path objects, nor re-parsing the entire native pathname. This results in algorithmic improvements to path construction, and working with large paths is much faster. PR libstdc++/71044 * include/bits/fs_path.h (path::path(path&&)): Add noexcept when appropriate. Move _M_cmpts instead of reparsing the native pathname. (path::operator=(const path&)): Do not define as defaulted. (path::operator/=, path::append): Call _M_append. (path::concat): Call _M_concat. (path::path(string_type, _Type)): Change type of first parameter to basic_string_view. (path::_M_append(basic_string_view)): New member function. (path::_M_concat(basic_string_view)): New member function. (_S_convert(value_type*, __null_terminated)): Return string view. (_S_convert(const value_type*, __null_terminated)): Return string view. (_S_convert(value_type*, value_type*)) (_S_convert(const value_type*, const value_type*)): Add overloads for pairs of pointers. (_S_convert(_InputIterator, __null_terminated)): Construct string_type explicitly, for cases where _S_convert returns a string view. 
(path::_S_is_dir_sep): Replace with non-member is_dir_sep. (path::_M_trim, path::_M_add_root_name, path::_M_add_root_dir) (path::_M_add_filename): Remove. (path::_M_type()): New member function to replace _M_type data member. (path::_List): Define new struct type instead of using std::vector. (path::_Cmpt::_Cmpt(string_type, _Type, size_t)): Change type of first parameter to basic_string_view. (path::operator+=(const path&)): Do not define inline. (path::operator+=(const string_type&)): Call _M_concat. (path::operator+=(const value_type*)): Likewise. (path::operator+=(value_type)): Likewise. (path::operator+=(basic_string_view)): Likewise. (path::operator/=(const path&)): Do not define inline. (path::_M_append(path)): Remove. * python/libstdcxx/v6/printers.py (StdPathPrinter): New printer that understands the new path::_List type. * src/filesystem/std-path.cc (is_dir_sep): New function to replace path::_S_is_dir_sep. (path::_Parser): New helper class to parse strings as paths. (path::_List::_Impl): Define container type for path components. (path::_List): Define members. (path::operator=(const path&)): Define explicitly, to provide the strong exception safety guarantee. (path::operator/=(const path&)): Implement manually by processing each component of the argument, rather than using _M_split_cmpts to parse the entire string again. (path::_M_append(string_type)): Likewise. (path::operator+=(const path&)): Likewise. (path::_M_concat(string_type)): Likewise. (path::remove_filename()): Perform trim directly instead of calling _M_trim(). (path::_M_split_cmpts()): Rewrite in terms of _Parser class. (path::_M_trim, path::_M_add_root_name, path::_M_add_root_dir) (path::_M_add_filename): Remove. * testsuite/27_io/filesystem/path/append/source.cc: Test appending a string view that aliases the path. * testsuite/27_io/filesystem/path/concat/strings.cc: Test concatenating a string view that aliases the path. 
From-SVN: r267106 --- libstdc++-v3/ChangeLog | 56 + libstdc++-v3/include/bits/fs_path.h | 225 +-- libstdc++-v3/python/libstdcxx/v6/printers.py | 75 +- libstdc++-v3/src/filesystem/std-path.cc | 1293 +++++++++++++++-- .../27_io/filesystem/path/append/source.cc | 28 + .../27_io/filesystem/path/concat/strings.cc | 28 + 6 files changed, 1461 insertions(+), 244 deletions(-) diff --git a/libstdc++-v3/ChangeLog b/libstdc++-v3/ChangeLog index cde8426089a..c1a25028f25 100644 --- a/libstdc++-v3/ChangeLog +++ b/libstdc++-v3/ChangeLog @@ -1,5 +1,61 @@ 2018-12-13 Jonathan Wakely + PR libstdc++/71044 + * include/bits/fs_path.h (path::path(path&&)): Add noexcept when + appropriate. Move _M_cmpts instead of reparsing the native pathname. + (path::operator=(const path&)): Do not define as defaulted. + (path::operator/=, path::append): Call _M_append. + (path::concat): Call _M_concat. + (path::path(string_type, _Type): Change type of first parameter to + basic_string_view. + (path::_M_append(basic_string_view)): New member function. + (path::_M_concat(basic_string_view)): New member function. + (_S_convert(value_type*, __null_terminated)): Return string view. + (_S_convert(const value_type*, __null_terminated)): Return string view. + (_S_convert(value_type*, value_type*)) + (_S_convert(const value_type*, const value_type*)): Add overloads for + pairs of pointers. + (_S_convert(_InputIterator, __null_terminated)): Construct string_type + explicitly, for cases where _S_convert returns a string view. + (path::_S_is_dir_sep): Replace with non-member is_dir_sep. + (path::_M_trim, path::_M_add_root_name, path::_M_add_root_dir) + (path::_M_add_filename): Remove. + (path::_M_type()): New member function to replace _M_type data member. + (path::_List): Define new struct type instead of using std::vector. + (path::_Cmpt::_Cmpt(string_type, _Type, size_t)): Change type of + first parameter to basic_string_view. + (path::operator+=(const path&)): Do not define inline. 
+ (path::operator+=(const string_type&)): Call _M_concat. + (path::operator+=(const value_type*)): Likewise. + (path::operator+=(value_type)): Likewise. + (path::operator+=(basic_string_view)): Likewise. + (path::operator/=(const path&)): Do not define inline. + (path::_M_append(path)): Remove. + * python/libstdcxx/v6/printers.py (StdPathPrinter): New printer that + understands the new path::_List type. + * src/filesystem/std-path.cc (is_dir_sep): New function to replace + path::_S_is_dir_sep. + (path::_Parser): New helper class to parse strings as paths. + (path::_List::_Impl): Define container type for path components. + (path::_List): Define members. + (path::operator=(const path&)): Define explicitly, to provide the + strong exception safety guarantee. + (path::operator/=(const path&)): Implement manually by processing + each component of the argument, rather than using _M_split_cmpts + to parse the entire string again. + (path::_M_append(string_type)): Likewise. + (path::operator+=(const path&)): Likewise. + (path::_M_concat(string_type)): Likewise. + (path::remove_filename()): Perform trim directly instead of calling + _M_trim(). + (path::_M_split_cmpts()): Rewrite in terms of _Parser class. + (path::_M_trim, path::_M_add_root_name, path::_M_add_root_dir) + (path::_M_add_filename): Remove. + * testsuite/27_io/filesystem/path/append/source.cc: Test appending a + string view that aliases the path. + testsuite/27_io/filesystem/path/concat/strings.cc: Test concatenating + a string view that aliases the path. + * testsuite/27_io/filesystem/path/generation/proximate.cc: Use preferred directory separators for normalized paths. * testsuite/27_io/filesystem/path/generation/relative.cc: Likewise. 
diff --git a/libstdc++-v3/include/bits/fs_path.h b/libstdc++-v3/include/bits/fs_path.h index 075b3ab5ef8..c69001bcc3c 100644 --- a/libstdc++-v3/include/bits/fs_path.h +++ b/libstdc++-v3/include/bits/fs_path.h @@ -34,7 +34,6 @@ #include #include -#include #include #include #include @@ -45,6 +44,7 @@ #include #include #include +#include #if defined(_WIN32) && !defined(__CYGWIN__) # define _GLIBCXX_FILESYSTEM_IS_WINDOWS 1 @@ -169,12 +169,13 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11 path(const path& __p) = default; - path(path&& __p) noexcept - : _M_pathname(std::move(__p._M_pathname)), _M_type(__p._M_type) - { - _M_split_cmpts(); - __p.clear(); - } + path(path&& __p) +#if _GLIBCXX_USE_CXX11_ABI || _GLIBCXX_FULLY_DYNAMIC_STRING == 0 + noexcept +#endif + : _M_pathname(std::move(__p._M_pathname)), + _M_cmpts(std::move(__p._M_cmpts)) + { __p.clear(); } path(string_type&& __source, format = auto_format) : _M_pathname(std::move(__source)) @@ -213,8 +214,8 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11 // assignments - path& operator=(const path& __p) = default; - path& operator=(path&& __p) noexcept; + path& operator=(const path&); + path& operator=(path&&) noexcept; path& operator=(string_type&& __source); path& assign(string_type&& __source); @@ -240,17 +241,26 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11 template _Path<_Source>& operator/=(_Source const& __source) - { return _M_append(path(__source)); } + { + _M_append(_S_convert(_S_range_begin(__source), _S_range_end(__source))); + return *this; + } template _Path<_Source>& append(_Source const& __source) - { return _M_append(path(__source)); } + { + _M_append(_S_convert(_S_range_begin(__source), _S_range_end(__source))); + return *this; + } template _Path<_InputIterator, _InputIterator>& append(_InputIterator __first, _InputIterator __last) - { return _M_append(path(__first, __last)); } + { + _M_append(_S_convert(__first, __last)); + return *this; + } // concatenation @@ -271,12 +281,18 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11 template _Path<_Source>& 
concat(_Source const& __x) - { return *this += _S_convert(_S_range_begin(__x), _S_range_end(__x)); } + { + _M_concat(_S_convert(_S_range_begin(__x), _S_range_end(__x))); + return *this; + } template _Path<_InputIterator, _InputIterator>& concat(_InputIterator __first, _InputIterator __last) - { return *this += _S_convert(__first, __last); } + { + _M_concat(_S_convert(__first, __last)); + return *this; + } // modifiers @@ -402,30 +418,41 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11 private: enum class _Type : unsigned char { - _Multi, _Root_name, _Root_dir, _Filename + _Multi = 0, _Root_name, _Root_dir, _Filename }; - path(string_type __str, _Type __type) : _M_pathname(__str), _M_type(__type) + path(basic_string_view __str, _Type __type) + : _M_pathname(__str) { - __glibcxx_assert(_M_type != _Type::_Multi); + __glibcxx_assert(__type != _Type::_Multi); + _M_cmpts.type(__type); } enum class _Split { _Stem, _Extension }; - path& _M_append(path __p); + void _M_append(basic_string_view); + void _M_concat(basic_string_view); pair _M_find_extension() const; template struct _Cvt; - static string_type + static basic_string_view _S_convert(value_type* __src, __null_terminated) - { return string_type(__src); } + { return __src; } - static string_type + static basic_string_view _S_convert(const value_type* __src, __null_terminated) - { return string_type(__src); } + { return __src; } + + static basic_string_view + _S_convert(value_type* __first, value_type* __last) + { return {__first, __last - __first}; } + + static basic_string_view + _S_convert(const value_type* __first, const value_type* __last) + { return {__first, __last - __first}; } template static string_type @@ -440,8 +467,10 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11 static string_type _S_convert(_InputIterator __src, __null_terminated) { + // Read from iterator into basic_string until a null value is seen: auto __s = _S_string_from_iter(__src); - return _S_convert(__s.c_str(), __s.c_str() + __s.size()); + // Convert (if needed) from 
iterator's value type to path::value_type: + return string_type(_S_convert(__s.data(), __s.data() + __s.size())); } static string_type @@ -469,27 +498,65 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11 static basic_string<_CharT, _Traits, _Allocator> _S_str_convert(const string_type&, const _Allocator& __a); - bool _S_is_dir_sep(value_type __ch) - { -#ifdef _GLIBCXX_FILESYSTEM_IS_WINDOWS - return __ch == L'/' || __ch == preferred_separator; -#else - return __ch == '/'; -#endif - } - void _M_split_cmpts(); - void _M_trim(); - void _M_add_root_name(size_t __n); - void _M_add_root_dir(size_t __pos); - void _M_add_filename(size_t __pos, size_t __n); + + _Type _M_type() const noexcept { return _M_cmpts.type(); } string_type _M_pathname; struct _Cmpt; - using _List = _GLIBCXX_STD_C::vector<_Cmpt>; - _List _M_cmpts; // empty unless _M_type == _Type::_Multi - _Type _M_type = _Type::_Filename; + + struct _List + { + using value_type = _Cmpt; + using iterator = value_type*; + using const_iterator = const value_type*; + + _List(); + _List(const _List&); + _List(_List&&) = default; + _List& operator=(const _List&); + _List& operator=(_List&&) = default; + ~_List() = default; + + _Type type() const noexcept + { return _Type{reinterpret_cast(_M_impl.get()) & 0x3}; } + + void type(_Type) noexcept; + + int size() const noexcept; // zero unless type() == _Type::_Multi + bool empty() const noexcept; // true unless type() == _Type::_Multi + void clear(); + void swap(_List& __l) noexcept { _M_impl.swap(__l._M_impl); } + int capacity() const noexcept; + void reserve(int, bool); ///< @pre type() == _Type::_Multi + + // All the member functions below here have a precondition !empty() + // (and they should only be called from within the library). 
+ + iterator begin(); + iterator end(); + const_iterator begin() const; + const_iterator end() const; + + value_type& front() noexcept; + value_type& back() noexcept; + const value_type& front() const noexcept; + const value_type& back() const noexcept; + + void erase(const_iterator); + void erase(const_iterator, const_iterator); + + struct _Impl; + struct _Impl_deleter + { + void operator()(_Impl*) const noexcept; + }; + unique_ptr<_Impl, _Impl_deleter> _M_impl; + }; + _List _M_cmpts; + + struct _Parser; }; inline void swap(path& __lhs, path& __rhs) noexcept { __lhs.swap(__rhs); } @@ -605,8 +672,8 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11 struct path::_Cmpt : path { - _Cmpt(string_type __s, _Type __t, size_t __pos) - : path(std::move(__s), __t), _M_pos(__pos) { } + _Cmpt(basic_string_view __s, _Type __t, size_t __pos) + : path(__s, __t), _M_pos(__pos) { } _Cmpt() : _M_pos(-1) { } @@ -733,7 +800,7 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11 private: friend class path; - bool _M_is_multi() const { return _M_path->_M_type == _Type::_Multi; } + bool _M_is_multi() const { return _M_path->_M_type() == _Type::_Multi; } friend difference_type __path_iter_distance(const iterator& __first, const iterator& __last) @@ -785,7 +852,6 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11 { _M_pathname = std::move(__p._M_pathname); _M_cmpts = std::move(__p._M_cmpts); - _M_type = __p._M_type; __p.clear(); return *this; } @@ -798,41 +864,31 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11 path::assign(string_type&& __source) { return *this = path(std::move(__source)); } - inline path& - path::operator+=(const path& __p) - { - return operator+=(__p.native()); - } - inline path& path::operator+=(const string_type& __x) { - _M_pathname += __x; - _M_split_cmpts(); + _M_concat(__x); return *this; } inline path& path::operator+=(const value_type* __x) { - _M_pathname += __x; - _M_split_cmpts(); + _M_concat(__x); return *this; } inline path& path::operator+=(value_type __x) { - _M_pathname += __x; - _M_split_cmpts(); + 
_M_concat(basic_string_view(&__x, 1)); return *this; } inline path& path::operator+=(basic_string_view __x) { - _M_pathname.append(__x.data(), __x.size()); - _M_split_cmpts(); + _M_concat(__x); return *this; } @@ -858,7 +914,6 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11 { _M_pathname.swap(__rhs._M_pathname); _M_cmpts.swap(__rhs._M_cmpts); - std::swap(_M_type, __rhs._M_type); } template @@ -968,7 +1023,7 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11 #endif string_type __str(__a); - if (_M_type == _Type::_Root_dir) + if (_M_type() == _Type::_Root_dir) __str.assign(1, __slash); else { @@ -979,7 +1034,7 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11 if (__add_slash) __str += __slash; __str += __elem._M_pathname; - __add_slash = __elem._M_type == _Type::_Filename; + __add_slash = __elem._M_type() == _Type::_Filename; } } @@ -1026,14 +1081,14 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11 { if (empty()) return {}; - else if (_M_type == _Type::_Filename) + else if (_M_type() == _Type::_Filename) return *this; - else if (_M_type == _Type::_Multi) + else if (_M_type() == _Type::_Multi) { if (_M_pathname.back() == preferred_separator) return {}; auto& __last = *--end(); - if (__last._M_type == _Type::_Filename) + if (__last._M_type() == _Type::_Filename) return __last; } return {}; @@ -1084,7 +1139,7 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11 inline path::iterator path::begin() const { - if (_M_type == _Type::_Multi) + if (_M_type() == _Type::_Multi) return iterator(this, _M_cmpts.begin()); return iterator(this, empty()); } @@ -1092,48 +1147,16 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11 inline path::iterator path::end() const { - if (_M_type == _Type::_Multi) + if (_M_type() == _Type::_Multi) return iterator(this, _M_cmpts.end()); return iterator(this, true); } -#ifndef _GLIBCXX_FILESYSTEM_IS_WINDOWS - inline path& path::operator/=(const path& __p) - { - // Much simpler than the specification in the standard, - // as any path with root-name or root-dir is absolute. 
- if (__p.is_absolute()) - operator=(__p); - else - { - if (has_filename() || (_M_type == _Type::_Root_name)) - _M_pathname += preferred_separator; - _M_pathname += __p.native(); - _M_split_cmpts(); - } - return *this; - } -#endif - - inline path& - path::_M_append(path __p) - { - if (__p.is_absolute()) - operator=(std::move(__p)); -#ifdef _GLIBCXX_FILESYSTEM_IS_WINDOWS - else if (__p.has_root_name() && __p.root_name() != root_name()) - operator=(std::move(__p)); -#endif - else - operator/=(const_cast(__p)); - return *this; - } - inline path::iterator& path::iterator::operator++() { __glibcxx_assert(_M_path != nullptr); - if (_M_path->_M_type == _Type::_Multi) + if (_M_path->_M_type() == _Type::_Multi) { __glibcxx_assert(_M_cur != _M_path->_M_cmpts.end()); ++_M_cur; @@ -1150,7 +1173,7 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11 path::iterator::operator--() { __glibcxx_assert(_M_path != nullptr); - if (_M_path->_M_type == _Type::_Multi) + if (_M_path->_M_type() == _Type::_Multi) { __glibcxx_assert(_M_cur != _M_path->_M_cmpts.begin()); --_M_cur; @@ -1167,7 +1190,7 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11 path::iterator::operator*() const { __glibcxx_assert(_M_path != nullptr); - if (_M_path->_M_type == _Type::_Multi) + if (_M_path->_M_type() == _Type::_Multi) { __glibcxx_assert(_M_cur != _M_path->_M_cmpts.end()); return *_M_cur; @@ -1182,7 +1205,7 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11 return false; if (_M_path == nullptr) return true; - if (_M_path->_M_type == path::_Type::_Multi) + if (_M_path->_M_type() == path::_Type::_Multi) return _M_cur == __rhs._M_cur; return _M_at_end == __rhs._M_at_end; } diff --git a/libstdc++-v3/python/libstdcxx/v6/printers.py b/libstdc++-v3/python/libstdcxx/v6/printers.py index 7c3c9d00ef3..2baaf1e8b73 100644 --- a/libstdc++-v3/python/libstdcxx/v6/printers.py +++ b/libstdc++-v3/python/libstdcxx/v6/printers.py @@ -1244,6 +1244,77 @@ class StdExpPathPrinter: def children(self): return self._iterator(self.val['_M_cmpts']) +class StdPathPrinter: + "Print a 
std::filesystem::path" + + def __init__ (self, typename, val): + self.val = val + self.typename = typename + impl = self.val['_M_cmpts']['_M_impl']['_M_t']['_M_t']['_M_head_impl'] + self.type = impl.cast(gdb.lookup_type('uintptr_t')) & 3 + if self.type == 0: + self.impl = impl + else: + self.impl = None + + def _path_type(self): + t = str(self.type.cast(gdb.lookup_type(self.typename + '::_Type'))) + if t[-9:] == '_Root_dir': + return "root-directory" + if t[-10:] == '_Root_name': + return "root-name" + return None + + def to_string (self): + path = "%s" % self.val ['_M_pathname'] + if self.type != 0: + t = self._path_type() + if t: + path = '%s [%s]' % (path, t) + return "filesystem::path %s" % path + + class _iterator(Iterator): + def __init__(self, impl, pathtype): + if impl: + # We can't access _Impl::_M_size because _Impl is incomplete + # so cast to int* to access the _M_size member at offset zero, + int_type = gdb.lookup_type('int') + cmpt_type = gdb.lookup_type(pathtype+'::_Cmpt') + char_type = gdb.lookup_type('char') + impl = impl.cast(int_type.pointer()) + size = impl.dereference() + #self.capacity = (impl + 1).dereference() + if hasattr(gdb.Type, 'alignof'): + sizeof_Impl = max(2 * int_type.sizeof, cmpt_type.alignof) + else: + sizeof_Impl = 2 * int_type.sizeof + begin = impl.cast(char_type.pointer()) + sizeof_Impl + self.item = begin.cast(cmpt_type.pointer()) + self.finish = self.item + size + self.count = 0 + else: + self.item = None + self.finish = None + + def __iter__(self): + return self + + def __next__(self): + if self.item == self.finish: + raise StopIteration + item = self.item.dereference() + count = self.count + self.count = self.count + 1 + self.item = self.item + 1 + path = item['_M_pathname'] + t = StdPathPrinter(item.type.name, item)._path_type() + if not t: + t = count + return ('[%s]' % t, path) + + def children(self): + return self._iterator(self.impl, self.typename) + class StdPairPrinter: "Print a std::pair object, with 'first' and 
'second' as children" @@ -1759,9 +1830,9 @@ def build_libstdcxx_dictionary (): libstdcxx_printer.add_version('std::experimental::filesystem::v1::__cxx11::', 'path', StdExpPathPrinter) libstdcxx_printer.add_version('std::filesystem::', - 'path', StdExpPathPrinter) + 'path', StdPathPrinter) libstdcxx_printer.add_version('std::filesystem::__cxx11::', - 'path', StdExpPathPrinter) + 'path', StdPathPrinter) # C++17 components libstdcxx_printer.add_version('std::', diff --git a/libstdc++-v3/src/filesystem/std-path.cc b/libstdc++-v3/src/filesystem/std-path.cc index 06d882cf4de..e9c78924b8e 100644 --- a/libstdc++-v3/src/filesystem/std-path.cc +++ b/libstdc++-v3/src/filesystem/std-path.cc @@ -27,19 +27,410 @@ #endif #include -#ifdef _GLIBCXX_FILESYSTEM_IS_WINDOWS -# include -#endif +#include +#include namespace fs = std::filesystem; using fs::path; -constexpr path::value_type path::preferred_separator; +static inline bool is_dir_sep(path::value_type ch) +{ +#ifdef _GLIBCXX_FILESYSTEM_IS_WINDOWS + return ch == L'/' || ch == path::preferred_separator; +#else + return ch == '/'; +#endif +} + +struct path::_Parser +{ + using string_view_type = std::basic_string_view; + + struct cmpt + { + string_view_type str; + _Type type = _Type::_Multi; + + bool valid() const { return type != _Type::_Multi; } + }; + + string_view_type input; + string_view_type::size_type pos = 0; + size_t origin; + _Parser(string_view_type s, size_t o = 0) : input(s), origin(o) { } + + pair root_path() noexcept + { + pos = 0; + pair root; + + const size_t len = input.size(); + + // look for root name or root directory + if (is_dir_sep(input[0])) + { +#ifdef __CYGWIN__ + // look for root name, such as "//foo" + if (len > 2 && input[1] == input[0]) + { + if (!is_dir_sep(input[2])) + { + // got root name, find its end + pos = 3; + while (pos < len && !is_dir_sep(input[pos])) + ++pos; + root.first.str = input.substr(0, pos); + root.first.type = _Type::_Root_name; + + if (pos < len) // also got root directory + { 
+ root.second.str = input.substr(pos, 1); + root.second.type = _Type::_Root_dir; + ++pos; + } + } + else + { + // got something like "///foo" which is just a root directory + // composed of multiple redundant directory separators + root.first.str = input.substr(0, 1); + root.first.type = _Type::_Root_dir; + pos += 2; + } + } + else +#endif + { + root.first.str = input.substr(0, 1); + root.first.type = _Type::_Root_dir; + ++pos; + } + // Find the start of the first filename + while (pos < len && is_dir_sep(input[pos])) + ++pos; + } #ifdef _GLIBCXX_FILESYSTEM_IS_WINDOWS + else if (len > 1 && input[1] == L':') + { + // got disk designator + root.first.str = input.substr(0, 2); + root.first.type = _Type::_Root_name; + if (len > 2 && is_dir_sep(input[2])) + { + root.second.str = input.substr(2, 1); + root.second.type = _Type::_Root_dir; + } + pos = input.find_first_not_of(L"/\\", 2); + } +#endif + return root; + } + + cmpt next() noexcept + { +#ifdef _GLIBCXX_FILESYSTEM_IS_WINDOWS + string_view_type sep = L"/\\"; +#else + char sep = '/'; +#endif + + cmpt f; + pos = input.find_first_not_of(sep, pos); + if (pos != input.npos) + { + const auto end = input.find_first_of(sep, pos); + f.str = input.substr(pos, end - pos); + f.type = _Type::_Filename; + pos = end; + } + return f; + } + + string_view_type::size_type + offset(const cmpt& c) const noexcept + { return origin + c.str.data() - input.data(); } +}; + +struct path::_List::_Impl +{ + using value_type = _Cmpt; + + _Impl(int cap) : _M_size(0), _M_capacity(cap) { } + + alignas(value_type) int _M_size; + int _M_capacity; + + using iterator = value_type*; + using const_iterator = const value_type*; + + iterator begin() { return reinterpret_cast(this + 1); } + iterator end() { return begin() + size(); } + + const_iterator begin() const + { return reinterpret_cast(this + 1); } + const_iterator end() const { return begin() + size(); } + + const value_type& front() const { return *begin(); } + const value_type& back() const { 
return end()[-1]; } + + int size() const { return _M_size; } + int capacity() const { return _M_capacity; } + bool empty() const { return _M_size == 0; } + + void clear() { std::destroy_n(begin(), _M_size); _M_size = 0; } + + void erase(const_iterator cpos) + { + iterator pos = begin() + (cpos - begin()); + if (pos + 1 != end()) + std::move(pos + 1, end(), pos); + pos->~_Cmpt(); + --_M_size; + } + + void erase(const_iterator cfirst, const_iterator clast) + { + iterator first = begin() + (cfirst - begin()); + iterator last = begin() + (clast - begin()); + if (last != end()) + std::move(last, end(), first); + std::destroy(first + (end() - last), end()); + _M_size -= last - first; + } + + unique_ptr<_Impl, _Impl_deleter> copy() const + { + const auto n = size(); + void* p = ::operator new(sizeof(_Impl) + n * sizeof(value_type)); + unique_ptr<_Impl, _Impl_deleter> newptr(::new (p) _Impl{n}); + std::uninitialized_copy_n(begin(), n, newptr->begin()); + newptr->_M_size = n; + return newptr; + } + + // Clear the lowest two bits from the pointer (i.e. 
remove the _Type value) + static _Impl* notype(_Impl* p) + { + constexpr uintptr_t mask = ~(uintptr_t)0x3; + return reinterpret_cast<_Impl*>(reinterpret_cast(p) & mask); + } +}; + +void path::_List::_Impl_deleter::operator()(_Impl* p) const noexcept +{ + p = _Impl::notype(p); + if (p) + { + __glibcxx_assert(p->_M_size <= p->_M_capacity); + p->clear(); + ::operator delete(p, sizeof(*p) + p->_M_capacity * sizeof(value_type)); + } +} + +path::_List::_List() : _M_impl(reinterpret_cast<_Impl*>(_Type::_Filename)) { } + +path::_List::_List(const _List& other) +{ + if (!other.empty()) + _M_impl = other._M_impl->copy(); + else + type(other.type()); +} + +path::_List& +path::_List::operator=(const _List& other) +{ + if (!other.empty()) + { + // copy in-place if there is capacity + const int newsize = other._M_impl->size(); + auto impl = _Impl::notype(_M_impl.get()); + if (impl && impl->capacity() >= newsize) + { + const int oldsize = impl->_M_size; + auto to = impl->begin(); + auto from = other._M_impl->begin(); + const int minsize = std::min(newsize, oldsize); + for (int i = 0; i < minsize; ++i) + to[i]._M_pathname.reserve(from[i]._M_pathname.length()); + if (newsize > oldsize) + { + std::uninitialized_copy_n(to + oldsize, newsize - oldsize, + from + oldsize); + impl->_M_size = newsize; + } + else if (newsize < oldsize) + impl->erase(impl->begin() + newsize, impl->end()); + std::copy_n(from, minsize, to); + type(_Type::_Multi); + } + else + _M_impl = other._M_impl->copy(); + } + else + { + clear(); + type(other.type()); + } + return *this; +} + +inline void +path::_List::type(_Type t) noexcept +{ + auto val = reinterpret_cast(_Impl::notype(_M_impl.release())); + _M_impl.reset(reinterpret_cast<_Impl*>(val | (unsigned char)t)); +} + +inline int +path::_List::size() const noexcept +{ + if (auto* ptr = _Impl::notype(_M_impl.get())) + return ptr->size(); + return 0; +} + +inline int +path::_List::capacity() const noexcept +{ + if (auto* ptr = _Impl::notype(_M_impl.get())) + 
return ptr->capacity(); + return 0; +} + +inline bool +path::_List::empty() const noexcept +{ + return size() == 0; +} + +inline auto +path::_List::begin() noexcept +-> iterator +{ + __glibcxx_assert(!empty()); + if (auto* ptr = _Impl::notype(_M_impl.get())) + return ptr->begin(); + return nullptr; +} + +inline auto +path::_List::end() noexcept +-> iterator +{ + __glibcxx_assert(!empty()); + if (auto* ptr = _Impl::notype(_M_impl.get())) + return ptr->end(); + return nullptr; +} + +auto +path::_List::begin() const noexcept +-> const_iterator +{ + __glibcxx_assert(!empty()); + if (auto* ptr = _Impl::notype(_M_impl.get())) + return ptr->begin(); + return nullptr; +} + +auto +path::_List::end() const noexcept +-> const_iterator +{ + __glibcxx_assert(!empty()); + if (auto* ptr = _Impl::notype(_M_impl.get())) + return ptr->end(); + return nullptr; +} + +inline auto +path::_List::front() noexcept +-> value_type& +{ + return *_M_impl->begin(); +} + +inline auto +path::_List::back() noexcept +-> value_type& +{ + return _M_impl->begin()[_M_impl->size() - 1]; +} + +inline auto +path::_List::front() const noexcept +-> const value_type& +{ + return *_M_impl->begin(); +} + +inline auto +path::_List::back() const noexcept +-> const value_type& +{ + return _M_impl->begin()[_M_impl->size() - 1]; +} + +inline void +path::_List::erase(const_iterator pos) +{ + _M_impl->erase(pos); +} + +inline void +path::_List::erase(const_iterator first, const_iterator last) +{ + _M_impl->erase(first, last); +} + +inline void +path::_List::clear() +{ + if (auto ptr = _Impl::notype(_M_impl.get())) + ptr->clear(); +} + +void +path::_List::reserve(int newcap, bool exact = false) +{ + // __glibcxx_assert(type() == _Type::_Multi); + + _Impl* curptr = _Impl::notype(_M_impl.get()); + + int curcap = curptr ? 
curptr->capacity() : 0; + + if (curcap < newcap) + { + if (!exact && newcap < int(1.5 * curcap)) + newcap = 1.5 * curcap; + + void* p = ::operator new(sizeof(_Impl) + newcap * sizeof(value_type)); + std::unique_ptr<_Impl, _Impl_deleter> newptr(::new(p) _Impl{newcap}); + const int cursize = curptr ? curptr->size() : 0; + if (cursize) + { + std::uninitialized_move_n(curptr->begin(), cursize, newptr->begin()); + newptr->_M_size = cursize; + } + std::swap(newptr, _M_impl); + } +} + +path& +path::operator=(const path& p) +{ + _M_pathname.reserve(p._M_pathname.length()); + _M_cmpts = p._M_cmpts; // might throw + _M_pathname = p._M_pathname; // won't throw because we reserved enough space + return *this; +} + path& path::operator/=(const path& __p) { +#ifdef _GLIBCXX_FILESYSTEM_IS_WINDOWS if (__p.is_absolute() || (__p.has_root_name() && __p.root_name() != root_name())) return operator=(__p); @@ -50,10 +441,10 @@ path::operator/=(const path& __p) if (__p.has_root_directory()) { // Remove any root directory and relative path - if (_M_type != _Type::_Root_name) + if (_M_type() != _Type::_Root_name) { if (!_M_cmpts.empty() - && _M_cmpts.front()._M_type == _Type::_Root_name) + && _M_cmpts.front()._M_type() == _Type::_Root_name) __lhs = _M_cmpts.front()._M_pathname; else __lhs = {}; @@ -64,14 +455,14 @@ path::operator/=(const path& __p) basic_string_view __rhs = __p._M_pathname; // Omit any root-name from the generic format pathname: - if (__p._M_type == _Type::_Root_name) + if (__p._M_type() == _Type::_Root_name) __rhs = {}; else if (!__p._M_cmpts.empty() - && __p._M_cmpts.front()._M_type == _Type::_Root_name) + && __p._M_cmpts.front()._M_type() == _Type::_Root_name) __rhs.remove_prefix(__p._M_cmpts.front()._M_pathname.size()); const size_t __len = __lhs.size() + (int)__add_sep + __rhs.size(); - const size_t __maxcmpts = _M_cmpts.size() + __p._M_cmpts.size(); + const int __maxcmpts = _M_cmpts.size() + __p._M_cmpts.size(); if (_M_pathname.capacity() < __len || 
_M_cmpts.capacity() < __maxcmpts) { // Construct new path and swap (strong exception-safety guarantee). @@ -90,36 +481,688 @@ path::operator/=(const path& __p) if (__add_sep) _M_pathname += preferred_separator; _M_pathname += __rhs; - _M_split_cmpts(); + __try + { + _M_split_cmpts(); + } + __catch (...) + { + __try + { + // try to restore original state + _M_pathname.resize(__lhs.length()); + _M_split_cmpts(); + } + __catch (...) + { + // give up, basic exception safety guarantee only: + clear(); + __throw_exception_again; + } + } } +#else + // POSIX version is simpler than the specification in the standard, + // as any path with root-name or root-dir is absolute. + + if (__p.is_absolute() || this->empty()) + { + return operator=(__p); + } + + using string_view_type = basic_string_view; + + string_view_type sep; + if (has_filename()) + sep = { &preferred_separator, 1 }; // need to add a separator +#ifdef __CYGWIN__ + else if (_M_type() == _Type::_Root_name) // root-name with no root-dir + sep = { &preferred_separator, 1 }; // need to add a separator +#endif + else if (__p.empty()) + return *this; // nothing to do + + const auto orig_pathlen = _M_pathname.length(); + const auto orig_size = _M_cmpts.size(); + const auto orig_type = _M_type(); + + int capacity = 0; + if (_M_type() == _Type::_Multi) + capacity += _M_cmpts.size(); + else if (!empty()) + capacity += 1; + if (__p._M_type() == _Type::_Multi) + capacity += __p._M_cmpts.size(); + else if (!__p.empty() || !sep.empty()) + capacity += 1; + + if (orig_type == _Type::_Multi) + { + const int curcap = _M_cmpts._M_impl->capacity(); + if (capacity > curcap) + capacity = std::max(capacity, (int) (curcap * 1.5)); + } + + _M_pathname.reserve(_M_pathname.length() + sep.length() + + __p._M_pathname.length()); + + __try + { + _M_pathname += sep; + const auto basepos = _M_pathname.length(); + _M_pathname += __p.native(); + + _M_cmpts.type(_Type::_Multi); + _M_cmpts.reserve(capacity); + _Cmpt* output = 
_M_cmpts._M_impl->end(); + + if (orig_type == _Type::_Multi) + { + // Remove empty final component + if (_M_cmpts._M_impl->back().empty()) + _M_cmpts._M_impl->erase(--output); + } + else if (orig_pathlen != 0) + { + // Create single component from original path + string_view_type s(_M_pathname.data(), orig_pathlen); + ::new(output++) _Cmpt(s, orig_type, 0); + ++_M_cmpts._M_impl->_M_size; + } + + if (__p._M_type() == _Type::_Multi) + { + for (auto& c : *__p._M_cmpts._M_impl) + { + ::new(output++) _Cmpt(c._M_pathname, _Type::_Filename, + c._M_pos + basepos); + ++_M_cmpts._M_impl->_M_size; + } + } + else if (!__p.empty() || !sep.empty()) + { + __glibcxx_assert(__p._M_type() == _Type::_Filename); + ::new(output) _Cmpt(__p._M_pathname, __p._M_type(), basepos); + ++_M_cmpts._M_impl->_M_size; + } + } + __catch (...) + { + _M_pathname.resize(orig_pathlen); + if (orig_type == _Type::_Multi) + _M_cmpts.erase(_M_cmpts.begin() + orig_size, _M_cmpts.end()); + else + _M_cmpts.clear(); + _M_cmpts.type(orig_type); + __throw_exception_again; + } +#endif return *this; } + +// [fs.path.append] +void +path::_M_append(basic_string_view s) +{ + _Parser parser(s); + auto root_path = parser.root_path(); + +#ifdef _GLIBCXX_FILESYSTEM_IS_WINDOWS + bool is_absolute = root_path.second.type == _Type::_Root_dir; + bool has_root_name = root_path.first.type == _Type::_Root_name; + if (is_absolute || (has_root_name && root_path.first.str != root_name())) + { + operator=(s); + return; + } + + basic_string_view lhs = _M_pathname; + bool add_sep = false; + + bool has_root_directory = root_path.first.type == _Type::_Root_dir + || root_path.second.type == _Type::_Root_dir; + + if (has_root_directory) + { + // Remove any root directory and relative path + if (_M_type() != _Type::_Root_name) + { + if (!_M_cmpts.empty() + && _M_cmpts.front()._M_type() == _Type::_Root_name) + lhs = _M_cmpts.front()._M_pathname; + else + lhs = {}; + } + } + else if (has_filename() || (!has_root_directory && is_absolute)) + 
add_sep = true; + + basic_string_view rhs = s; + // Omit any root-name from the generic format pathname: + if (has_root_name) + rhs.remove_prefix(root_path.first.str.length()); + + // Construct new path and swap (strong exception-safety guarantee). + string_type tmp; + tmp.reserve(lhs.size() + (int)add_sep + rhs.size()); + tmp = lhs; + if (add_sep) + tmp += preferred_separator; + tmp += rhs; + path newp = std::move(tmp); + swap(newp); +#else + + bool is_absolute = root_path.first.type == _Type::_Root_dir + || root_path.second.type == _Type::_Root_dir; + if (is_absolute || this->empty()) + { + operator=(s); + return; + } + + const auto orig_pathlen = _M_pathname.length(); + const auto orig_size = _M_cmpts.size(); + const auto orig_type = _M_type(); + + basic_string_view sep; + if (has_filename()) + sep = { &preferred_separator, 1 }; // need to add a separator +#ifdef __CYGWIN__ + else if (_M_type() == _Type::_Root_name) // root-name with no root-dir + sep = { &preferred_separator, 1 }; // need to add a separator +#endif + else if (s.empty()) + return; // nothing to do + + // Copy the input into _M_pathname: + _M_pathname += s; + _M_pathname.insert(orig_pathlen, sep); + // Update s to refer to the new copy (this ensures s is not a dangling + // reference to deallocated characters, in the case where it was referring + // into _M_pathname or a member of _M_cmpts). 
+ s = _M_pathname; + const auto orig_pathname = s.substr(0, orig_pathlen); + s.remove_prefix(orig_pathlen + sep.length()); + + parser.input = s; // reset parser to use updated string view + const auto basepos = orig_pathname.length() + sep.length(); + parser.origin = basepos; + + std::array<_Parser::cmpt, 64> buf; + auto next = buf.begin(); + + int capacity = 0; + if (_M_type() == _Type::_Multi) + capacity += _M_cmpts.size(); + else if (!empty()) + capacity += 1; + + auto cmpt = parser.next(); + if (cmpt.valid()) + { + do + { + *next++ = cmpt; + cmpt = parser.next(); + } + while (cmpt.valid() && next != buf.end()); + + capacity += next - buf.begin(); + if (cmpt.valid()) // filled buffer before parsing whole input + { + ++capacity; + _Parser parser2(parser); + while (parser2.next().valid()) + ++capacity; + } + + if (s.back() == '/') + ++capacity; + } + else if (!sep.empty()) + ++capacity; + + __try + { + _M_cmpts.type(_Type::_Multi); + _M_cmpts.reserve(capacity); + _Cmpt* output = _M_cmpts._M_impl->end(); + + if (orig_type == _Type::_Multi) + { + // Remove empty final component + if (_M_cmpts._M_impl->back().empty()) + _M_cmpts._M_impl->erase(--output); + } + else if (orig_pathlen != 0) + { + // Create single component from original path + ::new(output++) _Cmpt(orig_pathname, orig_type, 0); + ++_M_cmpts._M_impl->_M_size; + } + + if (next != buf.begin()) + { + for (auto it = buf.begin(); it != next; ++it) + { + auto c = *it; + ::new(output++) _Cmpt(c.str, c.type, parser.offset(c)); + ++_M_cmpts._M_impl->_M_size; + } + for (auto c = parser.next(); c.valid(); c = parser.next()) + { + ::new(output++) _Cmpt(c.str, c.type, parser.offset(c)); + ++_M_cmpts._M_impl->_M_size; + } + + if (s.back() == '/') + { + ::new(output++) _Cmpt({}, _Type::_Filename, _M_pathname.length()); + ++_M_cmpts._M_impl->_M_size; + } + } + else if (!sep.empty()) + { + // Empty filename at the end: + ::new(output) _Cmpt({}, _Type::_Filename, basepos); + ++_M_cmpts._M_impl->_M_size; + } + } + __catch 
(...) + { + _M_pathname.resize(orig_pathlen); + if (orig_type == _Type::_Multi) + _M_cmpts.erase(_M_cmpts.begin() + orig_size, _M_cmpts.end()); + else + _M_cmpts.clear(); + _M_cmpts.type(orig_type); + __throw_exception_again; + } +#endif +} + +// [fs.path.concat] +path& +path::operator+=(const path& p) +{ + if (p.empty()) + return *this; + + if (this->empty()) + { + operator=(p); + return *this; + } + + const auto orig_pathlen = _M_pathname.length(); + const auto orig_type = _M_type(); + const auto orig_size = _M_cmpts.size(); + int orig_filenamelen = -1; + basic_string_view extra; + + // Ensure that '_M_pathname += p._M_pathname' won't throw: + _M_pathname.reserve(orig_pathlen + p._M_pathname.length()); + + _Cmpt c; + _Cmpt* it = nullptr; + _Cmpt* last = nullptr; + if (p._M_type() == _Type::_Multi) + { + it = p._M_cmpts._M_impl->begin(); + last = p._M_cmpts._M_impl->end(); + } + else + { + c = _Cmpt(p._M_pathname, p._M_type(), 0); + it = &c; + last = it + 1; + } + + if (it->_M_type() == _Type::_Filename) + { + // See if there's a filename or root-name at the end of the original path + // that we can add to. + if (_M_type() == _Type::_Filename) + { + if (p._M_type() == _Type::_Filename) + { + // Simplest case where we just add the whole of p to the + // original path. 
+ _M_pathname += p._M_pathname; + return *this; + } + // Only the first component of s should be appended, do so below: + extra = it->_M_pathname; + ++it; + } + else if (_M_type() == _Type::_Multi + && _M_cmpts.back()._M_type() == _Type::_Filename) + { + auto& back = _M_cmpts.back(); + if (p._M_type() == _Type::_Filename) + { + basic_string_view s = p._M_pathname; + back._M_pathname += s; + _M_pathname += s; + return *this; + } + + orig_filenamelen = back._M_pathname.length(); + back._M_pathname += it->_M_pathname; + extra = it->_M_pathname; + ++it; + } + } + else if (is_dir_sep(_M_pathname.back()) && _M_type() == _Type::_Multi + && _M_cmpts.back()._M_type() == _Type::_Filename) + orig_filenamelen = 0; // current path has empty filename at end + + // TODO handle "//rootname" + "foo" case for Cygwin. + + int capacity = 0; + if (_M_type() == _Type::_Multi) + capacity += _M_cmpts.size(); + else + capacity += 1; + if (p._M_type() == _Type::_Multi) + capacity += p._M_cmpts.size(); + else + capacity += 1; + + __try + { + _M_cmpts.type(_Type::_Multi); + _M_cmpts.reserve(capacity); + _Cmpt* output = _M_cmpts._M_impl->end(); + + if (orig_type != _Type::_Multi) + { + // Create single component from original path + auto ptr = ::new(output++) _Cmpt({}, orig_type, 0); + ++_M_cmpts._M_impl->_M_size; + ptr->_M_pathname.reserve(_M_pathname.length() + extra.length()); + ptr->_M_pathname = _M_pathname; + ptr->_M_pathname += extra; + } + else if (orig_filenamelen == 0 && it != last) + { + // Remove empty filename at end of original path. 
+ _M_cmpts.erase(std::prev(output)); + } + + if (it != last && it->_M_type() == _Type::_Root_name) + { + basic_string_view s = it->_M_pathname; + auto pos = orig_pathlen; +#ifdef __CYGWIN__ + s.remove_prefix(2); + pos += 2; #endif + ::new(output++) _Cmpt(s, _Type::_Filename, pos); + ++_M_cmpts._M_impl->_M_size; + ++it; + } + + if (it != last && it->_M_type() == _Type::_Root_dir) + { + ++it; + if (it == last) + { + // This root-dir becomes a trailing slash + auto pos = _M_pathname.length() + p._M_pathname.length(); + ::new(output++) _Cmpt({}, _Type::_Filename, pos); + ++_M_cmpts._M_impl->_M_size; + } + } + + while (it != last) + { + auto pos = it->_M_pos + orig_pathlen; + ::new(output++) _Cmpt(it->_M_pathname, _Type::_Filename, pos); + ++_M_cmpts._M_impl->_M_size; + ++it; + } + + _M_pathname += p._M_pathname; + + if (is_dir_sep(_M_pathname.back())) + { + ::new(output++) _Cmpt({}, _Type::_Filename, _M_pathname.length()); + ++_M_cmpts._M_impl->_M_size; + } + } + __catch (...) + { + _M_pathname.resize(orig_pathlen); + if (orig_type == _Type::_Multi) + { + if (_M_cmpts.size() > orig_size) + _M_cmpts.erase(_M_cmpts.begin() + orig_size, _M_cmpts.end()); + if (orig_filenamelen != -1) + { + if (_M_cmpts.size() == orig_size) + { + auto& back = _M_cmpts.back(); + back._M_pathname.resize(orig_filenamelen); + if (orig_filenamelen == 0) + back._M_pos = orig_pathlen; + } + else + { + auto output = _M_cmpts._M_impl->end(); + ::new(output) _Cmpt({}, _Type::_Filename, orig_pathlen); + ++_M_cmpts._M_impl->_M_size; + } + } + } + else + _M_cmpts.clear(); + _M_cmpts.type(orig_type); + __throw_exception_again; + } + return *this; +} + +// [fs.path.concat] +void +path::_M_concat(basic_string_view s) +{ + if (s.empty()) + return; + + if (this->empty()) + { + operator=(s); + return; + } + + const auto orig_pathlen = _M_pathname.length(); + const auto orig_type = _M_type(); + const auto orig_size = _M_cmpts.size(); + int orig_filenamelen = -1; + basic_string_view extra; + + // Copy the input 
into _M_pathname: + _M_pathname += s; + // Update s to refer to the new copy (this ensures s is not a dangling + // reference to deallocated characters, in the case where it was referring + // into _M_pathname or a member of _M_cmpts). + s = _M_pathname; + const auto orig_pathname = s.substr(0, orig_pathlen); + s.remove_prefix(orig_pathlen); + + _Parser parser(s, orig_pathlen); + auto cmpt = parser.next(); + + if (cmpt.str.data() == s.data()) + { + // See if there's a filename or root-name at the end of the original path + // that we can add to. + if (_M_type() == _Type::_Filename) + { + if (cmpt.str.length() == s.length()) + { + // Simplest case where we just need to add the whole of s + // to the original path, which was already done above. + return; + } + // Only the first component of s should be appended, do so below: + extra = cmpt.str; + cmpt = {}; // so we don't process it again + } + else if (_M_type() == _Type::_Multi + && _M_cmpts.back()._M_type() == _Type::_Filename) + { + auto& back = _M_cmpts.back(); + if (cmpt.str.length() == s.length()) + { + back._M_pathname += s; + return; + } + + orig_filenamelen = back._M_pathname.length(); + back._M_pathname += cmpt.str; + extra = cmpt.str; + cmpt = {}; + } + } + else if (is_dir_sep(orig_pathname.back()) && _M_type() == _Type::_Multi + && _M_cmpts.back()._M_type() == _Type::_Filename) + orig_filenamelen = 0; // original path had empty filename at end + + + // TODO handle "//rootname" + "foo" case for Cygwin. 
+ + std::array<_Parser::cmpt, 64> buf; + auto next = buf.begin(); + + if (cmpt.valid()) + *next++ = cmpt; + + cmpt = parser.next(); + while (cmpt.valid() && next != buf.end()) + { + *next++ = cmpt; + cmpt = parser.next(); + } + + int capacity = 0; + if (_M_type() == _Type::_Multi) + capacity += _M_cmpts.size(); + else + capacity += 1; + + capacity += next - buf.begin(); + + if (cmpt.valid()) // filled buffer before parsing whole input + { + ++capacity; + _Parser parser2(parser); + while (parser2.next().valid()) + ++capacity; + } + if (is_dir_sep(s.back())) + ++capacity; + + __try + { + _M_cmpts.type(_Type::_Multi); + _M_cmpts.reserve(capacity); + _Cmpt* output = _M_cmpts._M_impl->end(); + auto it = buf.begin(); + + if (orig_type != _Type::_Multi) + { + // Create single component from original path + auto p = ::new(output++) _Cmpt({}, orig_type, 0); + ++_M_cmpts._M_impl->_M_size; + p->_M_pathname.reserve(orig_pathname.length() + extra.length()); + p->_M_pathname = orig_pathname; + p->_M_pathname += extra; + } + else if (orig_filenamelen == 0) + { + // Replace empty filename at end of original path. + std::prev(output)->_M_pathname = it->str; + std::prev(output)->_M_pos = parser.offset(*it); + ++it; + } + + while (it != next) + { + ::new(output++) _Cmpt(it->str, _Type::_Filename, parser.offset(*it)); + ++_M_cmpts._M_impl->_M_size; + ++it; + } + + if (next == buf.end()) + { + while (cmpt.valid()) + { + auto pos = parser.offset(cmpt); + ::new(output++) _Cmpt(cmpt.str, _Type::_Filename, pos); + ++_M_cmpts._M_impl->_M_size; + cmpt = parser.next(); + } + } + + if (is_dir_sep(s.back())) + { + // Empty filename at the end: + ::new(output++) _Cmpt({}, _Type::_Filename, _M_pathname.length()); + ++_M_cmpts._M_impl->_M_size; + } + } + __catch (...) 
+ { + _M_pathname.resize(orig_pathlen); + if (orig_type == _Type::_Multi) + { + _M_cmpts.erase(_M_cmpts.begin() + orig_size, _M_cmpts.end()); + if (orig_filenamelen != -1) + { + auto& back = _M_cmpts.back(); + back._M_pathname.resize(orig_filenamelen); + if (orig_filenamelen == 0) + back._M_pos = orig_pathlen; + } + } + else + _M_cmpts.clear(); + _M_cmpts.type(orig_type); + __throw_exception_again; + } +} path& path::remove_filename() { - if (_M_type == _Type::_Multi) + if (_M_type() == _Type::_Multi) { if (!_M_cmpts.empty()) { auto cmpt = std::prev(_M_cmpts.end()); - if (cmpt->_M_type == _Type::_Filename && !cmpt->empty()) + if (cmpt->_M_type() == _Type::_Filename && !cmpt->empty()) { _M_pathname.erase(cmpt->_M_pos); auto prev = std::prev(cmpt); - if (prev->_M_type == _Type::_Root_dir - || prev->_M_type == _Type::_Root_name) + if (prev->_M_type() == _Type::_Root_dir + || prev->_M_type() == _Type::_Root_name) { _M_cmpts.erase(cmpt); - _M_trim(); + if (_M_cmpts.size() == 1) + { + _M_cmpts.type(_M_cmpts.front()._M_type()); + _M_cmpts.clear(); + } } else cmpt->clear(); } } } - else if (_M_type == _Type::_Filename) + else if (_M_type() == _Type::_Filename) clear(); return *this; } @@ -201,15 +1244,15 @@ path::compare(const path& p) const noexcept if (empty() && p.empty()) return 0; - else if (_M_type == _Type::_Multi && p._M_type == _Type::_Multi) + else if (_M_type() == _Type::_Multi && p._M_type() == _Type::_Multi) return do_compare(_M_cmpts.begin(), _M_cmpts.end(), p._M_cmpts.begin(), p._M_cmpts.end()); - else if (_M_type == _Type::_Multi) + else if (_M_type() == _Type::_Multi) { CmptRef c[1] = { { &p } }; return do_compare(_M_cmpts.begin(), _M_cmpts.end(), c, c+1); } - else if (p._M_type == _Type::_Multi) + else if (p._M_type() == _Type::_Multi) { CmptRef c[1] = { { this } }; return do_compare(c, c+1, p._M_cmpts.begin(), p._M_cmpts.end()); @@ -222,9 +1265,9 @@ path path::root_name() const { path __ret; - if (_M_type == _Type::_Root_name) + if (_M_type() == 
_Type::_Root_name) __ret = *this; - else if (_M_cmpts.size() && _M_cmpts.begin()->_M_type == _Type::_Root_name) + else if (_M_cmpts.size() && _M_cmpts.begin()->_M_type() == _Type::_Root_name) __ret = *_M_cmpts.begin(); return __ret; } @@ -233,17 +1276,17 @@ path path::root_directory() const { path __ret; - if (_M_type == _Type::_Root_dir) + if (_M_type() == _Type::_Root_dir) { - __ret._M_type = _Type::_Root_dir; + __ret._M_cmpts.type(_Type::_Root_dir); __ret._M_pathname.assign(1, preferred_separator); } else if (!_M_cmpts.empty()) { auto __it = _M_cmpts.begin(); - if (__it->_M_type == _Type::_Root_name) + if (__it->_M_type() == _Type::_Root_name) ++__it; - if (__it != _M_cmpts.end() && __it->_M_type == _Type::_Root_dir) + if (__it != _M_cmpts.end() && __it->_M_type() == _Type::_Root_dir) __ret = *__it; } return __ret; @@ -253,23 +1296,23 @@ path path::root_path() const { path __ret; - if (_M_type == _Type::_Root_name) + if (_M_type() == _Type::_Root_name) __ret = *this; - else if (_M_type == _Type::_Root_dir) + else if (_M_type() == _Type::_Root_dir) { __ret._M_pathname.assign(1, preferred_separator); - __ret._M_type = _Type::_Root_dir; + __ret._M_cmpts.type(_Type::_Root_dir); } else if (!_M_cmpts.empty()) { auto __it = _M_cmpts.begin(); - if (__it->_M_type == _Type::_Root_name) + if (__it->_M_type() == _Type::_Root_name) { __ret = *__it++; - if (__it != _M_cmpts.end() && __it->_M_type == _Type::_Root_dir) + if (__it != _M_cmpts.end() && __it->_M_type() == _Type::_Root_dir) __ret /= *__it; } - else if (__it->_M_type == _Type::_Root_dir) + else if (__it->_M_type() == _Type::_Root_dir) __ret = *__it; } return __ret; @@ -279,14 +1322,14 @@ path path::relative_path() const { path __ret; - if (_M_type == _Type::_Filename) + if (_M_type() == _Type::_Filename) __ret = *this; else if (!_M_cmpts.empty()) { auto __it = _M_cmpts.begin(); - if (__it->_M_type == _Type::_Root_name) + if (__it->_M_type() == _Type::_Root_name) ++__it; - if (__it != _M_cmpts.end() && __it->_M_type 
== _Type::_Root_dir) + if (__it != _M_cmpts.end() && __it->_M_type() == _Type::_Root_dir) ++__it; if (__it != _M_cmpts.end()) __ret.assign(_M_pathname.substr(__it->_M_pos)); @@ -314,9 +1357,9 @@ path::parent_path() const bool path::has_root_name() const { - if (_M_type == _Type::_Root_name) + if (_M_type() == _Type::_Root_name) return true; - if (!_M_cmpts.empty() && _M_cmpts.begin()->_M_type == _Type::_Root_name) + if (!_M_cmpts.empty() && _M_cmpts.begin()->_M_type() == _Type::_Root_name) return true; return false; } @@ -324,14 +1367,14 @@ path::has_root_name() const bool path::has_root_directory() const { - if (_M_type == _Type::_Root_dir) + if (_M_type() == _Type::_Root_dir) return true; if (!_M_cmpts.empty()) { auto __it = _M_cmpts.begin(); - if (__it->_M_type == _Type::_Root_name) + if (__it->_M_type() == _Type::_Root_name) ++__it; - if (__it != _M_cmpts.end() && __it->_M_type == _Type::_Root_dir) + if (__it != _M_cmpts.end() && __it->_M_type() == _Type::_Root_dir) return true; } return false; @@ -340,11 +1383,11 @@ path::has_root_directory() const bool path::has_root_path() const { - if (_M_type == _Type::_Root_name || _M_type == _Type::_Root_dir) + if (_M_type() == _Type::_Root_name || _M_type() == _Type::_Root_dir) return true; if (!_M_cmpts.empty()) { - auto __type = _M_cmpts.front()._M_type; + auto __type = _M_cmpts.front()._M_type(); if (__type == _Type::_Root_name || __type == _Type::_Root_dir) return true; } @@ -354,14 +1397,14 @@ path::has_root_path() const bool path::has_relative_path() const { - if (_M_type == _Type::_Filename && !_M_pathname.empty()) + if (_M_type() == _Type::_Filename && !_M_pathname.empty()) return true; if (!_M_cmpts.empty()) { auto __it = _M_cmpts.begin(); - if (__it->_M_type == _Type::_Root_name) + if (__it->_M_type() == _Type::_Root_name) ++__it; - if (__it != _M_cmpts.end() && __it->_M_type == _Type::_Root_dir) + if (__it != _M_cmpts.end() && __it->_M_type() == _Type::_Root_dir) ++__it; if (__it != _M_cmpts.end() && 
!__it->_M_pathname.empty()) return true; @@ -383,9 +1426,9 @@ path::has_filename() const { if (empty()) return false; - if (_M_type == _Type::_Filename) + if (_M_type() == _Type::_Filename) return !_M_pathname.empty(); - if (_M_type == _Type::_Multi) + if (_M_type() == _Type::_Multi) { if (_M_pathname.back() == preferred_separator) return false; @@ -436,7 +1479,7 @@ path::lexically_normal() const { #ifdef _GLIBCXX_FILESYSTEM_IS_WINDOWS // Replace each slash character in the root-name - if (p._M_type == _Type::_Root_name || p._M_type == _Type::_Root_dir) + if (p._M_type() == _Type::_Root_name || p._M_type() == _Type::_Root_dir) { string_type s = p.native(); std::replace(s.begin(), s.end(), L'/', L'\\'); @@ -477,7 +1520,7 @@ path::lexically_normal() const { ret._M_pathname.erase(elem._M_cur->_M_pos); // Do we still have a trailing slash? - if (std::prev(elem)->_M_type == _Type::_Filename) + if (std::prev(elem)->_M_type() == _Type::_Filename) ret._M_cmpts.erase(elem._M_cur); else ret._M_cmpts.erase(elem._M_cur, ret._M_cmpts.end()); @@ -560,12 +1603,12 @@ path::_M_find_extension() const { const string_type* s = nullptr; - if (_M_type == _Type::_Filename) + if (_M_type() == _Type::_Filename) s = &_M_pathname; - else if (_M_type == _Type::_Multi && !_M_cmpts.empty()) + else if (_M_type() == _Type::_Multi && !_M_cmpts.empty()) { const auto& c = _M_cmpts.back(); - if (c._M_type == _Type::_Filename) + if (c._M_type() == _Type::_Filename) s = &c._M_pathname; } @@ -586,120 +1629,88 @@ void path::_M_split_cmpts() { _M_cmpts.clear(); + if (_M_pathname.empty()) { - _M_type = _Type::_Filename; + _M_cmpts.type(_Type::_Filename); + return; + } + if (_M_pathname.length() == 1 && _M_pathname[0] == preferred_separator) + { + _M_cmpts.type(_Type::_Root_dir); return; } - _M_type = _Type::_Multi; - size_t pos = 0; - const size_t len = _M_pathname.size(); + _Parser parser(_M_pathname); + + std::array<_Parser::cmpt, 64> buf; + auto next = buf.begin(); // look for root name or root 
directory - if (_S_is_dir_sep(_M_pathname[0])) + auto root_path = parser.root_path(); + if (root_path.first.valid()) { -#ifdef __CYGWIN__ - // look for root name, such as "//foo" - if (len > 2 && _M_pathname[1] == _M_pathname[0]) + *next++ = root_path.first; + if (root_path.second.valid()) + *next++ = root_path.second; + } + + bool got_at_least_one_filename = false; + + auto cmpt = parser.next(); + while (cmpt.valid()) + { + got_at_least_one_filename = true; + do { - if (!_S_is_dir_sep(_M_pathname[2])) - { - // got root name, find its end - pos = 3; - while (pos < len && !_S_is_dir_sep(_M_pathname[pos])) - ++pos; - _M_add_root_name(pos); - if (pos < len) // also got root directory - _M_add_root_dir(pos); - } - else - { - // got something like "///foo" which is just a root directory - // composed of multiple redundant directory separators - _M_add_root_dir(0); - } + *next++ = cmpt; + cmpt = parser.next(); } - else -#endif - { - // got root directory - if (_M_pathname.find_first_not_of('/') == string_type::npos) + while (cmpt.valid() && next != buf.end()); + + if (next == buf.end()) + { + _M_cmpts.type(_Type::_Multi); + _M_cmpts.reserve(_M_cmpts.size() + buf.size()); + auto output = _M_cmpts._M_impl->end(); + for (auto& c : buf) { - // entire path is just slashes - _M_type = _Type::_Root_dir; - return; + auto pos = c.str.data() - _M_pathname.data(); + ::new(output++) _Cmpt(c.str, c.type, pos); + ++_M_cmpts._M_impl->_M_size; } - _M_add_root_dir(0); - ++pos; + next = buf.begin(); } } -#ifdef _GLIBCXX_FILESYSTEM_IS_WINDOWS - else if (len > 1 && _M_pathname[1] == L':') + + // [fs.path.itr]/4 + // An empty element, if trailing non-root directory-separator present. 
+ if (got_at_least_one_filename && is_dir_sep(_M_pathname.back())) { - // got disk designator - _M_add_root_name(2); - if (len > 2 && _S_is_dir_sep(_M_pathname[2])) - _M_add_root_dir(2); - pos = 2; + next->str = { _M_pathname.data() + _M_pathname.length(), 0 }; + next->type = _Type::_Filename; + ++next; } -#endif - size_t back = pos; - while (pos < len) + if (auto n = next - buf.begin()) { - if (_S_is_dir_sep(_M_pathname[pos])) + if (n == 1 && _M_cmpts.empty()) { - if (back != pos) - _M_add_filename(back, pos - back); - back = ++pos; + _M_cmpts.type(buf.front().type); + return; } - else - ++pos; - } - if (back != pos) - _M_add_filename(back, pos - back); - else if (_S_is_dir_sep(_M_pathname.back())) - { - // [fs.path.itr]/4 - // An empty element, if trailing non-root directory-separator present. - if (_M_cmpts.back()._M_type == _Type::_Filename) + _M_cmpts.type(_Type::_Multi); + _M_cmpts.reserve(_M_cmpts.size() + n, true); + auto output = _M_cmpts._M_impl->end(); + for (int i = 0; i < n; ++i) { - pos = _M_pathname.size(); - _M_cmpts.emplace_back(string_type(), _Type::_Filename, pos); + auto c = buf[i]; + auto pos = c.str.data() - _M_pathname.data(); + ::new(output++) _Cmpt(c.str, c.type, pos); + ++_M_cmpts._M_impl->_M_size; } } - - _M_trim(); -} - -void -path::_M_add_root_name(size_t n) -{ - _M_cmpts.emplace_back(_M_pathname.substr(0, n), _Type::_Root_name, 0); -} - -void -path::_M_add_root_dir(size_t pos) -{ - _M_cmpts.emplace_back(_M_pathname.substr(pos, 1), _Type::_Root_dir, pos); -} - -void -path::_M_add_filename(size_t pos, size_t n) -{ - _M_cmpts.emplace_back(_M_pathname.substr(pos, n), _Type::_Filename, pos); -} - -void -path::_M_trim() -{ - if (_M_cmpts.size() == 1) - { - _M_type = _M_cmpts.front()._M_type; - _M_cmpts.clear(); - } } path::string_type diff --git a/libstdc++-v3/testsuite/27_io/filesystem/path/append/source.cc b/libstdc++-v3/testsuite/27_io/filesystem/path/append/source.cc index df917c9c5e8..e440ca921c7 100644 --- 
a/libstdc++-v3/testsuite/27_io/filesystem/path/append/source.cc +++ b/libstdc++-v3/testsuite/27_io/filesystem/path/append/source.cc @@ -112,6 +112,33 @@ test04() #endif } +void +test05() +{ + std::basic_string_view s; + + path p = "0/1/2/3/4/5/6"; + // The string_view aliases the path's internal string: + s = p.native(); + // Append that string_view, which must work correctly even though the + // internal string will be reallocated during the operation: + p /= s; + VERIFY( p.string() == "0/1/2/3/4/5/6/0/1/2/3/4/5/6" ); + + // Same again with a trailing slash: + path p2 = "0/1/2/3/4/5/"; + s = p2.native(); + p2 /= s; + VERIFY( p2.string() == "0/1/2/3/4/5/0/1/2/3/4/5/" ); + + // And aliasing one of the components of the path: + path p3 = "0/123456789/a"; + path::iterator second = std::next(p3.begin()); + s = second->native(); + p3 /= s; + VERIFY( p3.string() == "0/123456789/a/123456789" ); +} + int main() { @@ -119,4 +146,5 @@ main() test02(); test03(); test04(); + test05(); } diff --git a/libstdc++-v3/testsuite/27_io/filesystem/path/concat/strings.cc b/libstdc++-v3/testsuite/27_io/filesystem/path/concat/strings.cc index 67637890c7f..eea9b6dc69b 100644 --- a/libstdc++-v3/testsuite/27_io/filesystem/path/concat/strings.cc +++ b/libstdc++-v3/testsuite/27_io/filesystem/path/concat/strings.cc @@ -57,8 +57,36 @@ test01() VERIFY( p.filename().string() == file ); } +void +test02() +{ + std::basic_string_view s; + + path p = "0/1/2/3/4/5/6"; + // The string_view aliases the path's internal string: + s = p.native(); + // Append that string_view, which must work correctly even though the + // internal string will be reallocated during the operation: + p += s; + VERIFY( p.string() == "0/1/2/3/4/5/60/1/2/3/4/5/6" ); + + // Same again with a trailing slash: + path p2 = "0/1/2/3/4/5/"; + s = p2.native(); + p2 += s; + VERIFY( p2.string() == "0/1/2/3/4/5/0/1/2/3/4/5/" ); + + // And aliasing one of the components of the path: + path p3 = "0/123456789"; + path::iterator second = 
std::next(p3.begin()); + s = second->native(); + p3 += s; + VERIFY( p3.string() == "0/123456789123456789" ); +} + int main() { test01(); + test02(); } -- 2.30.2