diff --git a/lib/Support/Path.cpp b/lib/Support/Path.cpp index 6f065b9c5a3..0a4d5818703 100644 --- a/lib/Support/Path.cpp +++ b/lib/Support/Path.cpp @@ -684,43 +684,69 @@ StringRef remove_leading_dotslash(StringRef Path, Style style) { return Path; } -static SmallString<256> remove_dots(StringRef path, bool remove_dot_dot, - Style style) { +// Remove path traversal components ("." and "..") when possible, and +// canonicalize slashes. +bool remove_dots(SmallVectorImpl &the_path, bool remove_dot_dot, + Style style) { + style = real_style(style); + StringRef remaining(the_path.data(), the_path.size()); + bool needs_change = false; SmallVector components; - // Skip the root path, then look for traversal in the components. - StringRef rel = path::relative_path(path, style); - for (StringRef C : - llvm::make_range(path::begin(rel, style), path::end(rel))) { - if (C == ".") - continue; - // Leading ".." will remain in the path unless it's at the root. - if (remove_dot_dot && C == "..") { + // Consume the root path, if present. + StringRef root = path::root_path(remaining, style); + bool absolute = !root.empty(); + if (absolute) + remaining = remaining.drop_front(root.size()); + + // Loop over path components manually. This makes it easier to detect + // non-preferred slashes and double separators that must be canonicalized. + while (!remaining.empty()) { + size_t next_slash = remaining.find_first_of(separators(style)); + if (next_slash == StringRef::npos) + next_slash = remaining.size(); + StringRef component = remaining.take_front(next_slash); + remaining = remaining.drop_front(next_slash); + + // Eat the slash, and check if it is the preferred separator. + if (!remaining.empty()) { + needs_change |= remaining.front() != preferred_separator(style); + remaining = remaining.drop_front(); + // The path needs to be rewritten if it has a trailing slash. + // FIXME: This is emergent behavior that could be removed. + needs_change |= remaining.empty(); + } + + // Check for path traversal components or double separators. + if (component.empty() || component == ".") { + needs_change = true; + } else if (remove_dot_dot && component == "..") { + needs_change = true; + // Do not allow ".." to remove the root component. If this is the + // beginning of a relative path, keep the ".." component. if (!components.empty() && components.back() != "..") { components.pop_back(); - continue; + } else if (!absolute) { + components.push_back(component); } - if (path::is_absolute(path, style)) - continue; + } else { + components.push_back(component); } - components.push_back(C); } - SmallString<256> buffer = path::root_path(path, style); - for (StringRef C : components) - path::append(buffer, style, C); - return buffer; -} - -bool remove_dots(SmallVectorImpl &path, bool remove_dot_dot, - Style style) { - StringRef p(path.data(), path.size()); - - SmallString<256> result = remove_dots(p, remove_dot_dot, style); - if (result == path) + // Avoid rewriting the path unless we have to. + if (!needs_change) return false; - path.swap(result); + SmallString<256> buffer = root; + if (!components.empty()) { + buffer += components[0]; + for (StringRef C : makeArrayRef(components).drop_front()) { + buffer += preferred_separator(style); + buffer += C; + } + } + the_path.swap(buffer); return true; } diff --git a/lib/Support/Windows/Path.inc b/lib/Support/Windows/Path.inc index 2e148847909..ec62e656ddf 100644 --- a/lib/Support/Windows/Path.inc +++ b/lib/Support/Windows/Path.inc @@ -101,18 +101,13 @@ std::error_code widenPath(const Twine &Path8, SmallVectorImpl &Path16, } // Remove '.' and '..' because long paths treat these as real path components. + llvm::sys::path::native(Path8Str, path::Style::windows); llvm::sys::path::remove_dots(Path8Str, true); const StringRef RootName = llvm::sys::path::root_name(Path8Str); assert(!RootName.empty() && "Root name cannot be empty for an absolute path!"); - // llvm::sys::path::remove_dots, used above, can leave a '/' after the root - // name and long paths must use '\' as the separator. - const size_t RootNameSize = RootName.size(); - if (RootNameSize < Path8Str.size() && Path8Str[RootNameSize] == '/') - Path8Str[RootNameSize] = '\\'; - SmallString<2 * MAX_PATH> FullPath(LongPathPrefix); if (RootName[1] != ':') { // Check if UNC. FullPath.append("UNC\\"); diff --git a/unittests/Support/Path.cpp b/unittests/Support/Path.cpp index 3030fb2ebb2..b2eddd52e68 100644 --- a/unittests/Support/Path.cpp +++ b/unittests/Support/Path.cpp @@ -1253,6 +1253,30 @@ TEST(Support, RemoveDots) { remove_dots("..\\a\\b\\..\\c", true, path::Style::windows)); EXPECT_EQ("..\\..\\a\\c", remove_dots("..\\..\\a\\b\\..\\c", true, path::Style::windows)); + EXPECT_EQ("C:\\a\\c", remove_dots("C:\\foo\\bar//..\\..\\a\\c", true, + path::Style::windows)); + + // FIXME: These leading forward slashes are emergent behavior. VFS depends on + // this behavior now. + EXPECT_EQ("C:/bar", + remove_dots("C:/foo/../bar", true, path::Style::windows)); + EXPECT_EQ("C:/foo\\bar", + remove_dots("C:/foo/bar", true, path::Style::windows)); + EXPECT_EQ("C:/foo\\bar", + remove_dots("C:/foo\\bar", true, path::Style::windows)); + EXPECT_EQ("/", remove_dots("/", true, path::Style::windows)); + EXPECT_EQ("C:/", remove_dots("C:/", true, path::Style::windows)); + + // Some clients of remove_dots expect it to remove trailing slashes. Again, + // this is emergent behavior that VFS relies on, and not inherently part of + // the specification. + EXPECT_EQ("C:\\foo\\bar", + remove_dots("C:\\foo\\bar\\", true, path::Style::windows)); + EXPECT_EQ("/foo/bar", + remove_dots("/foo/bar/", true, path::Style::posix)); + + // A double separator is rewritten. + EXPECT_EQ("C:/foo\\bar", remove_dots("C:/foo//bar", true, path::Style::windows)); SmallString<64> Path1(".\\.\\c"); EXPECT_TRUE(path::remove_dots(Path1, true, path::Style::windows)); @@ -1271,6 +1295,11 @@ TEST(Support, RemoveDots) { EXPECT_EQ("/a/c", remove_dots("/../../a/c", true, path::Style::posix)); EXPECT_EQ("/a/c", remove_dots("/../a/b//../././/c", true, path::Style::posix)); + EXPECT_EQ("/", remove_dots("/", true, path::Style::posix)); + + // FIXME: Leaving behind this double leading slash seems like a bug. + EXPECT_EQ("//foo/bar", + remove_dots("//foo/bar/", true, path::Style::posix)); SmallString<64> Path2("././c"); EXPECT_TRUE(path::remove_dots(Path2, true, path::Style::posix));