From 2a25a91d8d237c94096b5ea4489429795231ba9f Mon Sep 17 00:00:00 2001 From: David Capello Date: Tue, 2 Jul 2024 18:20:44 -0300 Subject: [PATCH] Fix get_canonical/absolute/normalize_path() functions (fix #98) Several refactors in this commit: - Moved is_path_separator() function and path_separator as a constexpr to the header file - Added path_separators string (for Windows, which includes \ and /) - get_canonical_path() works only when the file exists (returns an empty string in other case) - normalize_path() and get_absolute_path() work in a lexical level, handling only strings and getting the current path when needed, but without checking the existence of the file --- base/fs.cpp | 117 ++++++++++++++++++++++------------ base/fs.h | 36 ++++++++--- base/fs_tests.cpp | 155 +++++++++++++++++++++++++++++++++++++++------- base/fs_unix.h | 18 +++++- base/fs_win32.h | 37 +++++++---- 5 files changed, 275 insertions(+), 88 deletions(-) diff --git a/base/fs.cpp b/base/fs.cpp index 29311bda7..7ae6ef69e 100644 --- a/base/fs.cpp +++ b/base/fs.cpp @@ -1,5 +1,5 @@ // LAF Base Library -// Copyright (c) 2021-2022 Igara Studio S.A. +// Copyright (c) 2021-2024 Igara Studio S.A. // Copyright (c) 2001-2018 David Capello // // This file is released under the terms of the MIT license. @@ -27,16 +27,18 @@ namespace base { +// On Windows we can use \ or / as path separators, but on Unix-like +// platforms it's just /, as \ can be part of the file name. #if LAF_WINDOWS - const std::string::value_type path_separator = '\\'; + const std::string::value_type* path_separators = "\\/"; #else - const std::string::value_type path_separator = '/'; + const std::string::value_type* path_separators = "/"; #endif void make_all_directories(const std::string& path) { std::vector parts; - split_string(path, parts, "/\\"); + split_string(path, parts, path_separators); std::string intermediate; for (const std::string& component : parts) { @@ -55,31 +57,6 @@ void make_all_directories(const std::string& path) } } -std::string get_absolute_path(const std::string& filename) -{ - std::string fn = filename; - if (fn.size() > 2 && -#if LAF_WINDOWS - fn[1] != ':' -#else - fn[0] != '/' -#endif - ) { - fn = base::join_path(base::get_current_path(), fn); - } - fn = base::get_canonical_path(fn); - return fn; -} - -bool is_path_separator(std::string::value_type chr) -{ - return ( -#if LAF_WINDOWS - chr == '\\' || -#endif - chr == '/'); -} - std::string get_file_path(const std::string& filename) { std::string::const_reverse_iterator rit; @@ -213,10 +190,10 @@ std::string get_file_title_with_path(const std::string& filename) std::string get_relative_path(const std::string& filename, const std::string& base_path) { std::vector baseDirs; - split_string(base_path, baseDirs, "/\\"); + split_string(base_path, baseDirs, path_separators); std::vector toParts; - split_string(filename, toParts, "/\\"); + split_string(filename, toParts, path_separators); // Find the common prefix auto itFrom = baseDirs.begin(); @@ -270,19 +247,79 @@ std::string remove_path_separator(const std::string& path) std::string fix_path_separators(const std::string& filename) { - std::string result(filename); - - // Replace any separator with the system path separator. - std::replace_if(result.begin(), result.end(), - is_path_separator, path_separator); - + std::string result; + result.reserve(filename.size()); + for (auto chr : filename) { + if (is_path_separator(chr)) { + if (result.empty() || !is_path_separator(result.back())) + result.push_back(path_separator); + } + else + result.push_back(chr); + } return result; } -std::string normalize_path(const std::string& filename) +// It tries to replicate the standard path::lexically_normal() +// algorithm from https://en.cppreference.com/w/cpp/filesystem/path +std::string normalize_path(const std::string& _path) { - std::string fn = base::get_canonical_path(filename); - fn = base::fix_path_separators(fn); + // Normal form of an empty path is an empty path. + if (_path.empty()) + return std::string(); + + // Replace multiple slashes with a single path_separator. + std::string path = fix_path_separators(_path); + + std::string fn; + if (!path.empty() && path[0] == path_separator) + fn.push_back(path_separator); + + std::vector parts; + split_string(path, parts, path_separators); + + // Last element generates a final dot or slash in normalized path. + bool last_dot = false; + + auto n = int(parts.size()); + for (int i=0; i + +#if !LAF_MACOS + #define COMPARE_WITH_STD_FS 1 +#endif + +#if COMPARE_WITH_STD_FS + // We cannot use the on macOS yet because we are + // targetting macOS 10.9 platform. + #include + namespace fs = std::filesystem; +#endif + using namespace base; +#if COMPARE_WITH_STD_FS +// We want to test against std::filesystem for future replacement of +// some of our functions with the standard ones. +TEST(FS, CurrentPath) +{ + // Compare with + EXPECT_EQ(fs::current_path(), get_current_path()); + EXPECT_EQ(fs::path::preferred_separator, path_separator); +} +#endif + +TEST(FS, FixPathSeparators) +{ + const std::string sep(1, path_separator); + EXPECT_EQ(sep, fix_path_separators("/")); + EXPECT_EQ(sep, fix_path_separators("///")); + EXPECT_EQ("a"+sep+"b"+sep, fix_path_separators("a///b/")); +} + TEST(FS, MakeDirectory) { EXPECT_FALSE(is_directory("a")); @@ -191,10 +223,85 @@ TEST(FS, GetRelativePath) #endif } +TEST(FS, GetAbsolutePath) +{ + const auto cp = get_current_path(); + + EXPECT_EQ(join_path(cp, "a"), get_absolute_path("a")); + EXPECT_EQ(join_path(cp, "a"), get_absolute_path("./a")); + EXPECT_EQ(cp, get_absolute_path(".")); + EXPECT_EQ(cp, get_absolute_path("./.")); + EXPECT_EQ(cp, get_absolute_path("./a/..")); + EXPECT_EQ(cp, get_absolute_path(".////.")); + +#if LAF_WINDOWS + EXPECT_EQ("C:\\file", get_absolute_path("C:/path/../file")); +#else + EXPECT_EQ("/file", get_absolute_path("/path/../file")); +#endif +} + +TEST(FS, GetCanonicalPath) +{ + const auto cp = get_current_path(); + + EXPECT_EQ("", get_canonical_path("./non_existent_file")); + EXPECT_EQ("", get_canonical_path("non_existent_file")); + EXPECT_EQ(cp, get_canonical_path(".")); + + // Creates a file so get_canonical_path() returns its absolute path + write_file_content("_test_existing_file.txt", (uint8_t*)"123", 3); + EXPECT_EQ(join_path(cp, "_test_existing_file.txt"), + get_canonical_path("_test_existing_file.txt")); +} + +TEST(FS, NormalizePath) +{ + const std::string sep(1, path_separator); + + EXPECT_EQ("", normalize_path("")); + EXPECT_EQ(".", normalize_path(".")); + EXPECT_EQ(".", normalize_path("./.")); + EXPECT_EQ(".", normalize_path(".///./.")); + EXPECT_EQ(".", normalize_path(".///./")); + + EXPECT_EQ("a"+sep, normalize_path("a/.")); + EXPECT_EQ("a"+sep, normalize_path("a/")); + EXPECT_EQ("a", normalize_path("./a")); + EXPECT_EQ("a"+sep+"b"+sep+"c", normalize_path("a///b/./c")); + + EXPECT_EQ("..", normalize_path("..")); + EXPECT_EQ(".."+sep+"..", normalize_path("../..")); + EXPECT_EQ(".."+sep+"..", normalize_path("../../")); + EXPECT_EQ(".."+sep+"..", normalize_path(".././..")); + EXPECT_EQ(".."+sep+"..", normalize_path("./.././../.")); + + EXPECT_EQ(".", normalize_path("a/..")); + EXPECT_EQ("..", normalize_path("../a/..")); + EXPECT_EQ(".."+sep+"..", normalize_path("../a/../..")); + EXPECT_EQ("..", normalize_path("a/../..")); + EXPECT_EQ(sep+"b", normalize_path("/a/../b")); +} + +#if COMPARE_WITH_STD_FS +TEST(FS, CompareNormalizePathWithStd) +{ + for (const char* sample : { "", ".", "./.", ".///./.", ".///./", + "a/.", "a/", "./a", "a///b/./c", + "..", "../..", + "../../", ".././..", "./.././../.", + "a/..", "../a/..", "../a/../..", "a/../..", + "/a/../b" }) { + EXPECT_EQ(fs::path(sample).lexically_normal(), + normalize_path(sample)) + << " sample=\"" << sample << "\""; + } +} +#endif + TEST(FS, JoinPath) { - std::string sep; - sep.push_back(path_separator); + const std::string sep(1, path_separator); EXPECT_EQ("", join_path("", "")); EXPECT_EQ("fn", join_path("", "fn")); @@ -211,31 +318,31 @@ TEST(FS, JoinPath) TEST(FS, RemovePathSeparator) { - EXPECT_EQ("C:/foo", remove_path_separator("C:/foo/")); - EXPECT_EQ("C:\\foo\\main.cpp", remove_path_separator("C:\\foo\\main.cpp")); - EXPECT_EQ("C:\\foo\\main.cpp", remove_path_separator("C:\\foo\\main.cpp/")); + EXPECT_EQ("C:/foo", remove_path_separator("C:/foo/")); + EXPECT_EQ("C:\\foo\\main.cpp", remove_path_separator("C:\\foo\\main.cpp")); + EXPECT_EQ("C:\\foo\\main.cpp", remove_path_separator("C:\\foo\\main.cpp/")); #if LAF_WINDOWS - EXPECT_EQ("C:\\foo", remove_path_separator("C:\\foo\\")); + EXPECT_EQ("C:\\foo", remove_path_separator("C:\\foo\\")); #else - EXPECT_EQ("C:\\foo\\", remove_path_separator("C:\\foo\\")); + EXPECT_EQ("C:\\foo\\", remove_path_separator("C:\\foo\\")); #endif } TEST(FS, HasFileExtension) { - EXPECT_TRUE (has_file_extension("hi.png", base::paths{"png"})); - EXPECT_FALSE(has_file_extension("hi.png", base::paths{"pngg"})); - EXPECT_FALSE(has_file_extension("hi.png", base::paths{"ppng"})); - EXPECT_TRUE (has_file_extension("hi.jpeg", base::paths{"jpg","jpeg"})); - EXPECT_TRUE (has_file_extension("hi.jpg", base::paths{"jpg","jpeg"})); - EXPECT_FALSE(has_file_extension("hi.ase", base::paths{"jpg","jpeg"})); - EXPECT_TRUE (has_file_extension("hi.ase", base::paths{"jpg","jpeg","ase"})); - EXPECT_TRUE (has_file_extension("hi.ase", base::paths{"ase","jpg","jpeg"})); - - EXPECT_TRUE (has_file_extension("hi.png", base::paths{"Png"})); - EXPECT_TRUE (has_file_extension("hi.pnG", base::paths{"bmp","PNg"})); - EXPECT_TRUE (has_file_extension("hi.bmP", base::paths{"bMP","PNg"})); + EXPECT_TRUE (has_file_extension("hi.png", paths{"png"})); + EXPECT_FALSE(has_file_extension("hi.png", paths{"pngg"})); + EXPECT_FALSE(has_file_extension("hi.png", paths{"ppng"})); + EXPECT_TRUE (has_file_extension("hi.jpeg", paths{"jpg","jpeg"})); + EXPECT_TRUE (has_file_extension("hi.jpg", paths{"jpg","jpeg"})); + EXPECT_FALSE(has_file_extension("hi.ase", paths{"jpg","jpeg"})); + EXPECT_TRUE (has_file_extension("hi.ase", paths{"jpg","jpeg","ase"})); + EXPECT_TRUE (has_file_extension("hi.ase", paths{"ase","jpg","jpeg"})); + + EXPECT_TRUE (has_file_extension("hi.png", paths{"Png"})); + EXPECT_TRUE (has_file_extension("hi.pnG", paths{"bmp","PNg"})); + EXPECT_TRUE (has_file_extension("hi.bmP", paths{"bMP","PNg"})); } TEST(FS, ReplaceExtension) @@ -284,13 +391,13 @@ TEST(FS, CopyFiles) std::vector data = { 'H', 'e', 'l', 'l', 'o', ' ', 'W', 'o', 'r', 'l', 'd' }; const std::string dst = "_test_copy_.tmp"; - if (base::is_file(dst)) - base::delete_file(dst); + if (is_file(dst)) + delete_file(dst); - base::write_file_content("_test_orig_.tmp", data.data(), data.size()); - base::copy_file("_test_orig_.tmp", dst, true); + write_file_content("_test_orig_.tmp", data.data(), data.size()); + copy_file("_test_orig_.tmp", dst, true); - EXPECT_EQ(data, base::read_file_content(dst)); + EXPECT_EQ(data, read_file_content(dst)); } int main(int argc, char** argv) diff --git a/base/fs_unix.h b/base/fs_unix.h index e4b0c830e..3d8c2e846 100644 --- a/base/fs_unix.h +++ b/base/fs_unix.h @@ -6,6 +6,7 @@ // Read LICENSE.txt for more information. #include "base/file_handle.h" +#include "base/fs.h" #include "base/ints.h" #include "base/paths.h" #include "base/time.h" @@ -208,11 +209,24 @@ std::string get_user_docs_folder() std::string get_canonical_path(const std::string& path) { + const std::string full = get_absolute_path(path); char buffer[PATH_MAX]; // Ignore return value as realpath() returns nullptr anyway when the // resolved_path parameter is specified. - realpath(path.c_str(), buffer); - return buffer; + if (realpath(full.c_str(), buffer)) + return buffer; // No error, the file/dir exists + return std::string(); +} + +std::string get_absolute_path(const std::string& path) +{ + std::string full = path; + if (!full.empty() && full[0] != '/') + full = join_path(get_current_path(), full); + full = normalize_path(full); + if (!full.empty() && full.back() == path_separator) + full.erase(full.size()-1); + return full; } paths list_files(const std::string& path) diff --git a/base/fs_win32.h b/base/fs_win32.h index 3edc6d12d..6366ff1a6 100644 --- a/base/fs_win32.h +++ b/base/fs_win32.h @@ -5,17 +5,18 @@ // This file is released under the terms of the MIT license. // Read LICENSE.txt for more information. -#include -#include -#include -#include - +#include "base/fs.h" #include "base/paths.h" #include "base/string.h" #include "base/time.h" #include "base/version.h" #include "base/win/win32_exception.h" +#include +#include +#include +#include + namespace base { bool is_file(const std::string& path) @@ -119,8 +120,7 @@ std::string get_current_path() TCHAR buffer[MAX_PATH+1]; if (::GetCurrentDirectory(sizeof(buffer)/sizeof(TCHAR), buffer)) return to_utf8(buffer); - else - return ""; + return std::string(); } void set_current_path(const std::string& path) @@ -133,8 +133,7 @@ std::string get_app_path() TCHAR buffer[MAX_PATH+1]; if (::GetModuleFileName(NULL, buffer, sizeof(buffer)/sizeof(TCHAR))) return to_utf8(buffer); - else - return ""; + return std::string(); } std::string get_temp_path() @@ -152,15 +151,29 @@ std::string get_user_docs_folder() buffer); if (hr == S_OK) return to_utf8(buffer); - else - return ""; + return std::string(); } std::string get_canonical_path(const std::string& path) { + std::string full = get_absolute_path(path); + DWORD attr = ::GetFileAttributes(from_utf8(full).c_str()); + if (attr != INVALID_FILE_ATTRIBUTES) + return full; + return std::string(); +} + +std::string get_absolute_path(const std::string& path) +{ + std::string full; + if (path.size() > 2 && path[1] != ':') + full = base::join_path(base::get_current_path(), path); + else + full = path; + TCHAR buffer[MAX_PATH+1]; GetFullPathName( - from_utf8(path).c_str(), + from_utf8(full).c_str(), sizeof(buffer)/sizeof(TCHAR), buffer, nullptr);