# # patch "ChangeLog" # from [e9dcee3b7793fdab9c0c489ed0cd6d99f8f98c6e] # to [b9651fb96cca33510997fbba9c4ef18ad86cc310] # # patch "commands.cc" # from [8792a0f332033eefb23dce29a50ac93e002374a5] # to [dffe0c62fc0c4a64311912ebf5d68b1846ecdf1f] # # patch "transforms.cc" # from [7ee0ff93f777c9d6e076394671b7af5a628b94b2] # to [6ea138ac776c8d19aaf274a8a93f370a60de3e05] # # patch "transforms.hh" # from [b3a75dc2ddc98261388ae0dbcdcf23e93eecd6da] # to [a6b73afd42a8ffa3ff52368cc3b5c7b8e445b713] # # patch "unix/inodeprint.cc" # from [01ee59f9f98edf2aee545ad583dd6edf83e4149d] # to [edddf107b95ea1501993cbc6f43efe8ea4aadc1a] # # patch "vocab.cc" # from [2c41cbb5d3163362b8ade235b944e808e97dbf99] # to [6e07baffa263c80f013d474ad362c3b52c162201] # # patch "win32/inodeprint.cc" # from [24bb5c58766d541461f7de8bafdf64f9ffb652ee] # to [40ec91986e7a59d0f427e293d08879887b06ce97] # --- ChangeLog +++ ChangeLog @@ -1,3 +1,18 @@ +2005-04-30 Nathaniel Smith + + * vocab.cc (trivially_safe_file_path): New function. + (verify): Use it. + (test_file_path_verification, test_file_path_normalization): Add a + few more checks. + + * transforms.{cc,hh} (localized_as_string): New function. + * {win32,unix}/inodeprint.cc (inodeprint_file): Use it, to avoid + mkpath(). + + * commands.cc (add_intermediate_paths): Hand-code intermediate + path generator, taking advantage of normalization of file_path's, + to avoid mkpath(). + 2005-04-29 Nathaniel Smith * commands.cc (ls_tags): Sort output. --- commands.cc +++ commands.cc @@ -290,17 +290,16 @@ for (path_set::const_iterator i = paths.begin(); i != paths.end(); ++i) { - fs::path p = mkpath((*i)()); - while (p.has_branch_path()) + // we know that file_path's are normalized relative paths. So we can + // find intermediate paths simply by searching for /. + std::string::size_type j = std::string::npos; + while ((j = (*i)().rfind('/', j)) != std::string::npos) { - p = p.branch_path(); - file_path dir(p.string()); - - // once we hit a subdir that exists or has been added we're done. - if (paths.find(dir) != paths.end()) break; + file_path dir((*i)().substr(0, j)); if (intermediate_paths.find(dir) != intermediate_paths.end()) break; - + if (paths.find(dir) != paths.end()) break; intermediate_paths.insert(dir); + --j; } } --- transforms.cc +++ transforms.cc @@ -631,6 +631,17 @@ return it_is; } +inline static bool +is_all_ascii(string const & utf) +{ + // could speed this up by vectorization -- mask against 0x80808080, + // process a whole word at at time... + for (std::string::const_iterator i = utf.begin(); i != utf.end(); ++i) + if (0x80 & *i) + return false; + return true; +} + inline static fs::path localized_impl(string const & utf) { @@ -640,20 +651,8 @@ #else if (filesystem_is_utf8()) return mkpath(utf); - if (filesystem_is_ascii_extension()) - { - bool is_all_ascii = true; - // could speed this up by vectorization -- mask against 0x80808080, - // process a whole word at at time... - for (std::string::const_iterator i = utf.begin(); i != utf.end(); ++i) - if (0x80 & *i) - { - is_all_ascii = false; - break; - } - if (is_all_ascii) - return mkpath(utf); - } + if (filesystem_is_ascii_extension() && is_all_ascii(utf)) + return mkpath(utf); fs::path tmp = mkpath(utf), ret; for (fs::path::iterator i = tmp.begin(); i != tmp.end(); ++i) { @@ -665,6 +664,21 @@ #endif } +std::string +localized_as_string(file_path const & fp) +{ +#ifdef __APPLE__ + // on OS X paths for the filesystem/kernel are UTF-8 encoded. + return fp(); +#else + if (filesystem_is_utf8()) + return fp(); + if (filesystem_is_ascii_extension() && is_all_ascii(fp())) + return fp(); + return localized(fp).native_file_string(); +#endif +} + fs::path localized(file_path const & fp) { --- transforms.hh +++ transforms.hh @@ -176,6 +176,7 @@ fs::path localized(file_path const & path); fs::path localized(local_path const & path); fs::path localized(utf8 const & path); +std::string localized_as_string(file_path const & path); // specific internal / external conversions for various vocab terms void internalize_cert_name(utf8 const & utf, cert_name & c); --- unix/inodeprint.cc +++ unix/inodeprint.cc @@ -30,7 +30,7 @@ bool inodeprint_file(file_path const & file, hexenc & ip) { struct stat st; - if (stat(localized(file).native_file_string().c_str(), &st) < 0) + if (stat(localized_as_string(file).c_str(), &st) < 0) return false; CryptoPP::SHA hash; --- vocab.cc +++ vocab.cc @@ -155,6 +155,33 @@ val.ok = true; } +// returns true if the given string is obviously a normalized file path (no +// . or .. components, a relative path, no doubled //s, does not end in /, +// does not start with MT) +inline bool +trivially_safe_file_path(std::string const & f) +{ + const static std::string bad_chars = std::string("\\:") + constants::illegal_path_bytes + std::string(1, '\0'); + const static char sep_char('/'); + const static std::string bad_after_sep_chars("./"); + if (f.empty()) + return true; + char prev = sep_char; + for (std::string::const_iterator i = f.begin(); i != f.end(); ++i) + { + if (bad_chars.find(*i) != std::string::npos) + return false; + if (prev == sep_char && bad_after_sep_chars.find(*i) != std::string::npos) + return false; + prev = *i; + } + if (prev == sep_char) + return false; + if (f.size() >= 2 && f[0] == 'M' && f[1] == 'T') + return false; + return true; +} + inline void verify(file_path & val) { @@ -166,13 +193,18 @@ std::map::const_iterator j = known_good.find(val()); if (j == known_good.end()) { - local_path loc(val()); - verify(loc); - N(!book_keeping_file(loc), - F("prohibited book-keeping path in '%s'") % val); - const std::string & normalized_val = loc(); - val.s = normalized_val; - known_good.insert(std::make_pair(val(), normalized_val)); + if (trivially_safe_file_path(val())) + known_good.insert(std::make_pair(val(), val())); + else + { + local_path loc(val()); + verify(loc); + N(!book_keeping_file(loc), + F("prohibited book-keeping path in '%s'") % val); + const std::string & normalized_val = loc(); + val.s = normalized_val; + known_good.insert(std::make_pair(val(), normalized_val)); + } } else { @@ -297,6 +329,7 @@ badboy[1] = *c; BOOST_CHECK_THROW(file_path p(badboy), informative_failure); } + BOOST_CHECK_THROW(file_path p(std::string(1, '\0')), informative_failure); char const * goodies [] = {"unrooted", "unrooted.txt", @@ -316,6 +349,7 @@ BOOST_CHECK(file_path("./foo") == file_path("foo")); BOOST_CHECK(file_path("foo/bar/./baz") == file_path("foo/bar/baz")); BOOST_CHECK(file_path("foo/bar/../baz") == file_path("foo/baz")); + BOOST_CHECK(file_path("foo/bar/baz/") == file_path("foo/bar/baz")); } void add_vocab_tests(test_suite * suite) --- win32/inodeprint.cc +++ win32/inodeprint.cc @@ -28,7 +28,7 @@ bool inodeprint_file(file_path const & file, hexenc & ip) { struct _stati64 st; - if (_stati64(localized(file).native_file_string().c_str(), &st) < 0) + if (_stati64(localized_as_string(file).c_str(), &st) < 0) return false; CryptoPP::SHA hash;