# # # delete "tests/t_add_intermediate__MTN_path.at" # # delete "tests/t_approval_semantics.at" # # delete "tests/t_automate_heads.at" # # delete "tests/t_automate_version.at" # # delete "tests/t_branch_checkout.at" # # delete "tests/t_cdiff.at" # # delete "tests/t_change_empty_file.at" # # delete "tests/t_checkout_noop_on_fail.at" # # delete "tests/t_db_missing.at" # # delete "tests/t_delete_dir_patch.at" # # delete "tests/t_i18n_changelog.at" # # delete "tests/t_largish_file.at" # # delete "tests/t_load_into_existing.at" # # delete "tests/t_merge_3.at" # # delete "tests/t_merge_4.at" # # delete "tests/t_merge_normalization_edge_case.at" # # delete "tests/t_monotone_agent.at" # # delete "tests/t_need_mt_revision.at" # # delete "tests/t_no_rename_overwrite.at" # # delete "tests/t_restrictions_warn_on_unknown.at" # # delete "tests/t_revert_dirs.at" # # delete "tests/t_revert_rename.at" # # delete "tests/t_revert_unchanged.at" # # delete "tests/t_undo_update.at" # # delete "tests/t_update_null_revision.at" # # add_dir "tests/(imp)_merge((patch_foo_a),_(delete_foo_))" # # add_dir "tests/(minor)_context_diff" # # add_dir "tests/(minor)_test_a_merge_3" # # add_dir "tests/(minor)_test_a_merge_4" # # add_dir "tests/(todo)_design_approval_semantics" # # add_dir "tests/(todo)_undo_update_command" # # add_dir "tests/(todo)_write_monotone-agent" # # add_dir "tests/_MTN_revision_is_required" # # add_dir "tests/automate_automate_version" # # add_dir "tests/automate_heads" # # add_dir "tests/branch-based_checkout" # # add_dir "tests/committing_with_a_non-english_message" # # add_dir "tests/db_load_must_create_a_new_db" # # add_dir "tests/db_missing" # # add_dir "tests/failed_checkout_is_a_no-op" # # add_dir "tests/files_with_intermediate__MTN_path_elements" # # add_dir "tests/largish_file" # # add_dir "tests/merge_normalization_edge_case" # # add_dir "tests/modification_of_an_empty_file" # # add_dir "tests/rename_cannot_overwrite_files" # # add_dir "tests/revert_directories" # # add_dir "tests/revert_renames" # # add_dir "tests/revert_unchanged_file_preserves_mtime" # # add_dir "tests/update_no-ops_when_no_parent_revision" # # add_dir "tests/warn_on_bad_restriction" # # add_file "tests/(imp)_merge((patch_foo_a),_(delete_foo_))/__driver__.lua" # content [afd3d334f88c38e8483688bdb719f32f6a015d82] # # add_file "tests/(minor)_context_diff/__driver__.lua" # content [6729332d60b677ae1533e8b052ac085f0418d8cb] # # add_file "tests/(minor)_test_a_merge_3/__driver__.lua" # content [d0fe9f78d0d218a6a740031db33bac03b5063a1c] # # add_file "tests/(minor)_test_a_merge_3/ancestor" # content [4f7cfb26927467e9f2a37070edbb19785cbb2f2d] # # add_file "tests/(minor)_test_a_merge_3/left" # content [adc1ca256e9313dd387448ffcd5cf7572eb58d8e] # # add_file "tests/(minor)_test_a_merge_3/merge.diff3" # content [adc1ca256e9313dd387448ffcd5cf7572eb58d8e] # # add_file "tests/(minor)_test_a_merge_3/right" # content [63ad35cd3955bfa681b76b31d7f2fd745e84f654] # # add_file "tests/(minor)_test_a_merge_4/__driver__.lua" # content [7a01317489a6c5c99bbdbcf11531d1110f9ffdd3] # # add_file "tests/(minor)_test_a_merge_4/ancestor" # content [a2c50da63f01b242d8aaeb34d65e48edf0fef21b] # # add_file "tests/(minor)_test_a_merge_4/left" # content [8d5a2273e0e3da4aa55ff731e7152a673b63f08a] # # add_file "tests/(minor)_test_a_merge_4/merge.diff3" # content [8d5a2273e0e3da4aa55ff731e7152a673b63f08a] # # add_file "tests/(minor)_test_a_merge_4/right" # content [6745b398ffecec36bc4fc45598e678b3391d91b2] # # add_file 
"tests/(todo)_design_approval_semantics/__driver__.lua" # content [753895fefbba1ab18378b41847714c209454a7bc] # # add_file "tests/(todo)_undo_update_command/__driver__.lua" # content [045bc521e2380813d982f8efdc5476221f266088] # # add_file "tests/(todo)_write_monotone-agent/__driver__.lua" # content [4a0d2d148acb0b7eac94291927ccbbfc07f0b74e] # # add_file "tests/_MTN_revision_is_required/__driver__.lua" # content [69297e83d80bddb26d885cac8d9171f1079df2ef] # # add_file "tests/automate_automate_version/__driver__.lua" # content [6fdc200cf0f8c01e6174ebb04f521b0e645198ec] # # add_file "tests/automate_heads/__driver__.lua" # content [388eb896ee29c7d14a5294ebafbc60b43faa1bef] # # add_file "tests/branch-based_checkout/__driver__.lua" # content [90dc840151263a46c883a9a489dfbb8ee279a16a] # # add_file "tests/committing_with_a_non-english_message/__driver__.lua" # content [184d6d737de6b9eb5a2a23b1c6f786cf972f452c] # # add_file "tests/db_load_must_create_a_new_db/__driver__.lua" # content [bc64c2372001fc1e56dcb48b77ecea82e8483511] # # add_file "tests/db_missing/__driver__.lua" # content [0f4c308a335dee7535e06b7b7a63d2c5229a779e] # # add_file "tests/failed_checkout_is_a_no-op/__driver__.lua" # content [a0e0274a4b6f5b96ef1b38a7bddc82432c5597b7] # # add_file "tests/files_with_intermediate__MTN_path_elements/__driver__.lua" # content [8740ff57f33dadbd67b2f008dc84ef0f0b774497] # # add_file "tests/largish_file/__driver__.lua" # content [d879bd8f8dc04288cf3f6f88963f2ff1e725ce72] # # add_file "tests/merge_normalization_edge_case/__driver__.lua" # content [540f5a97e67577161144a624a7b84717d0aef727] # # add_file "tests/merge_normalization_edge_case/left" # content [f4657ce998dd0e39465a3f345f3540b689fd60ad] # # add_file "tests/merge_normalization_edge_case/parent" # content [fe24df7edf04cb06161defc10b252c5fa32bf1f7] # # add_file "tests/merge_normalization_edge_case/right" # content [1836ed24710f5b8943bed224cf296689c6a106c2] # # add_file "tests/modification_of_an_empty_file/__driver__.lua" # content [9831152a78a69ab9feb3aae2a1399d7574aaecac] # # add_file "tests/rename_cannot_overwrite_files/__driver__.lua" # content [f6e813874fc679c957fecc77f83d0e4ff2ac2c09] # # add_file "tests/revert_directories/__driver__.lua" # content [c5ea51f18aae3233942a3269799c1e56593ff59a] # # add_file "tests/revert_renames/__driver__.lua" # content [59e1fa11598a797b4a59bd6ee2375e90939ef13c] # # add_file "tests/revert_unchanged_file_preserves_mtime/__driver__.lua" # content [2f7232fe73c897383827d0e4d01de564eca715ba] # # add_file "tests/update_no-ops_when_no_parent_revision/__driver__.lua" # content [70cee6aee810a93daf6a9f173b531fdf5992bb25] # # add_file "tests/warn_on_bad_restriction/__driver__.lua" # content [37ba16972b88f387c1de33a07495ae1f3dbf07ef] # # patch "tester.cc" # from [01ceaa6974040125059f3d96204399ef8faa3103] # to [3967bacd4cf0166faf4cf2b3e2cc792cf52c0bb6] # # patch "tester.lua" # from [12fd8a1b610f7db1e60069e9940767b262dc4a7d] # to [d71188603ab47fded0896bad4d89444fe49205f0] # # patch "tests/calculation_of_unidiffs/__driver__.lua" # from [7f47ccd3bd189c2eaca4ae5126635032509195ab] # to [c670daa765bad08b28e1c8b2fe6ae7b0159153a7] # # patch "tests/checkout_validates_target_directory/__driver__.lua" # from [8e28d8ea40dc00dd3c11342d218c84b96df8ebf8] # to [99d066e1edbe5d34e4b41d4311a1634879e04d47] # # patch "tests/importing_CVS_files/__driver__.lua" # from [c0187ccc14b3b1beb413aeee4361c19d52387b97] # to [b29bac5aeb02c39454b51e0982c1a9911c85bf63] # # patch "tests/importing_a_CVS_file_with_one_version/__driver__.lua" # from 
[44497c092858e43cd0dbb2ee1204bb5ec5fa0787] # to [6c24fc69840226c7d8562d20ed93d4b8930baf97] # # patch "testsuite.at" # from [2bcc5a412f1cb76ab24165ad2697e8f881c368a2] # to [7ac716d85aeee1483af8a1891243a9daa3fa7a53] # # patch "testsuite.lua" # from [49e9abcd6630b351684304699ffa16ea87b0f993] # to [f61bf454ad6bfaa304208da8de782c89e9715909] # ============================================================ --- tests/(imp)_merge((patch_foo_a),_(delete_foo_))/__driver__.lua afd3d334f88c38e8483688bdb719f32f6a015d82 +++ tests/(imp)_merge((patch_foo_a),_(delete_foo_))/__driver__.lua afd3d334f88c38e8483688bdb719f32f6a015d82 @@ -0,0 +1,25 @@ + +mtn_setup() + +mkdir("foo") +addfile("foo/a", "blah blah") +commit() +base = base_revision() + +rename("foo", "bar") +check(mtn("drop", "--recursive", "foo"), 0, false, false) +commit() + +rename("bar", "foo") +revert_to(base) + +writefile("foo/a", "some other stuff") +commit() + +check(mtn("--branch=testbranch", "merge"), 0, false, false) + +check(mtn("checkout", "--revision", base, "test_dir"), 0, false, false) +check(indir("test_dir", mtn("update", "--branch=testbranch")), 0, false, false) + +check(not exists("test_dir/foo/a")) +check(not exists("test_dir/bar/a")) ============================================================ --- tests/(minor)_context_diff/__driver__.lua 6729332d60b677ae1533e8b052ac085f0418d8cb +++ tests/(minor)_context_diff/__driver__.lua 6729332d60b677ae1533e8b052ac085f0418d8cb @@ -0,0 +1,8 @@ + +mtn_setup() + +-- This test is a bug report. + +-- Need test for "diff --context". + +xfail_if(true, false) ============================================================ --- tests/(minor)_test_a_merge_3/__driver__.lua d0fe9f78d0d218a6a740031db33bac03b5063a1c +++ tests/(minor)_test_a_merge_3/__driver__.lua d0fe9f78d0d218a6a740031db33bac03b5063a1c @@ -0,0 +1,33 @@ + +mtn_setup() + +-- This test is a bug report. + +-- This is a real merge error -- it should be a clean merge, but it +-- produces a conflict. + +getfile("ancestor") +getfile("left") +getfile("right") + +anc = "4f7cfb26927467e9f2a37070edbb19785cbb2f2d" +left = "adc1ca256e9313dd387448ffcd5cf7572eb58d8e" +right = "63ad35cd3955bfa681b76b31d7f2fd745e84f654" + +check(anc == sha1("ancestor")) +check(left == sha1("left")) +check(right == sha1("right")) + +copyfile("ancestor", "stdin") +check(mtn("fload"), 0, false, false, true) +copyfile("left", "stdin") +check(mtn("fload"), 0, false, false, true) +copyfile("right", "stdin") +check(mtn("fload"), 0, false, false, true) + +getfile("merge.diff3") + +xfail_if(true, mtn("fmerge", anc, left, right), 0, true, false) +rename("stdout", "merge.monotone") + +check(samefile("merge.diff3", "merge.monotone"), 0, false, false) ============================================================ --- tests/(minor)_test_a_merge_3/ancestor 4f7cfb26927467e9f2a37070edbb19785cbb2f2d +++ tests/(minor)_test_a_merge_3/ancestor 4f7cfb26927467e9f2a37070edbb19785cbb2f2d @@ -0,0 +1,3536 @@ +// copyright (C) 2004 graydon hoare +// all rights reserved. +// licensed to the public under the terms of the GNU GPL (>= 2) +// see the file COPYING for details + +// this is how you "ask for" the C99 constant constructor macros. *and* +// you have to do so before any other files accidentally include +// stdint.h. awesome. 
+#define __STDC_CONSTANT_MACROS + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "basic_io.hh" +#include "change_set.hh" +#include "constants.hh" +#include "diff_patch.hh" +#include "file_io.hh" +#include "interner.hh" +#include "numeric_vocab.hh" +#include "sanity.hh" + +// our analyses in this file happen on one of two families of +// related structures: a path_analysis or a directory_map. +// +// a path_analysis corresponds exactly to a normalized +// path_rearrangement; they are two ways of writing the +// same information +// +// the path_analysis stores two path_states. each path_state is a map from +// transient identifiers (tids) to items. each item represents a semantic +// element of a filesystem which has a type (file or directory), a name, +// and a parent link (another tid). tids should be unique across a +// path_analysis. + +typedef enum { ptype_directory, ptype_file } ptype; +typedef u64 tid; +static tid root_tid = 0; + +struct +tid_source +{ + tid ctr; + tid_source() : ctr(root_tid + 1) {} + tid next() { I(ctr != UINT64_C(0xffffffffffffffff)); return ctr++; } +}; + +typedef unsigned long path_component; + +struct +path_component_maker +{ + path_component make(std::string const & s) + { + bool is_new; + path_component pc = intern.intern(s, is_new); + // sanity check new ones + if (is_new) + { + // must be a valid file_path + file_path tmp_file_path = file_path(s); + // must contain exactly 0 or 1 components + fs::path tmp_fs_path = mkpath(s); + I(null_name(s) || ++(tmp_fs_path.begin()) == tmp_fs_path.end()); + } + return pc; + } + std::string lookup(path_component pc) const + { + return intern.lookup(pc); + } +private: + interner intern; +}; + +static path_component_maker the_path_component_maker; + +static path_component +make_null_component() +{ + static path_component null_pc = the_path_component_maker.make(""); + return null_pc; +} + +static bool +null_name(path_component pc) +{ + return pc == make_null_component(); +} + +struct +path_item +{ + tid parent; + ptype ty; + path_component name; + path_item() {} + path_item(tid p, ptype t, path_component n); + path_item(path_item const & other); + path_item const & operator=(path_item const & other); + bool operator==(path_item const & other) const; +}; + + +template struct identity +{ + size_t operator()(T const & v) const + { + return static_cast(v); + } +}; + +typedef __gnu_cxx::hash_map > path_state; +typedef __gnu_cxx::hash_map > state_renumbering; +typedef std::pair path_analysis; + +// nulls and tests + +static file_id null_ident; + +// a directory_map is a more "normal" representation of a directory tree, +// which you can traverse more conveniently from root to tip +// +// tid -> [ name -> (ptype, tid), +// name -> (ptype, tid), +// ... ] +// +// tid -> [ name -> (ptype, tid), +// name -> (ptype, tid), +// ... 
] + +typedef __gnu_cxx::hash_map, + identity > directory_node; + +typedef __gnu_cxx::hash_map, + identity > directory_map; + +static path_component +directory_entry_name(directory_node::const_iterator const & i) +{ + return i->first; +} + +static ptype +directory_entry_type(directory_node::const_iterator const & i) +{ + return i->second.first; +} + +static tid +directory_entry_tid(directory_node::const_iterator const & i) +{ + return i->second.second; +} + +void +change_set::add_file(file_path const & a) +{ + I(rearrangement.added_files.find(a) == rearrangement.added_files.end()); + rearrangement.added_files.insert(a); +} + +void +change_set::add_file(file_path const & a, file_id const & ident) +{ + I(rearrangement.added_files.find(a) == rearrangement.added_files.end()); + I(deltas.find(a) == deltas.end()); + rearrangement.added_files.insert(a); + deltas.insert(std::make_pair(a, std::make_pair(null_ident, ident))); +} + +void +change_set::apply_delta(file_path const & path, + file_id const & src, + file_id const & dst) +{ + I(deltas.find(path) == deltas.end()); + deltas.insert(std::make_pair(path, std::make_pair(src, dst))); +} + +void +change_set::delete_file(file_path const & d) +{ + I(rearrangement.deleted_files.find(d) == rearrangement.deleted_files.end()); + rearrangement.deleted_files.insert(d); +} + +void +change_set::delete_dir(file_path const & d) +{ + I(rearrangement.deleted_dirs.find(d) == rearrangement.deleted_dirs.end()); + rearrangement.deleted_dirs.insert(d); +} + +void +change_set::rename_file(file_path const & a, file_path const & b) +{ + I(rearrangement.renamed_files.find(a) == rearrangement.renamed_files.end()); + rearrangement.renamed_files.insert(std::make_pair(a,b)); +} + +void +change_set::rename_dir(file_path const & a, file_path const & b) +{ + I(rearrangement.renamed_dirs.find(a) == rearrangement.renamed_dirs.end()); + rearrangement.renamed_dirs.insert(std::make_pair(a,b)); +} + + +bool +change_set::path_rearrangement::operator==(path_rearrangement const & other) const +{ + return deleted_files == other.deleted_files && + deleted_dirs == other.deleted_dirs && + renamed_files == other.renamed_files && + renamed_dirs == other.renamed_dirs && + added_files == other.added_files; +} + +bool +change_set::path_rearrangement::empty() const +{ + return deleted_files.empty() && + deleted_dirs.empty() && + renamed_files.empty() && + renamed_dirs.empty() && + added_files.empty(); +} + +bool +change_set::path_rearrangement::has_added_file(file_path const & file) const +{ + return added_files.find(file) != added_files.end(); +} + +bool +change_set::path_rearrangement::has_deleted_file(file_path const & file) const +{ + return deleted_files.find(file) != deleted_files.end(); +} + +bool +change_set::path_rearrangement::has_renamed_file_dst(file_path const & file) const +{ + // FIXME: this is inefficient, but improvements would require a different + // structure for renamed_files (or perhaps a second reverse map). For now + // we'll assume that few files will be renamed per changeset. 
+ for (std::map::const_iterator rf = renamed_files.begin(); + rf != renamed_files.end(); ++rf) + if (rf->second == file) + return true; + return false; +} + +bool +change_set::path_rearrangement::has_renamed_file_src(file_path const & file) const +{ + return renamed_files.find(file) != renamed_files.end(); +} + +bool +change_set::empty() const +{ + return deltas.empty() && rearrangement.empty(); +} + +bool +change_set::operator==(change_set const & other) const +{ + return rearrangement == other.rearrangement && + deltas == other.deltas; +} + + +// simple accessors + +inline tid const & +path_item_parent(path_item const & p) +{ + return p.parent; +} + +inline ptype const & +path_item_type(path_item const & p) +{ + return p.ty; +} + +inline path_component +path_item_name(path_item const & p) +{ + return p.name; +} + +inline tid +path_state_tid(path_state::const_iterator i) +{ + return i->first; +} + +inline path_item const & +path_state_item(path_state::const_iterator i) +{ + return i->second; +} + + + +// structure dumping +/* + +static void +dump_renumbering(std::string const & s, + state_renumbering const & r) +{ + L(F("BEGIN dumping renumbering '%s'\n") % s); + for (state_renumbering::const_iterator i = r.begin(); + i != r.end(); ++i) + { + L(F("%d -> %d\n") % i->first % i->second); + } + L(F("END dumping renumbering '%s'\n") % s); +} + +static void +dump_state(std::string const & s, + path_state const & st) +{ + L(F("BEGIN dumping state '%s'\n") % s); + for (path_state::const_iterator i = st.begin(); + i != st.end(); ++i) + { + L(F("state '%s': tid %d, parent %d, type %s, name %s\n") + % s + % path_state_tid(i) + % path_item_parent(path_state_item(i)) + % (path_item_type(path_state_item(i)) == ptype_directory ? "dir" : "file") + % the_path_component_maker.lookup(path_item_name(path_state_item(i)))); + } + L(F("END dumping state '%s'\n") % s); +} + +static void +dump_analysis(std::string const & s, + path_analysis const & t) +{ + L(F("BEGIN dumping tree '%s'\n") % s); + dump_state(s + " first", t.first); + dump_state(s + " second", t.second); + L(F("END dumping tree '%s'\n") % s); +} + +*/ + + +// sanity checking + +static void +check_sets_disjoint(std::set const & a, + std::set const & b) +{ + std::set isect; + std::set_intersection(a.begin(), a.end(), + b.begin(), b.end(), + std::inserter(isect, isect.begin())); + if (!global_sanity.relaxed) + { + I(isect.empty()); + } +} + +change_set::path_rearrangement::path_rearrangement(path_rearrangement const & other) +{ + other.check_sane(); + deleted_files = other.deleted_files; + deleted_dirs = other.deleted_dirs; + renamed_files = other.renamed_files; + renamed_dirs = other.renamed_dirs; + added_files = other.added_files; +} + +change_set::path_rearrangement const & +change_set::path_rearrangement::operator=(path_rearrangement const & other) +{ + other.check_sane(); + deleted_files = other.deleted_files; + deleted_dirs = other.deleted_dirs; + renamed_files = other.renamed_files; + renamed_dirs = other.renamed_dirs; + added_files = other.added_files; + return *this; +} + +static void +extract_pairs_and_insert(std::map const & in, + std::set & firsts, + std::set & seconds) +{ + for (std::map::const_iterator i = in.begin(); + i != in.end(); ++i) + { + firsts.insert(i->first); + seconds.insert(i->second); + } +} + +template +static void +extract_first(std::map const & m, std::set & s) +{ + s.clear(); + for (typename std::map::const_iterator i = m.begin(); + i != m.end(); ++i) + { + s.insert(i->first); + } +} + +static void 
+extract_killed(path_analysis const & a, + std::set & killed); + + +static void +check_no_deltas_on_killed_files(path_analysis const & pa, + change_set::delta_map const & del) +{ + std::set killed; + std::set delta_paths; + + extract_killed(pa, killed); + extract_first(del, delta_paths); + check_sets_disjoint(killed, delta_paths); +} + +static void +check_delta_entries_not_directories(path_analysis const & pa, + change_set::delta_map const & dels); + +void +analyze_rearrangement(change_set::path_rearrangement const & pr, + path_analysis & pa, + tid_source & ts); + +void +sanity_check_path_analysis(path_analysis const & pr); + +void +change_set::path_rearrangement::check_sane() const +{ + delta_map del; + this->check_sane(del); +} + +void +change_set::path_rearrangement::check_sane(delta_map const & deltas) const +{ + tid_source ts; + path_analysis pa; + analyze_rearrangement(*this, pa, ts); + sanity_check_path_analysis (pa); + + check_no_deltas_on_killed_files(pa, deltas); + check_delta_entries_not_directories(pa, deltas); + + // FIXME: extend this as you manage to think of more invariants + // which are cheap enough to check at this level. + std::set renamed_srcs, renamed_dsts; + extract_pairs_and_insert(renamed_files, renamed_srcs, renamed_dsts); + extract_pairs_and_insert(renamed_dirs, renamed_srcs, renamed_dsts); + + // Files cannot be split nor joined by renames. + I(renamed_files.size() + renamed_dirs.size() == renamed_srcs.size()); + I(renamed_files.size() + renamed_dirs.size() == renamed_dsts.size()); + + check_sets_disjoint(deleted_files, deleted_dirs); + check_sets_disjoint(deleted_files, renamed_srcs); + check_sets_disjoint(deleted_dirs, renamed_srcs); + + check_sets_disjoint(added_files, renamed_dsts); +} + +change_set::change_set(change_set const & other) +{ + other.check_sane(); + rearrangement = other.rearrangement; + deltas = other.deltas; +} + +change_set const &change_set::operator=(change_set const & other) +{ + other.check_sane(); + rearrangement = other.rearrangement; + deltas = other.deltas; + return *this; +} + +void +change_set::check_sane() const +{ + // FIXME: extend this as you manage to think of more invariants + // which are cheap enough to check at this level. 
+ + rearrangement.check_sane(this->deltas); + + for (std::set::const_iterator i = rearrangement.added_files.begin(); + i != rearrangement.added_files.end(); ++i) + { + delta_map::const_iterator j = deltas.find(*i); + if (!global_sanity.relaxed) + { + I(j != deltas.end()); + I(null_id(delta_entry_src(j))); + I(!null_id(delta_entry_dst(j))); + } + } + + for (delta_map::const_iterator i = deltas.begin(); + i != deltas.end(); ++i) + { + if (!global_sanity.relaxed) + { + I(!null_name(delta_entry_path(i))); + I(!null_id(delta_entry_dst(i))); + I(!(delta_entry_src(i) == delta_entry_dst(i))); + if (null_id(delta_entry_src(i))) + I(rearrangement.added_files.find(delta_entry_path(i)) + != rearrangement.added_files.end()); + } + } + +} + +static void +sanity_check_path_item(path_item const & pi) +{ +} + +static void +confirm_proper_tree(path_state const & ps) +{ + std::set confirmed; + I(ps.find(root_tid) == ps.end()); + for (path_state::const_iterator i = ps.begin(); i != ps.end(); ++i) + { + tid curr = i->first; + path_item item = i->second; + std::set ancs; + + while (confirmed.find(curr) == confirmed.end()) + { + sanity_check_path_item(item); + I(ancs.find(curr) == ancs.end()); + ancs.insert(curr); + if (path_item_parent(item) == root_tid) + break; + else + { + curr = path_item_parent(item); + path_state::const_iterator j = ps.find(curr); + I(j != ps.end()); + + // if we're null, our parent must also be null + if (null_name(item.name)) + I(null_name(path_state_item(j).name)); + + item = path_state_item(j); + I(path_item_type(item) == ptype_directory); + } + } + std::copy(ancs.begin(), ancs.end(), + inserter(confirmed, confirmed.begin())); + } + I(confirmed.find(root_tid) == confirmed.end()); +} + +static void +confirm_unique_entries_in_directories(path_state const & ps) +{ + std::set< std::pair > entries; + for (path_state::const_iterator i = ps.begin(); i != ps.end(); ++i) + { + if (null_name(path_item_name(i->second))) + { + I(path_item_parent(i->second) == root_tid); + continue; + } + + std::pair p = std::make_pair(path_item_parent(i->second), + path_item_name(i->second)); + I(entries.find(p) == entries.end()); + entries.insert(p); + } +} + +static void +sanity_check_path_state(path_state const & ps) +{ + confirm_proper_tree(ps); + confirm_unique_entries_in_directories(ps); +} + +path_item::path_item(tid p, ptype t, path_component n) + : parent(p), ty(t), name(n) +{ + sanity_check_path_item(*this); +} + +path_item::path_item(path_item const & other) + : parent(other.parent), ty(other.ty), name(other.name) +{ + sanity_check_path_item(*this); +} + +path_item const & path_item::operator=(path_item const & other) +{ + parent = other.parent; + ty = other.ty; + name = other.name; + sanity_check_path_item(*this); + return *this; +} + +bool path_item::operator==(path_item const & other) const +{ + return this->parent == other.parent && + this->ty == other.ty && + this->name == other.name; +} + + +static void +check_states_agree(path_state const & p1, + path_state const & p2) +{ + path_analysis pa; + pa.first = p1; + pa.second = p2; + // dump_analysis("agreement", pa); + for (path_state::const_iterator i = p1.begin(); i != p1.end(); ++i) + { + path_state::const_iterator j = p2.find(i->first); + I(j != p2.end()); + I(path_item_type(i->second) == path_item_type(j->second)); + // I(! 
(null_name(path_item_name(i->second)) + // && + // null_name(path_item_name(j->second)))); + } +} + +void +sanity_check_path_analysis(path_analysis const & pr) +{ + sanity_check_path_state(pr.first); + sanity_check_path_state(pr.second); + check_states_agree(pr.first, pr.second); + check_states_agree(pr.second, pr.first); +} + + +// construction helpers + +static boost::shared_ptr +new_dnode() +{ + return boost::shared_ptr(new directory_node()); +} + +static boost::shared_ptr +dnode(directory_map & dir, tid t) +{ + boost::shared_ptr node; + directory_map::const_iterator dirent = dir.find(t); + if (dirent == dir.end()) + { + node = new_dnode(); + dir.insert(std::make_pair(t, node)); + } + else + node = dirent->second; + return node; +} + + +// This function takes a vector of path components and joins them into a +// single file_path. Valid input may be a single-element vector whose sole +// element is the empty path component (""); this represents the null path, +// which we use to represent non-existent files. Alternatively, input may be +// a multi-element vector, in which case all elements of the vector are +// required to be non-null. The following are valid inputs (with strings +// replaced by their interned version, of course): +// - [""] +// - ["foo"] +// - ["foo", "bar"] +// The following are not: +// - [] +// - ["foo", ""] +// - ["", "bar"] +static void +compose_path(std::vector const & names, + file_path & path) +{ + try + { + std::vector::const_iterator i = names.begin(); + I(i != names.end()); + fs::path p = mkpath(the_path_component_maker.lookup(*i)); + ++i; + if (names.size() > 1) + I(!null_name(*i)); + for ( ; i != names.end(); ++i) + { + I(!null_name(*i)); + p /= mkpath(the_path_component_maker.lookup(*i)); + } + path = file_path(p.string()); + } + catch (std::runtime_error &e) + { + throw informative_failure(e.what()); + } +} + +static void +get_full_path(path_state const & state, + tid t, + std::vector & pth) +{ + std::vector tmp; + while(t != root_tid) + { + path_state::const_iterator i = state.find(t); + I(i != state.end()); + tmp.push_back(path_item_name(i->second)); + t = path_item_parent(i->second); + } + pth.clear(); + std::copy(tmp.rbegin(), tmp.rend(), inserter(pth, pth.begin())); +} + +static void +get_full_path(path_state const & state, + tid t, + file_path & pth) +{ + std::vector tmp; + get_full_path(state, t, tmp); + // L(F("got %d-entry path for tid %d\n") % tmp.size() % t); + compose_path(tmp, pth); +} + +static void +clear_rearrangement(change_set::path_rearrangement & pr) +{ + pr.deleted_files.clear(); + pr.deleted_dirs.clear(); + pr.renamed_files.clear(); + pr.renamed_dirs.clear(); + pr.added_files.clear(); +} + +static void +clear_change_set(change_set & cs) +{ + clear_rearrangement(cs.rearrangement); + cs.deltas.clear(); +} + +static void +compose_rearrangement(path_analysis const & pa, + change_set::path_rearrangement & pr) +{ + clear_rearrangement(pr); + + for (path_state::const_iterator i = pa.first.begin(); + i != pa.first.end(); ++i) + { + tid curr(path_state_tid(i)); + std::vector old_name, new_name; + file_path old_path, new_path; + + path_state::const_iterator j = pa.second.find(curr); + I(j != pa.second.end()); + path_item old_item(path_state_item(i)); + path_item new_item(path_state_item(j)); + + // compose names + if (!null_name(path_item_name(old_item))) + { + get_full_path(pa.first, curr, old_name); + compose_path(old_name, old_path); + } + + if (!null_name(path_item_name(new_item))) + { + get_full_path(pa.second, curr, new_name); + 
compose_path(new_name, new_path); + } + + if (old_path == new_path) + { + L(F("skipping preserved %s %d : '%s'\n") + % (path_item_type(old_item) == ptype_directory ? "directory" : "file") + % curr % old_path); + continue; + } + + L(F("analyzing %s %d : '%s' -> '%s'\n") + % (path_item_type(old_item) == ptype_directory ? "directory" : "file") + % curr % old_path % new_path); + + if (null_name(path_item_name(old_item))) + { + // an addition (which must be a file, not a directory) + I(! null_name(path_item_name(new_item))); + I(path_item_type(new_item) != ptype_directory); + pr.added_files.insert(new_path); + } + else if (null_name(path_item_name(new_item))) + { + // a deletion + I(! null_name(path_item_name(old_item))); + switch (path_item_type(new_item)) + { + case ptype_directory: + pr.deleted_dirs.insert(old_path); + break; + case ptype_file: + pr.deleted_files.insert(old_path); + break; + } + } + else + { + // a generic rename + switch (path_item_type(new_item)) + { + case ptype_directory: + pr.renamed_dirs.insert(std::make_pair(old_path, new_path)); + break; + case ptype_file: + pr.renamed_files.insert(std::make_pair(old_path, new_path)); + break; + } + } + } +} + + + + +// +// this takes a path of the form +// +// "p[0]/p[1]/.../p[n-1]/p[n]" +// +// and fills in a vector of paths corresponding to p[0] ... p[n-1], +// along with a separate "leaf path" for element p[n]. +// + +static void +split_path(file_path const & p, + std::vector & components) +{ + components.clear(); + fs::path tmp = mkpath(p()); + for (fs::path::iterator i = tmp.begin(); i != tmp.end(); ++i) + components.push_back(the_path_component_maker.make(*i)); +} + +static void +split_path(file_path const & p, + std::vector & prefix, + path_component & leaf_path) +{ + split_path(p, prefix); + I(prefix.size() > 0); + leaf_path = prefix.back(); + prefix.pop_back(); +} + +static bool +lookup_path(std::vector const & pth, + directory_map const & dir, + tid & t) +{ + t = root_tid; + for (std::vector::const_iterator i = pth.begin(); + i != pth.end(); ++i) + { + directory_map::const_iterator dirent = dir.find(t); + if (dirent != dir.end()) + { + boost::shared_ptr node = dirent->second; + directory_node::const_iterator entry = node->find(*i); + if (entry == node->end()) + return false; + t = directory_entry_tid(entry); + } + else + return false; + } + return true; +} + +static bool +lookup_path(file_path const & pth, + directory_map const & dir, + tid & t) +{ + std::vector vec; + split_path(pth, vec); + return lookup_path(vec, dir, t); +} + +static tid +ensure_entry(directory_map & dmap, + path_state & state, + tid dir_tid, + ptype entry_ty, + path_component entry, + tid_source & ts) +{ + I(! 
null_name(entry)); + + if (dir_tid != root_tid) + { + path_state::const_iterator parent = state.find(dir_tid); + I( parent != state.end()); + + // if our parent is null, we immediately become null too, and attach to + // the root node (where all null entries reside) + if (null_name(path_item_name(path_state_item(parent)))) + { + tid new_tid = ts.next(); + state.insert(std::make_pair(new_tid, path_item(root_tid, entry_ty, make_null_component()))); + return new_tid; + } + } + + boost::shared_ptr node = dnode(dmap, dir_tid); + directory_node::const_iterator node_entry = node->find(entry); + + if (node_entry != node->end()) + { + I(node_entry->second.first == entry_ty); + return node_entry->second.second; + } + else + { + tid new_tid = ts.next(); + state.insert(std::make_pair(new_tid, path_item(dir_tid, entry_ty, entry))); + node->insert(std::make_pair(entry, std::make_pair(entry_ty, new_tid))); + return new_tid; + } +} + +static tid +ensure_dir_in_map (std::vector pth, + directory_map & dmap, + path_state & state, + tid_source & ts) +{ + tid dir_tid = root_tid; + for (std::vector::const_iterator p = pth.begin(); + p != pth.end(); ++p) + { + dir_tid = ensure_entry(dmap, state, dir_tid, + ptype_directory, *p, ts); + } + return dir_tid; +} + +static tid +ensure_dir_in_map (file_path const & path, + directory_map & dmap, + path_state & state, + tid_source & ts) +{ + std::vector components; + split_path(path, components); + return ensure_dir_in_map(components, dmap, state, ts); +} + +static tid +ensure_file_in_map (file_path const & path, + directory_map & dmap, + path_state & state, + tid_source & ts) +{ + std::vector prefix; + path_component leaf_path; + split_path(path, prefix, leaf_path); + + I(! null_name(leaf_path)); + tid dir_tid = ensure_dir_in_map(prefix, dmap, state, ts); + return ensure_entry(dmap, state, dir_tid, ptype_file, leaf_path, ts); +} + +static void +ensure_entries_exist (path_state const & self_state, + directory_map & other_dmap, + path_state & other_state, + tid_source & ts) +{ + for (path_state::const_iterator i = self_state.begin(); + i != self_state.end(); ++i) + { + if (other_state.find(path_state_tid(i)) != other_state.end()) + continue; + + if (null_name(path_item_name(path_state_item(i)))) + continue; + + file_path full; + get_full_path(self_state, path_state_tid(i), full); + switch (path_item_type(path_state_item(i))) + { + case ptype_directory: + ensure_dir_in_map(full, other_dmap, other_state, ts); + break; + + case ptype_file: + ensure_file_in_map(full, other_dmap, other_state, ts); + break; + } + } +} + + +static void +apply_state_renumbering(state_renumbering const & renumbering, + path_state & state) +{ + sanity_check_path_state(state); + path_state tmp(state); + state.clear(); + + for (path_state::const_iterator i = tmp.begin(); i != tmp.end(); ++i) + { + path_item item = path_state_item(i); + tid t = path_state_tid(i); + + state_renumbering::const_iterator j = renumbering.find(t); + if (j != renumbering.end()) + t = j->second; + + j = renumbering.find(item.parent); + if (j != renumbering.end()) + item.parent = j->second; + + state.insert(std::make_pair(t, item)); + } + sanity_check_path_state(state); +} + +static void +apply_state_renumbering(state_renumbering const & renumbering, + path_analysis & pa) +{ + apply_state_renumbering(renumbering, pa.first); + apply_state_renumbering(renumbering, pa.second); +} + + +// this takes a path in the path space defined by input_dir and rebuilds it +// in the path space defined by output_space, including any changes to 
+// parents in the path (rather than directly to the path leaf name). it +// therefore *always* succeeds; sometimes it does nothing if there's no +// affected parent, but you always get a rebuilt path in the output space. + +static void +reconstruct_path(file_path const & input, + directory_map const & input_dir, + path_state const & output_space, + file_path & output) +{ + std::vector vec; + std::vector rebuilt; + + // L(F("reconstructing path '%s' under analysis\n") % input); + + split_path(input, vec); + + tid t = root_tid; + std::vector::const_iterator pth = vec.begin(); + while (pth != vec.end()) + { + directory_map::const_iterator dirent = input_dir.find(t); + if (dirent == input_dir.end()) + break; + + boost::shared_ptr node = dirent->second; + directory_node::const_iterator entry = node->find(*pth); + if (entry == node->end()) + break; + + { + // check to see if this is the image of an added or deleted entry + // (i.e. null name in output space), if so it terminates our + // search. + path_state::const_iterator i = output_space.find(directory_entry_tid(entry)); + I(i != output_space.end()); + if (null_name(path_item_name(path_state_item(i)))) + { + // L(F("input path element '%s' is null in output space, mapping truncated\n") % *pth); + break; + } + } + + // L(F("resolved entry '%s' in reconstruction\n") % *pth); + ++pth; + t = directory_entry_tid(entry); + + if (directory_entry_type(entry) != ptype_directory) + break; + } + + get_full_path(output_space, t, rebuilt); + + while(pth != vec.end()) + { + // L(F("copying tail entry '%s' in reconstruction\n") % *pth); + rebuilt.push_back(*pth); + ++pth; + } + + compose_path(rebuilt, output); + // L(F("reconstructed path '%s' as '%s'\n") % input % output); +} + + +static void +build_directory_map(path_state const & state, + directory_map & dir) +{ + sanity_check_path_state(state); + dir.clear(); + // L(F("building directory map for %d entries\n") % state.size()); + for (path_state::const_iterator i = state.begin(); i != state.end(); ++i) + { + tid curr = path_state_tid(i); + path_item item = path_state_item(i); + tid parent = path_item_parent(item); + path_component name = path_item_name(item); + ptype type = path_item_type(item); + // L(F("adding entry %s (%s %d) to directory node %d\n") + // % name % (type == ptype_directory ? 
"dir" : "file") % curr % parent); + dnode(dir, parent)->insert(std::make_pair(name,std::make_pair(type, curr))); + + // also, make sure to add current node if it's a directory, even if + // there are no entries in it + if (type == ptype_directory) + dnode(dir, curr); + } +} + + +void +analyze_rearrangement(change_set::path_rearrangement const & pr, + path_analysis & pa, + tid_source & ts) +{ + directory_map first_map, second_map; + state_renumbering renumbering; + std::set damaged_in_first, damaged_in_second; + + pa.first.clear(); + pa.second.clear(); + + for (std::set::const_iterator f = pr.deleted_files.begin(); + f != pr.deleted_files.end(); ++f) + { + tid x = ensure_file_in_map(*f, first_map, pa.first, ts); + pa.second.insert(std::make_pair(x, path_item(root_tid, ptype_file, make_null_component()))); + damaged_in_first.insert(x); + } + + for (std::set::const_iterator d = pr.deleted_dirs.begin(); + d != pr.deleted_dirs.end(); ++d) + { + tid x = ensure_dir_in_map(*d, first_map, pa.first, ts); + pa.second.insert(std::make_pair(x, path_item(root_tid, ptype_directory, make_null_component()))); + damaged_in_first.insert(x); + } + + for (std::map::const_iterator rf = pr.renamed_files.begin(); + rf != pr.renamed_files.end(); ++rf) + { + tid a = ensure_file_in_map(rf->first, first_map, pa.first, ts); + tid b = ensure_file_in_map(rf->second, second_map, pa.second, ts); + I(renumbering.find(a) == renumbering.end()); + renumbering.insert(std::make_pair(b,a)); + damaged_in_first.insert(a); + damaged_in_second.insert(b); + } + + for (std::map::const_iterator rd = pr.renamed_dirs.begin(); + rd != pr.renamed_dirs.end(); ++rd) + { + tid a = ensure_dir_in_map(rd->first, first_map, pa.first, ts); + tid b = ensure_dir_in_map(rd->second, second_map, pa.second, ts); + I(renumbering.find(a) == renumbering.end()); + renumbering.insert(std::make_pair(b,a)); + damaged_in_first.insert(a); + damaged_in_second.insert(b); + } + + for (std::set::const_iterator a = pr.added_files.begin(); + a != pr.added_files.end(); ++a) + { + tid x = ensure_file_in_map(*a, second_map, pa.second, ts); + pa.first.insert(std::make_pair(x, path_item(root_tid, ptype_file, make_null_component()))); + damaged_in_second.insert(x); + } + + // we now have two states which probably have a number of entries in + // common. we know already of an interesting set of entries they have in + // common: all the renamed_foo entries. for each such renamed_foo(a,b) + // entry, we made an entry in our state_renumbering of the form b->a, + // while building the states. + + // dump_analysis("analyzed", pa); + // dump_renumbering("first", renumbering); + apply_state_renumbering(renumbering, pa.second); + build_directory_map(pa.first, first_map); + build_directory_map(pa.second, second_map); + renumbering.clear(); + // dump_analysis("renumbered once", pa); + + // that only gets us half way, though: + // + // - every object which was explicitly moved (thus stayed alive) has been + // renumbered in re.second to have the same tid as in re.first + // + // - every object which was merely mentionned in passing -- say due to + // being an intermediate directory in a path -- and was not moved, still + // has differing tids in re.first and re.second (or worse, may only + // even have an *entry* in one of them) + // + // the second point here is what we need to correct: if a path didn't + // move, wasn't destroyed, and wasn't added, we want it to have the same + // tid. 
but that's a relatively easy condition to check; we've been + // keeping sets of all the objects which were damaged on each side of + // this business anyways. + + + // pass #1 makes sure that all the entries in each state *exist* within + // the other state, even if they have the wrong numbers + + ensure_entries_exist (pa.first, second_map, pa.second, ts); + ensure_entries_exist (pa.second, first_map, pa.first, ts); + + // pass #2 identifies common un-damaged elements from 2->1 and inserts + // renumberings + + for (path_state::const_iterator i = pa.second.begin(); + i != pa.second.end(); ++i) + { + tid first_tid, second_tid; + second_tid = path_state_tid(i); + file_path full; + if (pa.first.find(second_tid) != pa.first.end()) + continue; + get_full_path(pa.second, second_tid, full); + if (damaged_in_second.find(second_tid) != damaged_in_second.end()) + continue; + if (null_name(path_item_name(path_state_item(i)))) + continue; + I(lookup_path(full, first_map, first_tid)); + renumbering.insert(std::make_pair(second_tid, first_tid)); + } + + // dump_renumbering("second", renumbering); + apply_state_renumbering(renumbering, pa.second); + // dump_analysis("renumbered again", pa); + + // that should be the whole deal; if we don't have consensus at this + // point we have done something wrong. + sanity_check_path_analysis (pa); +} + +void +normalize_path_rearrangement(change_set::path_rearrangement & norm) +{ + path_analysis tmp; + tid_source ts; + + analyze_rearrangement(norm, tmp, ts); + clear_rearrangement(norm); + compose_rearrangement(tmp, norm); +} + +void +normalize_change_set(change_set & norm) +{ + normalize_path_rearrangement(norm.rearrangement); + change_set::delta_map tmp = norm.deltas; + for (change_set::delta_map::const_iterator i = tmp.begin(); + i != tmp.end(); ++i) + { + if (delta_entry_src(i) == delta_entry_dst(i)) + norm.deltas.erase(delta_entry_path(i)); + } +} + + +// begin stuff related to concatenation + +static void +index_entries(path_state const & state, + std::map & files, + std::map & dirs) +{ + for (path_state::const_iterator i = state.begin(); + i != state.end(); ++i) + { + file_path full; + path_item item = path_state_item(i); + get_full_path(state, path_state_tid(i), full); + + if (null_name(path_item_name(item))) + continue; + + switch (path_item_type(item)) + { + case ptype_directory: + dirs.insert(std::make_pair(full, path_state_tid(i))); + break; + + case ptype_file: + files.insert(std::make_pair(full, path_state_tid(i))); + break; + } + } +} + +// this takes every (p1,t1) entry in b and, if (p1,t2) it exists in a, +// inserts (t1,t2) in the rename set. in other words, it constructs the +// renumbering from b->a +static void +extend_renumbering_from_path_identities(std::map const & a, + std::map const & b, + state_renumbering & renumbering) +{ + for (std::map::const_iterator i = b.begin(); + i != b.end(); ++i) + { + I(! 
null_name(i->first)); + std::map::const_iterator j = a.find(i->first); + if (j == a.end()) + continue; + I(renumbering.find(i->second) == renumbering.end()); + renumbering.insert(std::make_pair(i->second, j->second)); + } +} + +static void +extend_state(path_state const & src, + path_state & dst) +{ + for (path_state::const_iterator i = src.begin(); + i != src.end(); ++i) + { + if (dst.find(path_state_tid(i)) == dst.end()) + dst.insert(*i); + } +} + +static void +ensure_tids_disjoint(path_analysis const & a, + path_analysis const & b) +{ + for (path_state::const_iterator i = a.first.begin(); + i != a.first.end(); ++i) + { + I(b.first.find(path_state_tid(i)) == b.first.end()); + } + for (path_state::const_iterator i = b.first.begin(); + i != b.first.end(); ++i) + { + I(a.first.find(path_state_tid(i)) == a.first.end()); + } +} + +static void +extract_killed(path_analysis const & a, + std::set & killed) + +{ + killed.clear(); + directory_map first_map, second_map; + + build_directory_map(a.first, first_map); + build_directory_map(a.second, second_map); + + for (directory_map::const_iterator i = first_map.begin(); + i != first_map.end(); ++i) + { + tid dir_tid = i->first; + directory_map::const_iterator j = second_map.find(dir_tid); + I(j != second_map.end()); + + // a path P = DIR/LEAF is "killed" by a path_analysis iff the + // directory node named DIR in the post-state contains LEAF in the + // pre-state, and does not contain LEAF in the post-state + + boost::shared_ptr first_node = i->second; + boost::shared_ptr second_node = j->second; + + for (directory_node::const_iterator p = first_node->begin(); + p != first_node->end(); ++p) + { + path_component first_name = directory_entry_name(p); + directory_node::const_iterator q = second_node->find(first_name); + if (q == second_node->end()) + { + // found a killed entry + std::vector killed_name; + file_path killed_path; + get_full_path(a.second, dir_tid, killed_name); + killed_name.push_back(first_name); + compose_path(killed_name, killed_path); + killed.insert(killed_path); + } + } + } +} + +static void +check_delta_entries_not_directories(path_analysis const & pa, + change_set::delta_map const & dels) +{ + directory_map dmap; + build_directory_map(pa.second, dmap); + for (change_set::delta_map::const_iterator i = dels.begin(); + i != dels.end(); ++i) + { + tid delta_tid; + if (lookup_path(delta_entry_path(i), dmap, delta_tid)) + { + path_state::const_iterator j = pa.second.find(delta_tid); + I(j != pa.second.end()); + I(path_item_type(path_state_item(j)) == ptype_file); + } + } +} + +static void +concatenate_disjoint_analyses(path_analysis const & a, + path_analysis const & b, + std::set const & a_killed, + path_analysis & concatenated) +{ + std::map a_second_files, a_second_dirs; + std::map b_first_files, b_first_dirs; + path_analysis a_tmp(a), b_tmp(b); + state_renumbering renumbering; + + // the trick here is that a.second and b.first supposedly refer to the + // same state-of-the-world, so all we need to do is: + // + // - confirm that both analyses have disjoint tids + // - work out which tids in b to identify with tids in a + // - renumber b + // + // - copy a.first -> concatenated.first + // - insert all elements of b.first not already in concatenated.first + // - copy b.second -> concatenated.second + // - insert all elements of a.second not already in concatenated.second + + ensure_tids_disjoint(a_tmp, b_tmp); + + index_entries(a_tmp.second, a_second_files, a_second_dirs); + index_entries(b_tmp.first, b_first_files, b_first_dirs); + 
+ { + std::set + a_second_file_set, a_second_dir_set, + b_first_file_set, b_first_dir_set; + + extract_first(a_second_files, a_second_file_set); + extract_first(a_second_dirs, a_second_dir_set); + extract_first(b_first_files, b_first_file_set); + extract_first(b_first_dirs, b_first_dir_set); + + // check that there are no entry-type mismatches + check_sets_disjoint(a_second_file_set, b_first_dir_set); + check_sets_disjoint(a_second_dir_set, b_first_file_set); + + // check that there's no use of killed entries + check_sets_disjoint(a_killed, b_first_dir_set); + check_sets_disjoint(a_killed, b_first_file_set); + } + + extend_renumbering_from_path_identities(a_second_files, b_first_files, renumbering); + extend_renumbering_from_path_identities(a_second_dirs, b_first_dirs, renumbering); + + // dump_analysis("state A", a_tmp); + // dump_analysis("state B", b_tmp); + // dump_renumbering("concatenation", renumbering); + apply_state_renumbering(renumbering, b_tmp); + + concatenated.first = a_tmp.first; + concatenated.second = b_tmp.second; + + extend_state(b_tmp.first, concatenated.first); + extend_state(a_tmp.second, concatenated.second); + + sanity_check_path_analysis(concatenated); +} + +void +concatenate_rearrangements(change_set::path_rearrangement const & a, + change_set::path_rearrangement const & b, + change_set::path_rearrangement & concatenated) +{ + a.check_sane(); + b.check_sane(); + concatenated = change_set::path_rearrangement(); + + tid_source ts; + path_analysis a_analysis, b_analysis, concatenated_analysis; + + analyze_rearrangement(a, a_analysis, ts); + analyze_rearrangement(b, b_analysis, ts); + + std::set a_killed; + extract_killed(a_analysis, a_killed); + + concatenate_disjoint_analyses(a_analysis, + b_analysis, + a_killed, + concatenated_analysis); + + compose_rearrangement(concatenated_analysis, + concatenated); + + concatenated.check_sane(); +} + +void +concatenate_change_sets(change_set const & a, + change_set const & b, + change_set & concatenated) +{ + a.check_sane(); + b.check_sane(); + + L(F("concatenating change sets\n")); + + tid_source ts; + path_analysis a_analysis, b_analysis, concatenated_analysis; + + analyze_rearrangement(a.rearrangement, a_analysis, ts); + analyze_rearrangement(b.rearrangement, b_analysis, ts); + + std::set a_killed; + extract_killed(a_analysis, a_killed); + + concatenate_disjoint_analyses(a_analysis, + b_analysis, + a_killed, + concatenated_analysis); + + compose_rearrangement(concatenated_analysis, + concatenated.rearrangement); + + // now process the deltas + + concatenated.deltas.clear(); + directory_map a_dst_map, b_src_map; + L(F("concatenating %d and %d deltas\n") + % a.deltas.size() % b.deltas.size()); + build_directory_map(a_analysis.second, a_dst_map); + build_directory_map(b_analysis.first, b_src_map); + + // first rename a's deltas under the rearrangement of b + for (change_set::delta_map::const_iterator del = a.deltas.begin(); + del != a.deltas.end(); ++del) + { + file_path new_pth; + L(F("processing delta on %s\n") % delta_entry_path(del)); + + // work out the name of entry in b.first + reconstruct_path(delta_entry_path(del), b_src_map, b_analysis.second, new_pth); + L(F("delta on %s in first changeset renamed to %s\n") + % delta_entry_path(del) % new_pth); + + if (b.rearrangement.has_deleted_file(delta_entry_path(del))) + // the delta should be removed if the file is going to be deleted + L(F("discarding delta [%s]->[%s] for deleted file '%s'\n") + % delta_entry_src(del) % delta_entry_dst(del) % delta_entry_path(del)); + else + 
concatenated.deltas.insert(std::make_pair(new_pth, + std::make_pair(delta_entry_src(del), + delta_entry_dst(del)))); + } + + // next fuse any deltas id1->id2 and id2->id3 to id1->id3 + for (change_set::delta_map::const_iterator del = b.deltas.begin(); + del != b.deltas.end(); ++del) + { + + file_path del_pth = delta_entry_path(del); + change_set::delta_map::const_iterator existing = + concatenated.deltas.find(del_pth); + if (existing != concatenated.deltas.end()) + { + L(F("fusing deltas on %s : %s -> %s and %s -> %s\n") + % del_pth + % delta_entry_src(existing) + % delta_entry_dst(existing) + % delta_entry_src(del) + % delta_entry_dst(del)); + I(delta_entry_dst(existing) == delta_entry_src(del)); + std::pair fused = std::make_pair(delta_entry_src(existing), + delta_entry_dst(del)); + concatenated.deltas.erase(del_pth); + concatenated.deltas.insert(std::make_pair((del_pth), fused)); + } + else + { + L(F("delta on %s in second changeset copied forward\n") % del_pth); + // in general don't want deltas on deleted files. however if a + // file has been deleted then re-added, then a delta is valid + // (it applies to the newly-added file) + if (!b.rearrangement.has_deleted_file(del_pth) + || b.rearrangement.has_added_file(del_pth) + || b.rearrangement.has_renamed_file_dst(del_pth)) + concatenated.deltas.insert(*del); + } + } + + normalize_change_set(concatenated); + concatenated.check_sane(); + + L(F("finished concatenation\n")); +} + +// end stuff related to concatenation + + +// begin stuff related to merging + + +static void +extend_renumbering_via_added_files(path_analysis const & a, + path_analysis const & b, + state_renumbering & existing_renumbering, + state_renumbering & renumbering) +{ + directory_map a_second_map; + build_directory_map(a.second, a_second_map); + + for (path_state::const_iterator i = b.first.begin(); + i != b.first.end(); ++i) + { + path_item item = path_state_item(i); + if (path_item_type(item) == ptype_file && null_name(path_item_name(item))) + { + path_state::const_iterator j = b.second.find(path_state_tid(i)); + I(j != b.second.end()); + path_component leaf_name = path_item_name(path_state_item(j)); + + I(path_item_type(path_state_item(j)) == ptype_file); + if (! 
null_name(leaf_name)) + { + tid added_parent_tid = path_item_parent(path_state_item(j)); + state_renumbering::const_iterator ren = existing_renumbering.find(added_parent_tid); + if (ren != existing_renumbering.end()) + added_parent_tid = ren->second; + directory_map::const_iterator dirent = a_second_map.find(added_parent_tid); + if (dirent != a_second_map.end()) + { + boost::shared_ptr node = dirent->second; + directory_node::const_iterator entry = node->find(leaf_name); + if (entry != node->end() && directory_entry_type(entry) == ptype_file) + { + I(renumbering.find(path_state_tid(i)) == renumbering.end()); + renumbering.insert(std::make_pair(path_state_tid(i), + directory_entry_tid(entry))); + } + } + } + } + } +} + +static bool +find_item(tid t, path_state const & ps, + path_item & item) +{ + path_state::const_iterator i = ps.find(t); + if (i == ps.end()) + return false; + item = path_state_item(i); + return true; +} + +static bool +find_items(tid t, path_analysis const & pa, + path_item & first, path_item & second) +{ + if (find_item(t, pa.first, first)) + { + I(find_item(t, pa.second, second)); + I(path_item_type(first) == path_item_type(second)); + return true; + } + else + { + I(!find_item(t, pa.second, second)); + return false; + } +} + +static void +resolve_conflict(tid t, ptype ty, + path_analysis const & a_tmp, + path_analysis const & b_tmp, + path_item & resolved, + path_state & resolved_conflicts, + app_state & app) +{ + path_state::const_iterator i = resolved_conflicts.find(t); + + path_item a_item, b_item; + find_item(t, a_tmp.second, a_item); + find_item(t, b_tmp.second, b_item); + + file_path anc, a, b, res; + get_full_path(a_tmp.first, t, anc); + get_full_path(a_tmp.second, t, a); + get_full_path(b_tmp.second, t, b); + + if (i != resolved_conflicts.end()) + { + resolved = path_state_item(i); + } + else if (null_name(path_item_name(a_item)) && + ! null_name(path_item_name(b_item))) + { + L(F("delete of %s dominates rename to %s\n") % anc % b); + resolved = a_item; + resolved_conflicts.insert(std::make_pair(t, resolved)); + } + else if (null_name(path_item_name(b_item)) && + ! null_name(path_item_name(a_item))) + { + L(F("delete of %s dominates rename to %s\n") % anc % a); + resolved = b_item; + resolved_conflicts.insert(std::make_pair(t, resolved)); + } + else + { + switch (ty) + { + case ptype_file: + N(app.lua.hook_resolve_file_conflict(anc, a, b, res), + F("unable to resolve file conflict '%s' -> '%s' vs. '%s'") % anc % a % b); + break; + case ptype_directory: + N(app.lua.hook_resolve_dir_conflict(anc, a, b, res), + F("unable to resolve dir conflict '%s' -> '%s' vs. '%s'") % anc % a % b); + break; + } + + N((res == a || res == b), + F("illegal conflict resolution '%s', wanted '%s' or '%s'\n") % res % a % b); + + if (res == a) + I(find_item(t, a_tmp.second, resolved)); + else + I(find_item(t, b_tmp.second, resolved)); + + resolved_conflicts.insert(std::make_pair(t, resolved)); + } +} + +static void +ensure_no_rename_clobbers(path_analysis const & a, + path_analysis const & b) +{ + // there is a special non-mergable pair of changes which we need + // to identify here: + // + // tid i : x -> y in change A + // tid j : z -> x in change B + // + // on the surface it looks like it ought to be mergable, since there is + // no conflict in the tids. except for one problem: B effectively + // clobbered i with j. 
there is nothing you can append to change B to + // revive the identity of i; in fact you risk having i and j identified + // if you form the naive merge concatenation BA. indeed, since A and B + // both supposedly start in the same state (in which i occupies name x), + // it really ought not to be possible to form B; you should have to + // accompany it with some sort of statement about the fate of i. + // + // as it stands, we're going to fault when we see this. if it turns out + // that there's a legal way of constructing such changes, one option is + // to synthesize a delete of i in B; essentially read "z->x" as an + // implicit "delete x first if it exists in post-state". + // + // however, for the time being this is a fault because we believe they + // should be globally illegal clobbers. + + directory_map b_first_map, b_second_map; + build_directory_map (b.first, b_first_map); + build_directory_map (b.second, b_second_map); + tid a_tid, b_tid; + + for (path_state::const_iterator i = a.first.begin(); + i != a.first.end(); ++i) + { + file_path anc_path, a_second_path; + a_tid = path_state_tid(i); + get_full_path(a.first, a_tid, anc_path); + + if (! lookup_path(anc_path, b_first_map, b_tid)) + { + file_path b_second_path; + reconstruct_path(anc_path, b_first_map, b.second, b_second_path); + + N(! lookup_path(b_second_path, b_second_map, b_tid), + (F("tid %d (%s) clobbered tid %d (%s)\n") + % b_tid % b_second_path + % a_tid % anc_path)); + } + } + +} + +static void +project_missing_changes(path_analysis const & a_tmp, + path_analysis const & b_tmp, + path_analysis & b_merged, + path_state & resolved_conflicts, + app_state & app) +{ + + // for each tid t adjusted in a: + // - if t exists in b: + // - if the change to t in b == change in a, skip + // - else resolve conflict + // - if conflict resolved in favour of a, append to merged + // - if resolved in favour of b, skip + // - else (no t in b) insert a's change to t in merged + + for (path_state::const_iterator i = a_tmp.first.begin(); + i != a_tmp.first.end(); ++i) + { + tid t = path_state_tid(i); + path_item a_first_item, a_second_item; + path_item b_first_item, b_second_item; + I(find_items(t, a_tmp, a_first_item, a_second_item)); + if (find_items(t, b_tmp, b_first_item, b_second_item)) + { + I(a_first_item == b_first_item); + if (a_second_item == b_second_item) + { + L(F("skipping common change on %s (tid %d)\n") + % path_item_name(a_first_item) % t); + } + else if (a_first_item == a_second_item) + { + L(F("skipping neutral change of %s -> %s (tid %d)\n") + % path_item_name(a_first_item) + % path_item_name(a_second_item) + % t); + } + else if (b_first_item == b_second_item) + { + L(F("propagating change on %s -> %s (tid %d)\n") + % path_item_name(b_first_item) + % path_item_name(b_second_item) + % t); + b_merged.first.insert(std::make_pair(t, b_second_item)); + b_merged.second.insert(std::make_pair(t, a_second_item)); + } + else + { + // conflict + path_item resolved; + resolve_conflict(t, path_item_type(a_first_item), a_tmp, b_tmp, + resolved, resolved_conflicts, app); + + if (resolved == a_second_item) + { + L(F("conflict detected, resolved in A's favour\n")); + b_merged.first.insert(std::make_pair(t, b_second_item)); + b_merged.second.insert(std::make_pair(t, a_second_item)); + } + else + { + L(F("conflict detected, resolved in B's favour\n")); + } + } + } + else + { + // there was no entry in b at all for this tid, copy it + b_merged.first.insert(std::make_pair(t, a_first_item)); + b_merged.second.insert(std::make_pair(t, 
a_second_item));
+        }
+    }
+
+  // now drive through b.second's view of the directory structure, in case
+  // some intermediate b-only directories showed up in the preimages of
+  // A-favoured conflicts.
+  extend_state(b_tmp.second, b_merged.first);
+  extend_state(b_merged.first, b_merged.second);
+}
+
+static void
+rebuild_analysis(path_analysis const & src,
+                 path_analysis & dst,
+                 tid_source & ts)
+{
+  state_renumbering renumbering;
+
+  for (path_state::const_iterator i = src.first.begin();
+       i != src.first.end(); ++i)
+    renumbering.insert(std::make_pair(path_state_tid(i), ts.next()));
+
+  dst = src;
+  apply_state_renumbering(renumbering, dst);
+}
+
+static void
+merge_disjoint_analyses(path_analysis const & a,
+                        path_analysis const & b,
+                        path_analysis & a_renumbered,
+                        path_analysis & b_renumbered,
+                        path_analysis & a_merged,
+                        path_analysis & b_merged,
+                        tid_source & ts,
+                        app_state & app)
+{
+  // we have anc->a and anc->b and we want to construct a->merged and
+  // b->merged, leading to the eventual identity concatenate(a,a_merged) ==
+  // concatenate(b,b_merged).
+
+  path_analysis a_tmp(a), b_tmp(b);
+  state_renumbering renumbering;
+
+  ensure_tids_disjoint(a_tmp, b_tmp);
+
+  // fault on a particular class of mal-formed changesets
+  ensure_no_rename_clobbers(a_tmp, b_tmp);
+  ensure_no_rename_clobbers(b_tmp, a_tmp);
+
+  // a.first and b.first refer to the same state-of-the-world.
+  //
+  // we begin by driving all the entries in a.first into b.first and vice
+  // versa.
+
+  {
+    directory_map a_first_map, b_first_map;
+    build_directory_map(a_tmp.first, a_first_map);
+    build_directory_map(b_tmp.first, b_first_map);
+    ensure_entries_exist(a_tmp.first, b_first_map, b_tmp.first, ts);
+    ensure_entries_exist(b_tmp.first, a_first_map, a_tmp.first, ts);
+  }
+
+  // we then drive any of the new arrivals in a.first to a.second, and
+  // likewise on b
+
+  {
+    directory_map a_second_map, b_second_map;
+    build_directory_map(a_tmp.second, a_second_map);
+    build_directory_map(b_tmp.second, b_second_map);
+    ensure_entries_exist(a_tmp.first, a_second_map, a_tmp.second, ts);
+    ensure_entries_exist(b_tmp.first, b_second_map, b_tmp.second, ts);
+  }
+
+  // we then index, identify, and renumber all the immediately apparent
+  // entries in each side.
+
+  {
+    std::map<file_path, tid> a_first_files, a_first_dirs;
+    std::map<file_path, tid> b_first_files, b_first_dirs;
+    index_entries(a_tmp.first, a_first_files, a_first_dirs);
+    index_entries(b_tmp.first, b_first_files, b_first_dirs);
+    extend_renumbering_from_path_identities(a_first_files, b_first_files, renumbering);
+    extend_renumbering_from_path_identities(a_first_dirs, b_first_dirs, renumbering);
+  }
+
+  // once renamed, b_tmp will have moved a fair bit closer to a_tmp, in
+  // terms of tids. there is still one set of files we haven't accounted
+  // for, however: files added in a and b.
+
+  {
+    state_renumbering aux_renumbering;
+    extend_renumbering_via_added_files(a_tmp, b_tmp, renumbering, aux_renumbering);
+    for (state_renumbering::const_iterator i = aux_renumbering.begin();
+         i != aux_renumbering.end(); ++i)
+      {
+        I(renumbering.find(i->first) == renumbering.end());
+        renumbering.insert(*i);
+      }
+  }
+
+  // renumbering now contains a *complete* renumbering of b->a,
+  // so we reset a_tmp and b_tmp, and renumber b_tmp under this
+  // scheme.
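
// Editor's sketch, not part of the original changeset: a state_renumbering
// is just a tid -> tid map, and applying it has to rewrite both the keys of
// a path_state and the parent links stored in each path_item.
// apply_state_renumbering (used just below) applies the same idea to both
// halves of a path_analysis. Roughly:
#if 0
static void
apply_renumbering_to_state_sketch(state_renumbering const & r,
                                  path_state & ps)
{
  path_state tmp;
  for (path_state::const_iterator i = ps.begin(); i != ps.end(); ++i)
    {
      tid t = path_state_tid(i);
      path_item item = path_state_item(i);
      state_renumbering::const_iterator j = r.find(t);
      if (j != r.end())
        t = j->second;                 // renumber the entry itself
      j = r.find(item.parent);
      if (j != r.end())
        item.parent = j->second;       // ... and its parent link
      tmp.insert(std::make_pair(t, item));
    }
  ps = tmp;
}
#endif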
+ + a_tmp = a; + b_tmp = b; + apply_state_renumbering(renumbering, b_tmp); + + a_renumbered = a_tmp; + b_renumbered = b_tmp; + + // now we're ready to merge (and resolve conflicts) + path_state resolved_conflicts; + project_missing_changes(a_tmp, b_tmp, b_merged, resolved_conflicts, app); + project_missing_changes(b_tmp, a_tmp, a_merged, resolved_conflicts, app); + + { + // now check: the merge analyses, when concatenated with their + // predecessors, should lead to the same composite rearrangement + + tid_source ts_tmp; + path_analysis anc_a_check, a_merge_check, a_check; + path_analysis anc_b_check, b_merge_check, b_check; + change_set::path_rearrangement a_re, b_re; + + rebuild_analysis(a, anc_a_check, ts_tmp); + rebuild_analysis(b, anc_b_check, ts_tmp); + rebuild_analysis(a_merged, a_merge_check, ts_tmp); + rebuild_analysis(b_merged, b_merge_check, ts_tmp); + + std::set anc_a_killed, anc_b_killed; + extract_killed(anc_a_check, anc_a_killed); + extract_killed(anc_b_check, anc_b_killed); + + concatenate_disjoint_analyses(anc_a_check, a_merge_check, anc_a_killed, a_check); + concatenate_disjoint_analyses(anc_b_check, b_merge_check, anc_b_killed, b_check); + compose_rearrangement(a_check, a_re); + compose_rearrangement(b_check, b_re); + I(a_re == b_re); + } + +} + +static void +merge_deltas(file_path const & anc_path, + file_path const & left_path, + file_path const & right_path, + file_path const & path_in_merged, + std::map & merge_finalists, + file_id const & anc, + file_id const & left, + file_id const & right, + file_id & finalist, + merge_provider & merger) +{ + std::map::const_iterator i = merge_finalists.find(path_in_merged); + if (i != merge_finalists.end()) + { + L(F("reusing merge resolution '%s' : '%s' -> '%s'\n") + % path_in_merged % anc % i->second); + finalist = i->second; + } + else + { + if (null_id(anc)) + { + N(merger.try_to_merge_files(left_path, right_path, path_in_merged, left, right, finalist), + F("merge of '%s' : '%s' vs. '%s' (no common ancestor) failed") + % path_in_merged % left % right); + } + else + { + N(merger.try_to_merge_files(anc_path, left_path, right_path, path_in_merged, + anc, left, right, finalist), + F("merge of '%s' : '%s' -> '%s' vs '%s' failed") + % path_in_merged % anc % left % right); + } + + L(F("merge of '%s' : '%s' -> '%s' vs '%s' resolved to '%s'\n") + % path_in_merged % anc % left % right % finalist); + + merge_finalists.insert(std::make_pair(path_in_merged, finalist)); + } +} + +static void +project_missing_deltas(change_set const & a, + change_set const & b, + path_analysis const & a_analysis, + path_analysis const & b_analysis, + path_analysis const & a_merged_analysis, + change_set & b_merged, + merge_provider & merger, + std::map & merge_finalists) +{ + directory_map a_second_map, b_first_map, a_merged_first_map; + build_directory_map(a_analysis.second, a_second_map); + build_directory_map(b_analysis.first, b_first_map); + build_directory_map(a_merged_analysis.first, a_merged_first_map); + + for (change_set::delta_map::const_iterator i = a.deltas.begin(); + i != a.deltas.end(); ++i) + { + file_path path_in_merged, path_in_anc, path_in_b_second; + + // we have a fork like this: + // + // + // +--> [a2] + // [a1==b1] + // +--> [b2] + // + // and we have a delta applied to a file in a2. we want to + // figure out what to call this delta's path in b2. this means + // reconstructing it in a1==b1, then reconstructing it *again* + // in b2. 
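
// Editor's illustration, with hypothetical paths (not from the original
// changeset): if a renamed src/foo.c -> lib/foo.c and carried a delta on
// that file, while b renamed the directory src/ -> core/, the two
// reconstructions below would behave like this:
#if 0
file_path p_anc, p_b2;
reconstruct_path(file_path("lib/foo.c"), a_second_map,
                 a_analysis.first, p_anc);       // p_anc == src/foo.c
reconstruct_path(p_anc, b_first_map,
                 b_analysis.second, p_b2);       // p_b2  == core/foo.c
#endif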
+ + // first work out what the path in a.first == b.first is + reconstruct_path(delta_entry_path(i), a_second_map, + a_analysis.first, path_in_anc); + + // first work out what the path in b.second is + reconstruct_path(path_in_anc, b_first_map, + b_analysis.second, path_in_b_second); + + // then work out what the path in merged is + reconstruct_path(delta_entry_path(i), a_merged_first_map, + a_merged_analysis.second, path_in_merged); + + // now check to see if there was a delta on the b.second name in b + change_set::delta_map::const_iterator j = b.deltas.find(path_in_b_second); + + if (j == b.deltas.end()) + { + // if not, copy ours over using the merged name + L(F("merge is copying delta '%s' : '%s' -> '%s'\n") + % path_in_merged % delta_entry_src(i) % delta_entry_dst(i)); + I(b.deltas.find(path_in_merged) == b.deltas.end()); + if (b.rearrangement.has_deleted_file(path_in_merged)) + // if the file was deleted on the other fork of the merge, then + // we don't want to keep this delta. + L(F("skipping delta '%s'->'%s' on deleted file '%s'\n") + % delta_entry_src(i) % delta_entry_dst(i) % path_in_merged); + else + b_merged.apply_delta(path_in_merged, delta_entry_src(i), delta_entry_dst(i)); + } + else + { + // if so, either... + + if (!(delta_entry_src(i) == delta_entry_src(j))) + { + // This is a bit of a corner case where a file was added then deleted on one + // of the forks. The src for the addition fork will be null_id, but the src + // for the other fork will be the ancestor file's id. + + // if neither of the forks involved a file addition delta (null_id to something) + // then something bad happened. + I(null_id(delta_entry_src(i)) || null_id(delta_entry_src(j))); + + if (null_id(delta_entry_src(i))) + { + // ... use the delta from 'a' + // 'a' change_set included a delta []->[...], ie file added. We want to + // follow this fork so it gets added to the b_merged changeset + L(F("propagating new file addition delta on '%s' : '%s' -> '%s'\n") + % path_in_merged + % delta_entry_src(j) + % delta_entry_dst(i)); + b_merged.apply_delta(path_in_merged, delta_entry_src(i), delta_entry_dst(i)); + } + else if (null_id(delta_entry_src(j))) + { + // ... ignore the delta + // 'b' change_set included a delta []->[...], ie file added. We don't need + // to add it to the b_merged changeset, since any delta in 'a' will be + // ignored (as 'b' includes deletions). + L(F("skipping new file addition delta on '%s' : '' -> '%s'\n") + % path_in_merged + % delta_entry_dst(j)); + } + } + else if (delta_entry_dst(i) == delta_entry_dst(j)) + { + // ... absorb identical deltas + L(F("skipping common delta '%s' : '%s' -> '%s'\n") + % path_in_merged % delta_entry_src(i) % delta_entry_dst(i)); + } + + else if (delta_entry_src(i) == delta_entry_dst(i)) + { + L(F("skipping neutral delta on '%s' : %s -> %s\n") + % delta_entry_path(i) + % delta_entry_src(i) + % delta_entry_dst(i)); + } + + else if (delta_entry_src(j) == delta_entry_dst(j)) + { + L(F("propagating unperturbed delta on '%s' : '%s' -> '%s'\n") + % delta_entry_path(i) + % delta_entry_src(i) + % delta_entry_dst(i)); + b_merged.apply_delta(path_in_merged, delta_entry_dst(j), delta_entry_dst(i)); + } + + else + { + // ... or resolve conflict + L(F("merging delta '%s' : '%s' -> '%s' vs. 
'%s'\n") + % path_in_merged % delta_entry_src(i) % delta_entry_dst(i) % delta_entry_dst(j)); + file_id finalist; + + merge_deltas(path_in_anc, + delta_entry_path(i), // left_path + delta_entry_path(j), // right_path + path_in_merged, + merge_finalists, + delta_entry_src(i), // anc + delta_entry_dst(i), // left + delta_entry_dst(j), // right + finalist, merger); + L(F("resolved merge to '%s' : '%s' -> '%s'\n") + % path_in_merged % delta_entry_src(i) % finalist); + + // if the conflict resolved to something other than the + // existing post-state of b, add a new entry to the deltas of + // b finishing the job. + if (! (finalist == delta_entry_dst(j))) + b_merged.apply_delta(path_in_merged, delta_entry_dst(j), finalist); + } + } + } +} + + +void +merge_change_sets(change_set const & a, + change_set const & b, + change_set & a_merged, + change_set & b_merged, + merge_provider & merger, + app_state & app) +{ + a.check_sane(); + b.check_sane(); + + L(F("merging change sets\n")); + + tid_source ts; + path_analysis + a_analysis, b_analysis, + a_renumbered, b_renumbered, + a_merged_analysis, b_merged_analysis; + + analyze_rearrangement(a.rearrangement, a_analysis, ts); + analyze_rearrangement(b.rearrangement, b_analysis, ts); + + merge_disjoint_analyses(a_analysis, b_analysis, + a_renumbered, b_renumbered, + a_merged_analysis, b_merged_analysis, + ts, app); + + compose_rearrangement(a_merged_analysis, + a_merged.rearrangement); + + compose_rearrangement(b_merged_analysis, + b_merged.rearrangement); + + std::map merge_finalists; + + project_missing_deltas(a, b, + a_renumbered, b_renumbered, + a_merged_analysis, + b_merged, + merger, merge_finalists); + + project_missing_deltas(b, a, + b_renumbered, a_renumbered, + b_merged_analysis, + a_merged, + merger, merge_finalists); + + { + // confirmation step + change_set a_check, b_check; + // dump_change_set("a", a); + // dump_change_set("a_merged", a_merged); + // dump_change_set("b", b); + // dump_change_set("b_merged", b_merged); + concatenate_change_sets(a, a_merged, a_check); + concatenate_change_sets(b, b_merged, b_check); + // dump_change_set("a_check", a_check); + // dump_change_set("b_check", b_check); + I(a_check == b_check); + } + + normalize_change_set(a_merged); + normalize_change_set(b_merged); + + a_merged.check_sane(); + b_merged.check_sane(); + + L(F("finished merge\n")); +} + +// end stuff related to merging + +void +invert_change_set(change_set const & a2b, + manifest_map const & a_map, + change_set & b2a) +{ + a2b.check_sane(); + tid_source ts; + path_analysis a2b_analysis, b2a_analysis; + + analyze_rearrangement(a2b.rearrangement, a2b_analysis, ts); + + L(F("inverting change set\n")); + b2a_analysis.first = a2b_analysis.second; + b2a_analysis.second = a2b_analysis.first; + compose_rearrangement(b2a_analysis, b2a.rearrangement); + + b2a.deltas.clear(); + + // existing deltas are in "b space" + for (path_state::const_iterator b = b2a_analysis.first.begin(); + b != b2a_analysis.first.end(); ++b) + { + path_state::const_iterator a = b2a_analysis.second.find(path_state_tid(b)); + I(a != b2a_analysis.second.end()); + if (path_item_type(path_state_item(b)) == ptype_file) + { + file_path b_pth, a_pth; + get_full_path(b2a_analysis.first, path_state_tid(b), b_pth); + + if (null_name(path_item_name(path_state_item(b))) && + ! 
null_name(path_item_name(path_state_item(a)))) + { + // b->a represents an add in "a space" + get_full_path(b2a_analysis.second, path_state_tid(a), a_pth); + manifest_map::const_iterator i = a_map.find(a_pth); + I(i != a_map.end()); + b2a.deltas.insert(std::make_pair(a_pth, + std::make_pair(file_id(), + manifest_entry_id(i)))); + L(F("converted 'delete %s' to 'add as %s' in inverse\n") + % a_pth + % manifest_entry_id(i)); + } + else if (! null_name(path_item_name(path_state_item(b))) && + null_name(path_item_name(path_state_item(a)))) + { + // b->a represents a del from "b space" + get_full_path(b2a_analysis.first, path_state_tid(b), b_pth); + L(F("converted add %s to delete in inverse\n") % b_pth ); + } + else + { + get_full_path(b2a_analysis.first, path_state_tid(b), b_pth); + get_full_path(b2a_analysis.second, path_state_tid(a), a_pth); + change_set::delta_map::const_iterator del = a2b.deltas.find(b_pth); + if (del == a2b.deltas.end()) + continue; + file_id src_id(delta_entry_src(del)), dst_id(delta_entry_dst(del)); + L(F("converting delta %s -> %s on %s\n") + % src_id % dst_id % b_pth); + L(F("inverse is delta %s -> %s on %s\n") + % dst_id % src_id % a_pth); + b2a.deltas.insert(std::make_pair(a_pth, std::make_pair(dst_id, src_id))); + } + } + } + + // some deltas might not have been renamed, however. these we just invert the + // direction on + for (change_set::delta_map::const_iterator del = a2b.deltas.begin(); + del != a2b.deltas.end(); ++del) + { + // check to make sure this isn't the image of an add (now a delete) + if (null_id(delta_entry_src(del))) + continue; + // check to make sure this isn't one of the already-moved deltas + if (b2a.deltas.find(delta_entry_path(del)) != b2a.deltas.end()) + continue; + b2a.deltas.insert(std::make_pair(delta_entry_path(del), + std::make_pair(delta_entry_dst(del), + delta_entry_src(del)))); + } + normalize_change_set(b2a); + b2a.check_sane(); +} + +void +move_files_to_tmp_bottom_up(tid t, + local_path const & temporary_root, + path_state const & state, + directory_map const & dmap) +{ + directory_map::const_iterator dirent = dmap.find(t); + if (dirent != dmap.end()) + { + boost::shared_ptr node = dirent->second; + for (directory_node::const_iterator entry = node->begin(); + entry != node->end(); ++entry) + { + tid child = directory_entry_tid(entry); + file_path path; + path_item item; + + find_item(child, state, item); + + if (null_name(path_item_name(item))) + continue; + + // recursively move all sub-entries + if (path_item_type(item) == ptype_directory) + move_files_to_tmp_bottom_up(child, temporary_root, state, dmap); + + get_full_path(state, child, path); + + local_path src(path()); + local_path dst((mkpath(temporary_root()) + / mkpath(boost::lexical_cast(child))).string()); + + P(F("moving %s -> %s\n") % src % dst); + switch (path_item_type(item)) + { + case ptype_file: + if (file_exists(src)) + move_file(src, dst); + break; + case ptype_directory: + if (directory_exists(src)) + move_dir(src, dst); + break; + } + } + } +} + +void +move_files_from_tmp_top_down(tid t, + local_path const & temporary_root, + path_state const & state, + directory_map const & dmap) +{ + directory_map::const_iterator dirent = dmap.find(t); + if (dirent != dmap.end()) + { + boost::shared_ptr node = dirent->second; + for (directory_node::const_iterator entry = node->begin(); + entry != node->end(); ++entry) + { + tid child = directory_entry_tid(entry); + file_path path; + path_item item; + + find_item(child, state, item); + + if (null_name(path_item_name(item))) + 
continue; + + get_full_path(state, child, path); + + local_path src((mkpath(temporary_root()) + / mkpath(boost::lexical_cast(child))).string()); + local_path dst(path()); + + switch (path_item_type(item)) + { + case ptype_file: + if (file_exists(src)) + { + P(F("moving file %s -> %s\n") % src % dst); + make_dir_for(path); + move_file(src, dst); + } + break; + case ptype_directory: + if (directory_exists(src)) + { + P(F("moving dir %s -> %s\n") % src % dst); + make_dir_for(path); + move_dir(src, dst); + } + break; + } + + // recursively move all sub-entries + if (path_item_type(item) == ptype_directory) + move_files_from_tmp_top_down(child, temporary_root, state, dmap); + } + } +} + + +void +apply_rearrangement_to_filesystem(change_set::path_rearrangement const & re, + local_path const & temporary_root) +{ + re.check_sane(); + tid_source ts; + path_analysis analysis; + directory_map first_dmap, second_dmap; + + analyze_rearrangement(re, analysis, ts); + build_directory_map(analysis.first, first_dmap); + build_directory_map(analysis.second, second_dmap); + + if (analysis.first.empty()) + return; + + move_files_to_tmp_bottom_up(root_tid, temporary_root, + analysis.first, first_dmap); + + move_files_from_tmp_top_down(root_tid, temporary_root, + analysis.second, second_dmap); +} + +// application stuff + +void +apply_path_rearrangement(path_set const & old_ps, + change_set::path_rearrangement const & pr, + path_set & new_ps) +{ + pr.check_sane(); + change_set::path_rearrangement a, b, c; + a.added_files = old_ps; + concatenate_rearrangements(a, pr, c); + new_ps = c.added_files; +} + +void +build_pure_addition_change_set(manifest_map const & man, + change_set & cs) +{ + for (manifest_map::const_iterator i = man.begin(); i != man.end(); ++i) + cs.add_file(manifest_entry_path(i), manifest_entry_id(i)); + cs.check_sane(); +} + +// this function takes the rearrangement sitting in cs and "completes" the +// changeset by filling in all the deltas + +void +complete_change_set(manifest_map const & m_old, + manifest_map const & m_new, + change_set & cs) +{ + cs.rearrangement.check_sane(); + tid_source ts; + path_analysis analysis; + directory_map first_dmap, second_dmap; + + analyze_rearrangement(cs.rearrangement, analysis, ts); + build_directory_map(analysis.first, first_dmap); + build_directory_map(analysis.second, second_dmap); + + std::set paths; + extract_path_set(m_new, paths); + + for (std::set::const_iterator i = cs.rearrangement.added_files.begin(); + i != cs.rearrangement.added_files.end(); ++i) + { + manifest_map::const_iterator j = m_new.find(*i); + I(j != m_new.end()); + cs.deltas.insert(std::make_pair(*i, + std::make_pair(null_ident, + manifest_entry_id(j)))); + paths.erase(*i); + } + + for (std::set::const_iterator i = paths.begin(); + i != paths.end(); ++i) + { + file_path old_path; + reconstruct_path(*i, second_dmap, analysis.first, old_path); + manifest_map::const_iterator j = m_old.find(old_path); + manifest_map::const_iterator k = m_new.find(*i); + I(j != m_old.end()); + I(k != m_new.end()); + if (!(manifest_entry_id(j) == manifest_entry_id(k))) + cs.deltas.insert(std::make_pair(*i, std::make_pair(manifest_entry_id(j), + manifest_entry_id(k)))); + } + + cs.check_sane(); +} + + +void +apply_change_set(manifest_map const & old_man, + change_set const & cs, + manifest_map & new_man) +{ + cs.check_sane(); + change_set a, b; + build_pure_addition_change_set(old_man, a); + concatenate_change_sets(a, cs, b); + + // If the composed change_set still has renames or deletions in it, then + // 
they referred to things that weren't in the original manifest, and this + // change_set should never have been applied to this manifest in the first + // place. + I(b.rearrangement.deleted_files.empty()); + I(b.rearrangement.renamed_files.empty()); + // Furthermore, all deltas should be add deltas + for (change_set::delta_map::const_iterator i = b.deltas.begin(); + i != b.deltas.end(); ++i) + { + I(null_id(delta_entry_src(i))); + I(b.rearrangement.added_files.find(delta_entry_path(i)) + != b.rearrangement.added_files.end()); + } + + new_man.clear(); + for (std::set::const_iterator i = b.rearrangement.added_files.begin(); + i != b.rearrangement.added_files.end(); ++i) + { + change_set::delta_map::const_iterator d = b.deltas.find(*i); + I(d != b.deltas.end()); + new_man.insert(std::make_pair(*i, delta_entry_dst(d))); + } +} + +// quick, optimistic and destructive version for log walker +file_path +apply_change_set_inverse(change_set const & cs, + file_path const & file_in_second) +{ + cs.check_sane(); + tid_source ts; + path_analysis analysis; + directory_map second_dmap; + file_path file_in_first; + + analyze_rearrangement(cs.rearrangement, analysis, ts); + build_directory_map(analysis.second, second_dmap); + reconstruct_path(file_in_second, second_dmap, analysis.first, file_in_first); + return file_in_first; +} + +// quick, optimistic and destructive version for rcs importer +void +apply_change_set(change_set const & cs, + manifest_map & man) +{ + cs.check_sane(); + if (cs.rearrangement.renamed_files.empty() + && cs.rearrangement.renamed_dirs.empty() + && cs.rearrangement.deleted_dirs.empty()) + { + // fast path for simple drop/add/delta file operations + for (std::set::const_iterator i = cs.rearrangement.deleted_files.begin(); + i != cs.rearrangement.deleted_files.end(); ++i) + { + man.erase(*i); + } + for (change_set::delta_map::const_iterator i = cs.deltas.begin(); + i != cs.deltas.end(); ++i) + { + if (!null_id(delta_entry_dst(i))) + man[delta_entry_path(i)] = delta_entry_dst(i); + } + } + else + { + // fall back to the slow way + manifest_map tmp; + apply_change_set(man, cs, tmp); + man = tmp; + } +} + + +// i/o stuff + +namespace +{ + namespace syms + { + std::string const patch("patch"); + std::string const from("from"); + std::string const to("to"); + std::string const add_file("add_file"); + std::string const delete_file("delete_file"); + std::string const delete_dir("delete_dir"); + std::string const rename_file("rename_file"); + std::string const rename_dir("rename_dir"); + } +} + +static void +parse_path_rearrangement(basic_io::parser & parser, + change_set & cs) +{ + while (parser.symp()) + { + std::string t1, t2; + if (parser.symp(syms::add_file)) + { + parser.sym(); + parser.str(t1); + cs.add_file(file_path(t1)); + } + else if (parser.symp(syms::delete_file)) + { + parser.sym(); + parser.str(t1); + cs.delete_file(file_path(t1)); + } + else if (parser.symp(syms::delete_dir)) + { + parser.sym(); + parser.str(t1); + cs.delete_dir(file_path(t1)); + } + else if (parser.symp(syms::rename_file)) + { + parser.sym(); + parser.str(t1); + parser.esym(syms::to); + parser.str(t2); + cs.rename_file(file_path(t1), + file_path(t2)); + } + else if (parser.symp(syms::rename_dir)) + { + parser.sym(); + parser.str(t1); + parser.esym(syms::to); + parser.str(t2); + cs.rename_dir(file_path(t1), + file_path(t2)); + } + else + break; + } + cs.rearrangement.check_sane(); +} + + +void +print_path_rearrangement(basic_io::printer & printer, + change_set::path_rearrangement const & pr) +{ + + 
pr.check_sane(); + for (std::set::const_iterator i = pr.deleted_files.begin(); + i != pr.deleted_files.end(); ++i) + { + basic_io::stanza st; + st.push_str_pair(syms::delete_file, (*i)()); + printer.print_stanza(st); + } + + for (std::set::const_iterator i = pr.deleted_dirs.begin(); + i != pr.deleted_dirs.end(); ++i) + { + basic_io::stanza st; + st.push_str_pair(syms::delete_dir, (*i)()); + printer.print_stanza(st); + } + + for (std::map::const_iterator i = pr.renamed_files.begin(); + i != pr.renamed_files.end(); ++i) + { + basic_io::stanza st; + st.push_str_pair(syms::rename_file, i->first()); + st.push_str_pair(syms::to, i->second()); + printer.print_stanza(st); + } + + for (std::map::const_iterator i = pr.renamed_dirs.begin(); + i != pr.renamed_dirs.end(); ++i) + { + basic_io::stanza st; + st.push_str_pair(syms::rename_dir, i->first()); + st.push_str_pair(syms::to, i->second()); + printer.print_stanza(st); + } + + for (std::set::const_iterator i = pr.added_files.begin(); + i != pr.added_files.end(); ++i) + { + basic_io::stanza st; + st.push_str_pair(syms::add_file, (*i)()); + printer.print_stanza(st); + } +} + +void +parse_change_set(basic_io::parser & parser, + change_set & cs) +{ + clear_change_set(cs); + + parse_path_rearrangement(parser, cs); + + while (parser.symp(syms::patch)) + { + std::string path, src, dst; + parser.sym(); + parser.str(path); + parser.esym(syms::from); + parser.hex(src); + parser.esym(syms::to); + parser.hex(dst); + cs.deltas.insert(std::make_pair(file_path(path), + std::make_pair(file_id(src), + file_id(dst)))); + } + cs.check_sane(); +} + +void +print_change_set(basic_io::printer & printer, + change_set const & cs) +{ + cs.check_sane(); + print_path_rearrangement(printer, cs.rearrangement); + + for (change_set::delta_map::const_iterator i = cs.deltas.begin(); + i != cs.deltas.end(); ++i) + { + basic_io::stanza st; + st.push_str_pair(syms::patch, i->first()); + st.push_hex_pair(syms::from, i->second.first.inner()()); + st.push_hex_pair(syms::to, i->second.second.inner()()); + printer.print_stanza(st); + } +} + +void +read_path_rearrangement(data const & dat, + change_set::path_rearrangement & re) +{ + std::istringstream iss(dat()); + basic_io::input_source src(iss, "path_rearrangement"); + basic_io::tokenizer tok(src); + basic_io::parser pars(tok); + change_set cs; + parse_path_rearrangement(pars, cs); + re = cs.rearrangement; + I(src.lookahead == EOF); + re.check_sane(); +} + +void +read_change_set(data const & dat, + change_set & cs) +{ + std::istringstream iss(dat()); + basic_io::input_source src(iss, "change_set"); + basic_io::tokenizer tok(src); + basic_io::parser pars(tok); + parse_change_set(pars, cs); + I(src.lookahead == EOF); + cs.check_sane(); +} + +void +write_change_set(change_set const & cs, + data & dat) +{ + cs.check_sane(); + std::ostringstream oss; + basic_io::printer pr(oss); + print_change_set(pr, cs); + dat = data(oss.str()); +} + +void +write_path_rearrangement(change_set::path_rearrangement const & re, + data & dat) +{ + re.check_sane(); + std::ostringstream oss; + basic_io::printer pr(oss); + print_path_rearrangement(pr, re); + dat = data(oss.str()); +} + +#ifdef BUILD_UNIT_TESTS +#include "unit_tests.hh" +#include "sanity.hh" + +static void dump_change_set(std::string const & ctx, + change_set const & cs) +{ + data tmp; + write_change_set(cs, tmp); + L(F("[begin changeset %s]\n") % ctx); + L(F("%s") % tmp); + L(F("[end changeset %s]\n") % ctx); +} + +static void +spin_change_set(change_set const & cs) +{ + data tmp1; + change_set cs1; 
+ write_change_set(cs, tmp1); + dump_change_set("normalized", cs); + read_change_set(tmp1, cs1); + for (int i = 0; i < 5; ++i) + { + data tmp2; + change_set cs2; + write_change_set(cs1, tmp2); + BOOST_CHECK(tmp1 == tmp2); + read_change_set(tmp2, cs2); + BOOST_CHECK(cs1.rearrangement == cs2.rearrangement); + BOOST_CHECK(cs1.deltas == cs2.deltas); + cs1 = cs2; + } +} + +static void +disjoint_merge_test(std::string const & ab_str, + std::string const & ac_str) +{ + change_set ab, ac, bm, cm; + + app_state app; + + L(F("beginning disjoint_merge_test\n")); + + read_change_set(data(ab_str), ab); + read_change_set(data(ac_str), ac); + + manifest_map dummy; + + merge_provider merger(app, dummy, dummy, dummy); + merge_change_sets(ab, ac, bm, cm, merger, app); + + dump_change_set("ab", ab); + dump_change_set("ac", ac); + dump_change_set("bm", bm); + dump_change_set("cm", cm); + + BOOST_CHECK(bm.rearrangement == ac.rearrangement); + BOOST_CHECK(cm.rearrangement == ab.rearrangement); + + L(F("finished disjoint_merge_test\n")); +} + +static void +disjoint_merge_tests() +{ + disjoint_merge_test + ("rename_file \"foo\"\n" + " to \"bar\"\n", + + "rename_file \"apple\"\n" + " to \"orange\"\n"); + + disjoint_merge_test + ("rename_file \"foo/a.txt\"\n" + " to \"bar/b.txt\"\n", + + "rename_file \"bar/c.txt\"\n" + " to \"baz/d.txt\"\n"); + + disjoint_merge_test + ("patch \"foo/file.txt\"\n" + " from [c6a4a6196bb4a744207e1a6e90273369b8c2e925]\n" + " to [fe18ec0c55cbc72e4e51c58dc13af515a2f3a892]\n", + + "rename_file \"foo/file.txt\"\n" + " to \"foo/apple.txt\"\n"); + + disjoint_merge_test + ( + "rename_file \"apple.txt\"\n" + " to \"pear.txt\"\n" + "\n" + "patch \"foo.txt\"\n" + " from [c6a4a6196bb4a744207e1a6e90273369b8c2e925]\n" + " to [fe18ec0c55cbc72e4e51c58dc13af515a2f3a892]\n", + + "rename_file \"foo.txt\"\n" + " to \"bar.txt\"\n" + "\n" + "patch \"apple.txt\"\n" + " from [fe18ec0c55cbc72e4e51c58dc13af515a2f3a892]\n" + " to [435e816c30263c9184f94e7c4d5aec78ea7c028a]\n"); +} + +static void +basic_change_set_test() +{ + try + { + + change_set cs; + cs.delete_file(file_path("usr/lib/zombie")); + cs.add_file(file_path("usr/bin/cat"), + file_id(hexenc("adc83b19e793491b1c6ea0fd8b46cd9f32e592fc"))); + cs.add_file(file_path("usr/local/bin/dog"), + file_id(hexenc("adc83b19e793491b1c6ea0fd8b46cd9f32e592fc"))); + cs.rename_file(file_path("usr/local/bin/dog"), file_path("usr/bin/dog")); + cs.rename_file(file_path("usr/bin/cat"), file_path("usr/local/bin/chicken")); + cs.add_file(file_path("usr/lib/libc.so"), + file_id(hexenc("435e816c30263c9184f94e7c4d5aec78ea7c028a"))); + cs.rename_dir(file_path("usr/lib"), file_path("usr/local/lib")); + cs.apply_delta(file_path("usr/local/bin/chicken"), + file_id(hexenc("c6a4a6196bb4a744207e1a6e90273369b8c2e925")), + file_id(hexenc("fe18ec0c55cbc72e4e51c58dc13af515a2f3a892"))); + spin_change_set(cs); + } + catch (informative_failure & exn) + { + L(F("informative failure: %s\n") % exn.what); + } + catch (std::runtime_error & exn) + { + L(F("runtime error: %s\n") % exn.what()); + } +} + +static void +neutralize_change_test() +{ + try + { + + change_set cs1, cs2, csa; + cs1.add_file(file_path("usr/lib/zombie"), + file_id(hexenc("adc83b19e793491b1c6ea0fd8b46cd9f32e592fc"))); + cs1.rename_file(file_path("usr/lib/apple"), + file_path("usr/lib/orange")); + cs1.rename_dir(file_path("usr/lib/moose"), + file_path("usr/lib/squirrel")); + + dump_change_set("neutralize target", cs1); + + cs2.delete_file(file_path("usr/lib/zombie")); + cs2.rename_file(file_path("usr/lib/orange"), + 
file_path("usr/lib/apple")); + cs2.rename_dir(file_path("usr/lib/squirrel"), + file_path("usr/lib/moose")); + + dump_change_set("neutralizer", cs2); + + concatenate_change_sets(cs1, cs2, csa); + + dump_change_set("neutralized", csa); + + tid_source ts; + path_analysis analysis; + analyze_rearrangement(csa.rearrangement, analysis, ts); + + BOOST_CHECK(analysis.first.empty()); + BOOST_CHECK(analysis.second.empty()); + } + catch (informative_failure & exn) + { + L(F("informative failure: %s\n") % exn.what); + } + catch (std::runtime_error & exn) + { + L(F("runtime error: %s\n") % exn.what()); + } +} + +static void +non_interfering_change_test() +{ + try + { + + change_set cs1, cs2, csa; + cs1.delete_file(file_path("usr/lib/zombie")); + cs1.rename_file(file_path("usr/lib/orange"), + file_path("usr/lib/apple")); + cs1.rename_dir(file_path("usr/lib/squirrel"), + file_path("usr/lib/moose")); + + dump_change_set("non-interference A", cs1); + + cs2.add_file(file_path("usr/lib/zombie"), + file_id(hexenc("adc83b19e793491b1c6ea0fd8b46cd9f32e592fc"))); + cs2.rename_file(file_path("usr/lib/pear"), + file_path("usr/lib/orange")); + cs2.rename_dir(file_path("usr/lib/spy"), + file_path("usr/lib/squirrel")); + + dump_change_set("non-interference B", cs2); + + concatenate_change_sets(cs1, cs2, csa); + + dump_change_set("non-interference combined", csa); + + tid_source ts; + path_analysis analysis; + analyze_rearrangement(csa.rearrangement, analysis, ts); + + BOOST_CHECK(analysis.first.size() == 8); + BOOST_CHECK(analysis.second.size() == 8); + } + catch (informative_failure & exn) + { + L(F("informative failure: %s\n") % exn.what); + } + catch (std::runtime_error & exn) + { + L(F("runtime error: %s\n") % exn.what()); + } +} + +static const file_id fid_null; +static const file_id fid1 = file_id(hexenc("aaaa3831e5eb74e6cd50b94f9e99e6a14d98d702")); +static const file_id fid2 = file_id(hexenc("bbbb3831e5eb74e6cd50b94f9e99e6a14d98d702")); +static const file_id fid3 = file_id(hexenc("cccc3831e5eb74e6cd50b94f9e99e6a14d98d702")); + +typedef enum { in_a, in_b } which_t; +struct bad_concatenate_change_test +{ + change_set a; + change_set b; + change_set combined; + change_set concat; + bool do_combine; + std::string ident; + bad_concatenate_change_test(char const *file, int line) : + do_combine(false), + ident((F("%s:%d") % file % line).str()) + { + L(F("BEGINNING concatenation test %s\n") % ident); + } + + ~bad_concatenate_change_test() + { + L(F("FINISHING concatenation test %s\n") % ident); + } + + change_set & getit(which_t which) + { + if (which == in_a) + return a; + return b; + } + // Call combine() if you want to make sure that the things that are bad when + // concatenated are also bad when all stuck together into a single + // changeset. 
+ void combine() { do_combine = true; } + void add_file(which_t which, std::string const & path, file_id fid = fid1) + { + getit(which).add_file(file_path(path), fid); + if (do_combine) + combined.add_file(file_path(path), fid); + } + void apply_delta(which_t which, std::string const & path, + file_id from_fid, + file_id to_fid) + { + getit(which).apply_delta(file_path(path), from_fid, to_fid); + if (do_combine) + combined.apply_delta(file_path(path), from_fid, to_fid); + } + void delete_file(which_t which, std::string const & path) + { + getit(which).delete_file(file_path(path)); + if (do_combine) + combined.delete_file(file_path(path)); + } + void delete_dir(which_t which, std::string const & path) + { + getit(which).delete_dir(file_path(path)); + if (do_combine) + combined.delete_dir(file_path(path)); + } + void rename_file(which_t which, + std::string const & path1, std::string const & path2) + { + getit(which).rename_file(file_path(path1), file_path(path2)); + if (do_combine) + combined.rename_file(file_path(path1), file_path(path2)); + } + void rename_dir(which_t which, + std::string const & path1, std::string const & path2) + { + getit(which).rename_dir(file_path(path1), file_path(path2)); + if (do_combine) + combined.rename_dir(file_path(path1), file_path(path2)); + } + void run() + { + L(F("RUNNING bad_concatenate_change_test %s\n") % ident); + try + { + dump_change_set("a", a); + dump_change_set("b", b); + } + catch (std::logic_error e) + { + L(F("skipping change_set printing, one or both are not sane\n")); + } + BOOST_CHECK_THROW(concatenate_change_sets(a, b, concat), + std::logic_error); + try { dump_change_set("concat", concat); } + catch (std::logic_error e) { L(F("concat change_set is insane\n")); } + if (do_combine) + { + L(F("Checking combined change set\n")); + change_set empty_cs, combined_concat; + BOOST_CHECK_THROW(concatenate_change_sets(combined, + empty_cs, + combined_concat), + std::logic_error); + try { dump_change_set("combined_concat", combined_concat); } + catch (std::logic_error e) { L(F("combined_concat is insane\n")); } + } + } + void run_both() + { + run(); + L(F("RUNNING bad_concatenate_change_test %s again backwards\n") % ident); + BOOST_CHECK_THROW(concatenate_change_sets(a, b, concat), + std::logic_error); + } +}; + +// We also do a number of just "bad change set" tests here, leaving one of +// them empty; this is because our main line of defense against bad +// change_sets, check_sane_history, does its checking by doing +// concatenations, so it's doing concatenations that we want to be sure does +// sanity checking. 
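
// Editor's note: this sketch (not part of the original changeset) spells
// out why run() concatenates 'combined' with an *empty* changeset rather
// than merely calling combined.check_sane(): some breakage only surfaces
// inside the path analysis that concatenation performs, and concatenation
// is what check_sane_history drives on real histories.
#if 0
change_set combined, empty_cs, out;
combined.rename_file(file_path("foo"), file_path("target"));
combined.rename_dir(file_path("bar"), file_path("target"));
BOOST_CHECK_THROW(concatenate_change_sets(combined, empty_cs, out),
                  std::logic_error);
#endif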
+static void +bad_concatenate_change_tests() +{ + // Files/directories can't be dropped on top of each other: + BOOST_CHECKPOINT("on top"); + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.add_file(in_a, "target"); + t.add_file(in_b, "target"); + t.run(); + } + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.combine(); + t.rename_file(in_a, "foo", "target"); + t.rename_file(in_b, "bar", "target"); + t.run(); + } + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.combine(); + t.rename_dir(in_a, "foo", "target"); + t.rename_dir(in_b, "bar", "target"); + t.run(); + } + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.combine(); + t.rename_file(in_a, "foo", "target"); + t.rename_dir(in_b, "bar", "target"); + t.run_both(); + } + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.combine(); + t.add_file(in_a, "target"); + t.rename_file(in_b, "foo", "target"); + t.run_both(); + } + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.combine(); + t.add_file(in_a, "target"); + t.rename_dir(in_b, "foo", "target"); + t.run_both(); + } + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.combine(); + t.add_file(in_a, "target/subfile"); + t.add_file(in_b, "target"); + t.run_both(); + } + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.combine(); + t.add_file(in_a, "target/subfile"); + t.rename_file(in_b, "foo", "target"); + t.run_both(); + } + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.add_file(in_a, "target/subfile"); + t.rename_dir(in_b, "foo", "target"); + t.run_both(); + } + // You can only delete something once + BOOST_CHECKPOINT("delete once"); + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.delete_file(in_a, "target"); + t.delete_file(in_b, "target"); + t.run(); + } + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.combine(); + t.delete_file(in_a, "target"); + t.delete_dir(in_b, "target"); + t.run_both(); + } + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.delete_dir(in_a, "target"); + t.delete_dir(in_b, "target"); + t.run(); + } + // You can't delete something that's not there anymore + BOOST_CHECKPOINT("delete after rename"); + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.combine(); + t.delete_file(in_a, "target"); + t.rename_file(in_b, "target", "foo"); + t.run_both(); + } + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.combine(); + t.delete_dir(in_a, "target"); + t.rename_file(in_b, "target", "foo"); + t.run_both(); + } + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.combine(); + t.delete_dir(in_a, "target"); + t.rename_dir(in_b, "target", "foo"); + t.run_both(); + } + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.combine(); + t.delete_file(in_a, "target"); + t.rename_dir(in_b, "target", "foo"); + t.run_both(); + } + // Files/directories can't be split in two + BOOST_CHECKPOINT("splitting files/dirs"); + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.rename_file(in_a, "target", "foo"); + t.rename_file(in_b, "target", "bar"); + t.run(); + } + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.rename_dir(in_a, "target", "foo"); + t.rename_dir(in_b, "target", "bar"); + t.run(); + } + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.combine(); + t.rename_dir(in_a, "target", "foo"); + t.rename_file(in_b, "target", "bar"); + t.run_both(); + } + // Files and directories are different + BOOST_CHECKPOINT("files != dirs"); + { + bad_concatenate_change_test t(__FILE__, __LINE__); + 
t.add_file(in_a, "target"); + t.delete_dir(in_b, "target"); + t.run(); + } + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.add_file(in_a, "target/subfile"); + t.delete_file(in_b, "target"); + t.run(); + } + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.add_file(in_a, "target/subfile"); + t.rename_file(in_b, "target", "foo"); + t.run(); + } + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.rename_file(in_a, "foo", "target"); + t.delete_dir(in_b, "target"); + t.run(); + } + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.combine(); + t.apply_delta(in_a, "target", fid1, fid2); + t.delete_dir(in_b, "target"); + t.run_both(); + } + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.rename_dir(in_a, "foo", "target"); + t.delete_file(in_b, "target"); + t.run(); + } + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.combine(); + t.rename_dir(in_a, "foo", "target"); + t.apply_delta(in_b, "target", fid1, fid2); + t.run_both(); + } + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.combine(); + t.apply_delta(in_a, "target", fid1, fid2); + t.rename_dir(in_b, "target", "bar"); + t.run_both(); + } + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.rename_file(in_a, "foo", "target"); + t.rename_dir(in_b, "target", "bar"); + t.run(); + } + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.rename_dir(in_a, "foo", "target"); + t.rename_file(in_b, "target", "bar"); + t.run(); + } + // Directories can't be patched, and patches can't be directoried... + BOOST_CHECKPOINT("can't patch dirs or vice versa"); + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.combine(); + t.add_file(in_a, "target/subfile"); + t.apply_delta(in_b, "target", fid_null, fid1); + t.run_both(); + } + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.combine(); + t.add_file(in_a, "target/subfile"); + t.apply_delta(in_b, "target", fid1, fid2); + t.run_both(); + } + // Deltas must be consistent + BOOST_CHECKPOINT("consistent deltas"); + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.apply_delta(in_a, "target", fid1, fid2); + t.apply_delta(in_b, "target", fid3, fid1); + t.run(); + } + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.add_file(in_a, "target", fid1); + t.apply_delta(in_b, "target", fid2, fid3); + t.run(); + } + // Can't have a null source id if it's not an add + BOOST_CHECKPOINT("null id on non-add"); + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.apply_delta(in_a, "target", fid_null, fid1); + t.run(); + } + // Can't have drop + delta with no add + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.combine(); + t.delete_file(in_a, "target"); + t.apply_delta(in_b, "target", fid1, fid2); + t.run(); + } + // Can't have a null destination id, ever, with or without a delete_file + BOOST_CHECKPOINT("no null destinations"); + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.delete_file(in_a, "target"); + t.apply_delta(in_a, "target", fid1, fid_null); + t.run(); + } + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.apply_delta(in_a, "target", fid1, fid_null); + t.run(); + } + // Can't have a patch with src == dst + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.apply_delta(in_a, "target", fid1, fid1); + t.run(); + } +} + +// FIXME: Things that should be added, but can't be trivially because they +// assert too early: +// anything repeated -- multiple adds, multiple deletes, multiple deltas +// including in one changeset, for both files and dirs +// 
(probably should put these in strings, and do BOOST_CHECK_THROWS in the +// parser?) + +// FIXME: also need tests for the invariants in apply_manifest (and any +// invariants that should be there but aren't, of course) + +void +add_change_set_tests(test_suite * suite) +{ + I(suite); + suite->add(BOOST_TEST_CASE(&basic_change_set_test)); + suite->add(BOOST_TEST_CASE(&neutralize_change_test)); + suite->add(BOOST_TEST_CASE(&non_interfering_change_test)); + suite->add(BOOST_TEST_CASE(&disjoint_merge_tests)); + suite->add(BOOST_TEST_CASE(&bad_concatenate_change_tests)); +} + + +#endif // BUILD_UNIT_TESTS ============================================================ --- tests/(minor)_test_a_merge_3/left adc1ca256e9313dd387448ffcd5cf7572eb58d8e +++ tests/(minor)_test_a_merge_3/left adc1ca256e9313dd387448ffcd5cf7572eb58d8e @@ -0,0 +1,3563 @@ +// copyright (C) 2004 graydon hoare +// all rights reserved. +// licensed to the public under the terms of the GNU GPL (>= 2) +// see the file COPYING for details + +// this is how you "ask for" the C99 constant constructor macros. *and* +// you have to do so before any other files accidentally include +// stdint.h. awesome. +#define __STDC_CONSTANT_MACROS + +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "basic_io.hh" +#include "change_set.hh" +#include "constants.hh" +#include "diff_patch.hh" +#include "file_io.hh" +#include "interner.hh" +#include "numeric_vocab.hh" +#include "sanity.hh" +#include "smap.hh" + +// our analyses in this file happen on one of two families of +// related structures: a path_analysis or a directory_map. +// +// a path_analysis corresponds exactly to a normalized +// path_rearrangement; they are two ways of writing the +// same information +// +// the path_analysis stores two path_states. each path_state is a map from +// transient identifiers (tids) to items. each item represents a semantic +// element of a filesystem which has a type (file or directory), a name, +// and a parent link (another tid). tids should be unique across a +// path_analysis. 
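
// Editor's sketch (not in the original file): with the typedefs below, the
// normalized rearrangement "rename_file foo -> bar" becomes a path_analysis
// whose two states share a single tid; only the name component differs.
#if 0
path_analysis pa;
path_component foo = the_path_component_maker.make("foo");
path_component bar = the_path_component_maker.make("bar");
pa.first.insert(std::make_pair(1, path_item(root_tid, ptype_file, foo)));
pa.second.insert(std::make_pair(1, path_item(root_tid, ptype_file, bar)));
// compose_rearrangement() reads this back as a rename, since the tid is
// present in both states under different names
#endif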
+
+typedef enum { ptype_directory, ptype_file } ptype;
+typedef u32 tid;
+static tid root_tid = 0;
+
+struct
+tid_source
+{
+  tid ctr;
+  tid_source() : ctr(root_tid + 1) {}
+  tid next() { I(ctr != UINT32_C(0xffffffff)); return ctr++; }
+};
+
+typedef u32 path_component;
+
+struct
+path_component_maker
+{
+  path_component make(std::string const & s)
+  {
+    bool is_new;
+    path_component pc = intern.intern(s, is_new);
+    // sanity check new ones
+    if (is_new)
+      {
+        // must be a valid file_path
+        file_path tmp_file_path = file_path(s);
+        // must contain exactly 0 or 1 components
+        fs::path tmp_fs_path = mkpath(s);
+        I(null_name(s) || ++(tmp_fs_path.begin()) == tmp_fs_path.end());
+      }
+    return pc;
+  }
+  std::string lookup(path_component pc) const
+  {
+    return intern.lookup(pc);
+  }
+private:
+  interner<path_component> intern;
+};
+
+static path_component_maker the_path_component_maker;
+
+static path_component
+make_null_component()
+{
+  static path_component null_pc = the_path_component_maker.make("");
+  return null_pc;
+}
+
+static bool
+null_name(path_component pc)
+{
+  return pc == make_null_component();
+}
+
+struct
+path_item
+{
+  tid parent;
+  ptype ty;
+  path_component name;
+  path_item() {}
+  path_item(tid p, ptype t, path_component n);
+  path_item(path_item const & other);
+  path_item const & operator=(path_item const & other);
+  bool operator==(path_item const & other) const;
+};
+
+
+template <typename T> struct identity
+{
+  size_t operator()(T const & v) const
+  {
+    return static_cast<size_t>(v);
+  }
+};
+
+typedef smap<tid, path_item> path_state;
+typedef smap<tid, tid> state_renumbering;
+typedef std::pair<path_state, path_state> path_analysis;
+
+// nulls and tests
+
+static file_id null_ident;
+
+// a directory_map is a more "normal" representation of a directory tree,
+// which you can traverse more conveniently from root to tip
+//
+// tid -> [ name -> (ptype, tid),
+//          name -> (ptype, tid),
+//          ... ]
+//
+// tid -> [ name -> (ptype, tid),
+//          name -> (ptype, tid),
+//          ... ]
+
+typedef smap<path_component, std::pair<ptype, tid> > directory_node;
+typedef smap<tid, boost::shared_ptr<directory_node> > directory_map;
+
+static path_component
+directory_entry_name(directory_node::const_iterator const & i)
+{
+  return i->first;
+}
+
+static ptype
+directory_entry_type(directory_node::const_iterator const & i)
+{
+  return i->second.first;
+}
+
+static tid
+directory_entry_tid(directory_node::const_iterator const & i)
+{
+  return i->second.second;
+}
+
+void
+change_set::add_file(file_path const & a)
+{
+  I(rearrangement.added_files.find(a) == rearrangement.added_files.end());
+  rearrangement.added_files.insert(a);
+}
+
+void
+change_set::add_file(file_path const & a, file_id const & ident)
+{
+  I(rearrangement.added_files.find(a) == rearrangement.added_files.end());
+  I(deltas.find(a) == deltas.end());
+  rearrangement.added_files.insert(a);
+  deltas.insert(std::make_pair(a, std::make_pair(null_ident, ident)));
+}
+
+void
+change_set::apply_delta(file_path const & path,
+                        file_id const & src,
+                        file_id const & dst)
+{
+  I(deltas.find(path) == deltas.end());
+  deltas.insert(std::make_pair(path, std::make_pair(src, dst)));
+}
+
+void
+change_set::delete_file(file_path const & d)
+{
+  I(rearrangement.deleted_files.find(d) == rearrangement.deleted_files.end());
+  rearrangement.deleted_files.insert(d);
+}
+
+void
+change_set::delete_dir(file_path const & d)
+{
+  I(rearrangement.deleted_dirs.find(d) == rearrangement.deleted_dirs.end());
+  rearrangement.deleted_dirs.insert(d);
+}
+
+void
+change_set::rename_file(file_path const & a, file_path const & b)
+{
+  I(rearrangement.renamed_files.find(a) == rearrangement.renamed_files.end());
+  rearrangement.renamed_files.insert(std::make_pair(a,b));
+}
+
+void
+change_set::rename_dir(file_path const & a, file_path const & b)
+{
+  I(rearrangement.renamed_dirs.find(a) == rearrangement.renamed_dirs.end());
+  rearrangement.renamed_dirs.insert(std::make_pair(a,b));
+}
+
+
+bool
+change_set::path_rearrangement::operator==(path_rearrangement const & other) const
+{
+  return deleted_files == other.deleted_files &&
+    deleted_dirs == other.deleted_dirs &&
+    renamed_files == other.renamed_files &&
+    renamed_dirs == other.renamed_dirs &&
+    added_files == other.added_files;
+}
+
+bool
+change_set::path_rearrangement::empty() const
+{
+  return deleted_files.empty() &&
+    deleted_dirs.empty() &&
+    renamed_files.empty() &&
+    renamed_dirs.empty() &&
+    added_files.empty();
+}
+
+bool
+change_set::path_rearrangement::has_added_file(file_path const & file) const
+{
+  return added_files.find(file) != added_files.end();
+}
+
+bool
+change_set::path_rearrangement::has_deleted_file(file_path const & file) const
+{
+  return deleted_files.find(file) != deleted_files.end();
+}
+
+bool
+change_set::path_rearrangement::has_renamed_file_dst(file_path const & file) const
+{
+  // FIXME: this is inefficient, but improvements would require a different
+  // structure for renamed_files (or perhaps a second reverse map). For now
+  // we'll assume that few files will be renamed per changeset.
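
  // Editor's note: the reverse map contemplated in the FIXME above would
  // look roughly like this (sketch only; 'renamed_files_rev' is a
  // hypothetical member, not part of the changeset):
#if 0
  std::map<file_path, file_path> renamed_files_rev;   // dst -> src
  bool has_renamed_file_dst(file_path const & file) const
  {
    return renamed_files_rev.find(file) != renamed_files_rev.end();
  }
#endif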
+ for (std::map::const_iterator rf = renamed_files.begin(); + rf != renamed_files.end(); ++rf) + if (rf->second == file) + return true; + return false; +} + +bool +change_set::path_rearrangement::has_renamed_file_src(file_path const & file) const +{ + return renamed_files.find(file) != renamed_files.end(); +} + +bool +change_set::empty() const +{ + return deltas.empty() && rearrangement.empty(); +} + +bool +change_set::operator==(change_set const & other) const +{ + return rearrangement == other.rearrangement && + deltas == other.deltas; +} + + +// simple accessors + +inline tid const & +path_item_parent(path_item const & p) +{ + return p.parent; +} + +inline ptype const & +path_item_type(path_item const & p) +{ + return p.ty; +} + +inline path_component +path_item_name(path_item const & p) +{ + return p.name; +} + +inline tid +path_state_tid(path_state::const_iterator i) +{ + return i->first; +} + +inline path_item const & +path_state_item(path_state::const_iterator i) +{ + return i->second; +} + + + +// structure dumping +/* + +static void +dump_renumbering(std::string const & s, + state_renumbering const & r) +{ + L(F("BEGIN dumping renumbering '%s'\n") % s); + for (state_renumbering::const_iterator i = r.begin(); + i != r.end(); ++i) + { + L(F("%d -> %d\n") % i->first % i->second); + } + L(F("END dumping renumbering '%s'\n") % s); +} + +static void +dump_state(std::string const & s, + path_state const & st) +{ + L(F("BEGIN dumping state '%s'\n") % s); + for (path_state::const_iterator i = st.begin(); + i != st.end(); ++i) + { + L(F("state '%s': tid %d, parent %d, type %s, name %s\n") + % s + % path_state_tid(i) + % path_item_parent(path_state_item(i)) + % (path_item_type(path_state_item(i)) == ptype_directory ? "dir" : "file") + % the_path_component_maker.lookup(path_item_name(path_state_item(i)))); + } + L(F("END dumping state '%s'\n") % s); +} + +static void +dump_analysis(std::string const & s, + path_analysis const & t) +{ + L(F("BEGIN dumping tree '%s'\n") % s); + dump_state(s + " first", t.first); + dump_state(s + " second", t.second); + L(F("END dumping tree '%s'\n") % s); +} + +*/ + + +// sanity checking + +static void +check_sets_disjoint(std::set const & a, + std::set const & b) +{ + std::set isect; + std::set_intersection(a.begin(), a.end(), + b.begin(), b.end(), + std::inserter(isect, isect.begin())); + if (!global_sanity.relaxed) + { + I(isect.empty()); + } +} + +change_set::path_rearrangement::path_rearrangement(path_rearrangement const & other) +{ + other.check_sane(); + deleted_files = other.deleted_files; + deleted_dirs = other.deleted_dirs; + renamed_files = other.renamed_files; + renamed_dirs = other.renamed_dirs; + added_files = other.added_files; +} + +change_set::path_rearrangement const & +change_set::path_rearrangement::operator=(path_rearrangement const & other) +{ + other.check_sane(); + deleted_files = other.deleted_files; + deleted_dirs = other.deleted_dirs; + renamed_files = other.renamed_files; + renamed_dirs = other.renamed_dirs; + added_files = other.added_files; + return *this; +} + +static void +extract_pairs_and_insert(std::map const & in, + std::set & firsts, + std::set & seconds) +{ + for (std::map::const_iterator i = in.begin(); + i != in.end(); ++i) + { + firsts.insert(i->first); + seconds.insert(i->second); + } +} + +template +static void +extract_first(std::map const & m, std::set & s) +{ + s.clear(); + for (typename std::map::const_iterator i = m.begin(); + i != m.end(); ++i) + { + s.insert(i->first); + } +} + +static void 
+extract_killed(path_analysis const & a, + std::set & killed); + + +static void +check_no_deltas_on_killed_files(path_analysis const & pa, + change_set::delta_map const & del) +{ + std::set killed; + std::set delta_paths; + + extract_killed(pa, killed); + extract_first(del, delta_paths); + check_sets_disjoint(killed, delta_paths); +} + +static void +check_delta_entries_not_directories(path_analysis const & pa, + change_set::delta_map const & dels); + +void +analyze_rearrangement(change_set::path_rearrangement const & pr, + path_analysis & pa, + tid_source & ts); + +void +sanity_check_path_analysis(path_analysis const & pr); + +void +change_set::path_rearrangement::check_sane() const +{ + delta_map del; + this->check_sane(del); +} + +void +change_set::path_rearrangement::check_sane(delta_map const & deltas) const +{ + tid_source ts; + path_analysis pa; + analyze_rearrangement(*this, pa, ts); + sanity_check_path_analysis (pa); + + check_no_deltas_on_killed_files(pa, deltas); + check_delta_entries_not_directories(pa, deltas); + + // FIXME: extend this as you manage to think of more invariants + // which are cheap enough to check at this level. + std::set renamed_srcs, renamed_dsts; + extract_pairs_and_insert(renamed_files, renamed_srcs, renamed_dsts); + extract_pairs_and_insert(renamed_dirs, renamed_srcs, renamed_dsts); + + // Files cannot be split nor joined by renames. + I(renamed_files.size() + renamed_dirs.size() == renamed_srcs.size()); + I(renamed_files.size() + renamed_dirs.size() == renamed_dsts.size()); + + check_sets_disjoint(deleted_files, deleted_dirs); + check_sets_disjoint(deleted_files, renamed_srcs); + check_sets_disjoint(deleted_dirs, renamed_srcs); + + check_sets_disjoint(added_files, renamed_dsts); +} + +change_set::change_set(change_set const & other) +{ + other.check_sane(); + rearrangement = other.rearrangement; + deltas = other.deltas; +} + +change_set const &change_set::operator=(change_set const & other) +{ + other.check_sane(); + rearrangement = other.rearrangement; + deltas = other.deltas; + return *this; +} + +void +change_set::check_sane() const +{ + // FIXME: extend this as you manage to think of more invariants + // which are cheap enough to check at this level. 
+
+  rearrangement.check_sane(this->deltas);
+
+  for (std::set<file_path>::const_iterator i = rearrangement.added_files.begin();
+       i != rearrangement.added_files.end(); ++i)
+    {
+      delta_map::const_iterator j = deltas.find(*i);
+      if (!global_sanity.relaxed)
+        {
+          I(j != deltas.end());
+          I(null_id(delta_entry_src(j)));
+          I(!null_id(delta_entry_dst(j)));
+        }
+    }
+
+  for (delta_map::const_iterator i = deltas.begin();
+       i != deltas.end(); ++i)
+    {
+      if (!global_sanity.relaxed)
+        {
+          I(!null_name(delta_entry_path(i)));
+          I(!null_id(delta_entry_dst(i)));
+          I(!(delta_entry_src(i) == delta_entry_dst(i)));
+          if (null_id(delta_entry_src(i)))
+            I(rearrangement.added_files.find(delta_entry_path(i))
+              != rearrangement.added_files.end());
+        }
+    }
+
+}
+
+static void
+sanity_check_path_item(path_item const & pi)
+{
+}
+
+static void
+confirm_proper_tree(path_state const & ps)
+{
+  smap<tid, bool> confirmed;
+  I(ps.find(root_tid) == ps.end());
+  for (path_state::const_iterator i = ps.begin(); i != ps.end(); ++i)
+    {
+      tid curr = i->first;
+      path_item item = i->second;
+      smap<tid, bool> ancs;
+
+      while (confirmed.find(curr) == confirmed.end())
+        {
+          sanity_check_path_item(item);
+          I(ancs.find(curr) == ancs.end());
+          ancs.insert(std::make_pair(curr, true));
+          if (path_item_parent(item) == root_tid)
+            break;
+          else
+            {
+              curr = path_item_parent(item);
+              path_state::const_iterator j = ps.find(curr);
+              I(j != ps.end());
+
+              // if we're null, our parent must also be null
+              if (null_name(item.name))
+                I(null_name(path_state_item(j).name));
+
+              item = path_state_item(j);
+              I(path_item_type(item) == ptype_directory);
+            }
+        }
+      std::copy(ancs.begin(), ancs.end(),
+                inserter(confirmed, confirmed.begin()));
+    }
+  I(confirmed.find(root_tid) == confirmed.end());
+}
+
+static void
+confirm_unique_entries_in_directories(path_state const & ps)
+{
+  smap<std::pair<tid, path_component>, bool> entries;
+  for (path_state::const_iterator i = ps.begin(); i != ps.end(); ++i)
+    {
+      if (null_name(path_item_name(i->second)))
+        {
+          I(path_item_parent(i->second) == root_tid);
+          continue;
+        }
+
+      std::pair<tid, path_component> p = std::make_pair(path_item_parent(i->second),
+                                                        path_item_name(i->second));
+      I(entries.find(p) == entries.end());
+      entries.insert(std::make_pair(p, true));
+    }
+}
+
+static void
+sanity_check_path_state(path_state const & ps)
+{
+  confirm_proper_tree(ps);
+  confirm_unique_entries_in_directories(ps);
+}
+
+path_item::path_item(tid p, ptype t, path_component n)
+  : parent(p), ty(t), name(n)
+{
+  sanity_check_path_item(*this);
+}
+
+path_item::path_item(path_item const & other)
+  : parent(other.parent), ty(other.ty), name(other.name)
+{
+  sanity_check_path_item(*this);
+}
+
+path_item const & path_item::operator=(path_item const & other)
+{
+  parent = other.parent;
+  ty = other.ty;
+  name = other.name;
+  sanity_check_path_item(*this);
+  return *this;
+}
+
+bool path_item::operator==(path_item const & other) const
+{
+  return this->parent == other.parent &&
+    this->ty == other.ty &&
+    this->name == other.name;
+}
+
+
+static void
+check_states_agree(path_state const & p1,
+                   path_state const & p2)
+{
+  path_analysis pa;
+  pa.first = p1;
+  pa.second = p2;
+  // dump_analysis("agreement", pa);
+  for (path_state::const_iterator i = p1.begin(); i != p1.end(); ++i)
+    {
+      path_state::const_iterator j = p2.find(i->first);
+      I(j != p2.end());
+      I(path_item_type(i->second) == path_item_type(j->second));
+      // I(!(null_name(path_item_name(i->second))
+      //     &&
+      //     null_name(path_item_name(j->second))));
+    }
+}
+
+void
+sanity_check_path_analysis(path_analysis const & pr)
+{
+  sanity_check_path_state(pr.first);
+  sanity_check_path_state(pr.second);
+  check_states_agree(pr.first, pr.second);
+  check_states_agree(pr.second, pr.first);
+}
+
+
+// construction helpers
+
+static boost::shared_ptr<directory_node>
+new_dnode()
+{
+  return boost::shared_ptr<directory_node>(new directory_node());
+}
+
+static boost::shared_ptr<directory_node>
+dnode(directory_map & dir, tid t)
+{
+  boost::shared_ptr<directory_node> node;
+  directory_map::const_iterator dirent = dir.find(t);
+  if (dirent == dir.end())
+    {
+      node = new_dnode();
+      dir.insert(std::make_pair(t, node));
+    }
+  else
+    node = dirent->second;
+  return node;
+}
+
+
+// This function takes a vector of path components and joins them into a
+// single file_path.  Valid input may be a single-element vector whose sole
+// element is the empty path component (""); this represents the null path,
+// which we use to represent non-existent files.  Alternatively, input may
+// be a multi-element vector, in which case all elements of the vector are
+// required to be non-null.  The following are valid inputs (with strings
+// replaced by their interned version, of course):
+//    - [""]
+//    - ["foo"]
+//    - ["foo", "bar"]
+// The following are not:
+//    - []
+//    - ["foo", ""]
+//    - ["", "bar"]
+static void
+compose_path(std::vector<path_component> const & names,
+             file_path & path)
+{
+  try
+    {
+      std::vector<path_component>::const_iterator i = names.begin();
+      I(i != names.end());
+      fs::path p = mkpath(the_path_component_maker.lookup(*i));
+      ++i;
+      if (names.size() > 1)
+        I(!null_name(*i));
+      for ( ; i != names.end(); ++i)
+        {
+          I(!null_name(*i));
+          p /= mkpath(the_path_component_maker.lookup(*i));
+        }
+      path = file_path(p.string());
+    }
+  catch (std::runtime_error & e)
+    {
+      throw informative_failure(e.what());
+    }
+}
+
+static void
+get_full_path(path_state const & state,
+              tid t,
+              std::vector<path_component> & pth)
+{
+  std::vector<path_component> tmp;
+  while (t != root_tid)
+    {
+      path_state::const_iterator i = state.find(t);
+      I(i != state.end());
+      tmp.push_back(path_item_name(i->second));
+      t = path_item_parent(i->second);
+    }
+  pth.clear();
+  std::copy(tmp.rbegin(), tmp.rend(), inserter(pth, pth.begin()));
+}
+
+static void
+get_full_path(path_state const & state,
+              tid t,
+              file_path & pth)
+{
+  std::vector<path_component> tmp;
+  get_full_path(state, t, tmp);
+  // L(F("got %d-entry path for tid %d\n") % tmp.size() % t);
+  compose_path(tmp, pth);
+}
+
+static void
+clear_rearrangement(change_set::path_rearrangement & pr)
+{
+  pr.deleted_files.clear();
+  pr.deleted_dirs.clear();
+  pr.renamed_files.clear();
+  pr.renamed_dirs.clear();
+  pr.added_files.clear();
+}
+
+static void
+clear_change_set(change_set & cs)
+{
+  clear_rearrangement(cs.rearrangement);
+  cs.deltas.clear();
+}
+
+static void
+compose_rearrangement(path_analysis const & pa,
+                      change_set::path_rearrangement & pr)
+{
+  clear_rearrangement(pr);
+
+  for (path_state::const_iterator i = pa.first.begin();
+       i != pa.first.end(); ++i)
+    {
+      tid curr(path_state_tid(i));
+      std::vector<path_component> old_name, new_name;
+      file_path old_path, new_path;
+
+      path_state::const_iterator j = pa.second.find(curr);
+      I(j != pa.second.end());
+      path_item old_item(path_state_item(i));
+      path_item new_item(path_state_item(j));
+
+      // compose names
+      if (!null_name(path_item_name(old_item)))
+        {
+          get_full_path(pa.first, curr, old_name);
+          compose_path(old_name, old_path);
+        }
+
+      if (!null_name(path_item_name(new_item)))
+        {
+          get_full_path(pa.second, curr, new_name);
+          compose_path(new_name, new_path);
+        }
+
+      if (old_path == new_path)
+        {
+          L(F("skipping preserved %s %d : '%s'\n")
+            % (path_item_type(old_item) == ptype_directory ? "directory" : "file")
+            % curr % old_path);
+          continue;
+        }
+
+      L(F("analyzing %s %d : '%s' -> '%s'\n")
+        % (path_item_type(old_item) == ptype_directory ? "directory" : "file")
+        % curr % old_path % new_path);
+
+      if (null_name(path_item_name(old_item)))
+        {
+          // an addition (which must be a file, not a directory)
+          I(!null_name(path_item_name(new_item)));
+          I(path_item_type(new_item) != ptype_directory);
+          pr.added_files.insert(new_path);
+        }
+      else if (null_name(path_item_name(new_item)))
+        {
+          // a deletion
+          I(!null_name(path_item_name(old_item)));
+          switch (path_item_type(new_item))
+            {
+            case ptype_directory:
+              pr.deleted_dirs.insert(old_path);
+              break;
+            case ptype_file:
+              pr.deleted_files.insert(old_path);
+              break;
+            }
+        }
+      else
+        {
+          // a generic rename
+          switch (path_item_type(new_item))
+            {
+            case ptype_directory:
+              pr.renamed_dirs.insert(std::make_pair(old_path, new_path));
+              break;
+            case ptype_file:
+              pr.renamed_files.insert(std::make_pair(old_path, new_path));
+              break;
+            }
+        }
+    }
+}
+
+
+
+
+//
+// this takes a path of the form
+//
+//  "p[0]/p[1]/.../p[n-1]/p[n]"
+//
+// and fills in a vector of paths corresponding to p[0] ... p[n-1],
+// along with a separate "leaf path" for element p[n].
+//
+
+static void
+split_path(file_path const & p,
+           std::vector<path_component> & components)
+{
+  components.clear();
+  fs::path tmp = mkpath(p());
+  for (fs::path::iterator i = tmp.begin(); i != tmp.end(); ++i)
+    components.push_back(the_path_component_maker.make(*i));
+}
+
+static void
+split_path(file_path const & p,
+           std::vector<path_component> & prefix,
+           path_component & leaf_path)
+{
+  split_path(p, prefix);
+  I(prefix.size() > 0);
+  leaf_path = prefix.back();
+  prefix.pop_back();
+}
+
+static bool
+lookup_path(std::vector<path_component> const & pth,
+            directory_map const & dir,
+            tid & t)
+{
+  t = root_tid;
+  for (std::vector<path_component>::const_iterator i = pth.begin();
+       i != pth.end(); ++i)
+    {
+      directory_map::const_iterator dirent = dir.find(t);
+      if (dirent != dir.end())
+        {
+          boost::shared_ptr<directory_node> node = dirent->second;
+          directory_node::const_iterator entry = node->find(*i);
+          if (entry == node->end())
+            return false;
+          t = directory_entry_tid(entry);
+        }
+      else
+        return false;
+    }
+  return true;
+}
+
+static bool
+lookup_path(file_path const & pth,
+            directory_map const & dir,
+            tid & t)
+{
+  std::vector<path_component> vec;
+  split_path(pth, vec);
+  return lookup_path(vec, dir, t);
+}
+
+static tid
+ensure_entry(directory_map & dmap,
+             path_state & state,
+             tid dir_tid,
+             ptype entry_ty,
+             path_component entry,
+             tid_source & ts)
+{
+  I(!null_name(entry));
+
+  if (dir_tid != root_tid)
+    {
+      path_state::const_iterator parent = state.find(dir_tid);
+      I(parent != state.end());
+
+      // if our parent is null, we immediately become null too, and attach
+      // to the root node (where all null entries reside)
+      if (null_name(path_item_name(path_state_item(parent))))
+        {
+          tid new_tid = ts.next();
+          state.insert(std::make_pair(new_tid,
+                                      path_item(root_tid, entry_ty,
+                                                make_null_component())));
+          return new_tid;
+        }
+    }
+
+  boost::shared_ptr<directory_node> node = dnode(dmap, dir_tid);
+  directory_node::const_iterator node_entry = node->find(entry);
+
+  if (node_entry != node->end())
+    {
+      I(node_entry->second.first == entry_ty);
+      return node_entry->second.second;
+    }
+  else
+    {
+      tid new_tid = ts.next();
+      state.insert(std::make_pair(new_tid, path_item(dir_tid, entry_ty, entry)));
+      node->insert(std::make_pair(entry, std::make_pair(entry_ty, new_tid)));
+      return new_tid;
+    }
+}
+
+static tid
+ensure_dir_in_map(std::vector<path_component> pth,
+                  directory_map & dmap,
+                  path_state & state,
+                  tid_source & ts)
+{
+  tid dir_tid = root_tid;
+  for (std::vector<path_component>::const_iterator p = pth.begin();
+       p != pth.end(); ++p)
+    {
+      dir_tid = ensure_entry(dmap, state, dir_tid,
+                             ptype_directory, *p, ts);
+    }
+  return dir_tid;
+}
+
+static tid
+ensure_dir_in_map(file_path const & path,
+                  directory_map & dmap,
+                  path_state & state,
+                  tid_source & ts)
+{
+  std::vector<path_component> components;
+  split_path(path, components);
+  return ensure_dir_in_map(components, dmap, state, ts);
+}
+
+static tid
+ensure_file_in_map(file_path const & path,
+                   directory_map & dmap,
+                   path_state & state,
+                   tid_source & ts)
+{
+  std::vector<path_component> prefix;
+  path_component leaf_path;
+  split_path(path, prefix, leaf_path);
+
+  I(!null_name(leaf_path));
+  tid dir_tid = ensure_dir_in_map(prefix, dmap, state, ts);
+  return ensure_entry(dmap, state, dir_tid, ptype_file, leaf_path, ts);
+}
+
+static void
+ensure_entries_exist(path_state const & self_state,
+                     directory_map & other_dmap,
+                     path_state & other_state,
+                     tid_source & ts)
+{
+  for (path_state::const_iterator i = self_state.begin();
+       i != self_state.end(); ++i)
+    {
+      if (other_state.find(path_state_tid(i)) != other_state.end())
+        continue;
+
+      if (null_name(path_item_name(path_state_item(i))))
+        continue;
+
+      file_path full;
+      get_full_path(self_state, path_state_tid(i), full);
+      switch (path_item_type(path_state_item(i)))
+        {
+        case ptype_directory:
+          ensure_dir_in_map(full, other_dmap, other_state, ts);
+          break;
+
+        case ptype_file:
+          ensure_file_in_map(full, other_dmap, other_state, ts);
+          break;
+        }
+    }
+}
+
+
+static void
+apply_state_renumbering(state_renumbering const & renumbering,
+                        path_state & state)
+{
+  sanity_check_path_state(state);
+  path_state tmp(state);
+  state.clear();
+
+  for (path_state::const_iterator i = tmp.begin(); i != tmp.end(); ++i)
+    {
+      path_item item = path_state_item(i);
+      tid t = path_state_tid(i);
+
+      state_renumbering::const_iterator j = renumbering.find(t);
+      if (j != renumbering.end())
+        t = j->second;
+
+      j = renumbering.find(item.parent);
+      if (j != renumbering.end())
+        item.parent = j->second;
+
+      state.insert(std::make_pair(t, item));
+    }
+  sanity_check_path_state(state);
+}
+
+static void
+apply_state_renumbering(state_renumbering const & renumbering,
+                        path_analysis & pa)
+{
+  apply_state_renumbering(renumbering, pa.first);
+  apply_state_renumbering(renumbering, pa.second);
+}
+
+
+// this takes a path in the path space defined by input_dir and rebuilds it
+// in the path space defined by output_space, including any changes to
+// parents in the path (rather than directly to the path leaf name).  it
+// therefore *always* succeeds; sometimes it does nothing if there's no
+// affected parent, but you always get a rebuilt path in the output space.
+
+static void
+reconstruct_path(file_path const & input,
+                 directory_map const & input_dir,
+                 path_state const & output_space,
+                 file_path & output)
+{
+  std::vector<path_component> vec;
+  std::vector<path_component> rebuilt;
+
+  // L(F("reconstructing path '%s' under analysis\n") % input);
+
+  split_path(input, vec);
+
+  tid t = root_tid;
+  std::vector<path_component>::const_iterator pth = vec.begin();
+  while (pth != vec.end())
+    {
+      directory_map::const_iterator dirent = input_dir.find(t);
+      if (dirent == input_dir.end())
+        break;
+
+      boost::shared_ptr<directory_node> node = dirent->second;
+      directory_node::const_iterator entry = node->find(*pth);
+      if (entry == node->end())
+        break;
+
+      {
+        // check to see if this is the image of an added or deleted entry
+        // (i.e. null name in output space); if so, it terminates our
+        // search.
+        path_state::const_iterator i = output_space.find(directory_entry_tid(entry));
+        I(i != output_space.end());
+        if (null_name(path_item_name(path_state_item(i))))
+          {
+            // L(F("input path element '%s' is null in output space, mapping truncated\n") % *pth);
+            break;
+          }
+      }
+
+      // L(F("resolved entry '%s' in reconstruction\n") % *pth);
+      ++pth;
+      t = directory_entry_tid(entry);
+
+      if (directory_entry_type(entry) != ptype_directory)
+        break;
+    }
+
+  get_full_path(output_space, t, rebuilt);
+
+  while (pth != vec.end())
+    {
+      // L(F("copying tail entry '%s' in reconstruction\n") % *pth);
+      rebuilt.push_back(*pth);
+      ++pth;
+    }
+
+  compose_path(rebuilt, output);
+  // L(F("reconstructed path '%s' as '%s'\n") % input % output);
+}
+
+
+static void
+build_directory_map(path_state const & state,
+                    directory_map & dir)
+{
+  sanity_check_path_state(state);
+  dir.clear();
+  // L(F("building directory map for %d entries\n") % state.size());
+  for (path_state::const_iterator i = state.begin(); i != state.end(); ++i)
+    {
+      tid curr = path_state_tid(i);
+      path_item item = path_state_item(i);
+      tid parent = path_item_parent(item);
+      path_component name = path_item_name(item);
+      ptype type = path_item_type(item);
+      // L(F("adding entry %s (%s %d) to directory node %d\n")
+      //   % name % (type == ptype_directory ? "dir" : "file") % curr % parent);
+      dnode(dir, parent)->insert(std::make_pair(name, std::make_pair(type, curr)));
+
+      // also, make sure to add current node if it's a directory, even if
+      // there are no entries in it
+      if (type == ptype_directory)
+        dnode(dir, curr);
+    }
+}
+
+
+void
+analyze_rearrangement(change_set::path_rearrangement const & pr,
+                      path_analysis & pa,
+                      tid_source & ts)
+{
+  directory_map first_map, second_map;
+  state_renumbering renumbering;
+  std::set<tid> damaged_in_first, damaged_in_second;
+
+  pa.first.clear();
+  pa.second.clear();
+
+  for (std::set<file_path>::const_iterator f = pr.deleted_files.begin();
+       f != pr.deleted_files.end(); ++f)
+    {
+      tid x = ensure_file_in_map(*f, first_map, pa.first, ts);
+      pa.second.insert(std::make_pair(x, path_item(root_tid, ptype_file,
+                                                   make_null_component())));
+      damaged_in_first.insert(x);
+    }
+
+  for (std::set<file_path>::const_iterator d = pr.deleted_dirs.begin();
+       d != pr.deleted_dirs.end(); ++d)
+    {
+      tid x = ensure_dir_in_map(*d, first_map, pa.first, ts);
+      pa.second.insert(std::make_pair(x, path_item(root_tid, ptype_directory,
+                                                   make_null_component())));
+      damaged_in_first.insert(x);
+    }
+
+  for (std::map<file_path, file_path>::const_iterator rf = pr.renamed_files.begin();
+       rf != pr.renamed_files.end(); ++rf)
+    {
+      tid a = ensure_file_in_map(rf->first, first_map, pa.first, ts);
+      tid b = ensure_file_in_map(rf->second, second_map, pa.second, ts);
+      I(renumbering.find(a) == renumbering.end());
+      renumbering.insert(std::make_pair(b, a));
+      damaged_in_first.insert(a);
+      damaged_in_second.insert(b);
+    }
+
+  for (std::map<file_path, file_path>::const_iterator rd = pr.renamed_dirs.begin();
+       rd != pr.renamed_dirs.end(); ++rd)
+    {
+      tid a = ensure_dir_in_map(rd->first, first_map, pa.first, ts);
+      tid b = ensure_dir_in_map(rd->second, second_map, pa.second, ts);
+      I(renumbering.find(a) == renumbering.end());
+      renumbering.insert(std::make_pair(b, a));
+      damaged_in_first.insert(a);
+      damaged_in_second.insert(b);
+    }
+
+  for (std::set<file_path>::const_iterator a = pr.added_files.begin();
+       a != pr.added_files.end(); ++a)
+    {
+      tid x = ensure_file_in_map(*a, second_map, pa.second, ts);
+      pa.first.insert(std::make_pair(x, path_item(root_tid, ptype_file,
+                                                  make_null_component())));
+      damaged_in_second.insert(x);
+    }
+
+  // we now have two states which probably have a number of entries in
+  // common.  we know already of an interesting set of entries they have in
+  // common: all the renamed_foo entries.  for each such renamed_foo(a,b)
+  // entry, we made an entry in our state_renumbering of the form b->a,
+  // while building the states.
+
+  // dump_analysis("analyzed", pa);
+  // dump_renumbering("first", renumbering);
+  apply_state_renumbering(renumbering, pa.second);
+  build_directory_map(pa.first, first_map);
+  build_directory_map(pa.second, second_map);
+  renumbering.clear();
+  // dump_analysis("renumbered once", pa);
+
+  // that only gets us half way, though:
+  //
+  // - every object which was explicitly moved (thus stayed alive) has been
+  //   renumbered in re.second to have the same tid as in re.first
+  //
+  // - every object which was merely mentioned in passing -- say due to
+  //   being an intermediate directory in a path -- and was not moved, still
+  //   has differing tids in re.first and re.second (or worse, may only
+  //   even have an *entry* in one of them)
+  //
+  // the second point here is what we need to correct: if a path didn't
+  // move, wasn't destroyed, and wasn't added, we want it to have the same
+  // tid.
+  // but that's a relatively easy condition to check; we've been
+  // keeping sets of all the objects which were damaged on each side of
+  // this business anyways.
+
+
+  // pass #1 makes sure that all the entries in each state *exist* within
+  // the other state, even if they have the wrong numbers
+
+  ensure_entries_exist(pa.first, second_map, pa.second, ts);
+  ensure_entries_exist(pa.second, first_map, pa.first, ts);
+
+  // pass #2 identifies common un-damaged elements from 2->1 and inserts
+  // renumberings
+
+  for (path_state::const_iterator i = pa.second.begin();
+       i != pa.second.end(); ++i)
+    {
+      tid first_tid, second_tid;
+      second_tid = path_state_tid(i);
+      file_path full;
+      if (pa.first.find(second_tid) != pa.first.end())
+        continue;
+      get_full_path(pa.second, second_tid, full);
+      if (damaged_in_second.find(second_tid) != damaged_in_second.end())
+        continue;
+      if (null_name(path_item_name(path_state_item(i))))
+        continue;
+      I(lookup_path(full, first_map, first_tid));
+      renumbering.insert(std::make_pair(second_tid, first_tid));
+    }
+
+  // dump_renumbering("second", renumbering);
+  apply_state_renumbering(renumbering, pa.second);
+  // dump_analysis("renumbered again", pa);
+
+  // that should be the whole deal; if we don't have consensus at this
+  // point we have done something wrong.
+  sanity_check_path_analysis(pa);
+}
+
+void
+normalize_path_rearrangement(change_set::path_rearrangement & norm)
+{
+  path_analysis tmp;
+  tid_source ts;
+
+  analyze_rearrangement(norm, tmp, ts);
+  clear_rearrangement(norm);
+  compose_rearrangement(tmp, norm);
+}
+
+void
+normalize_change_set(change_set & norm)
+{
+  normalize_path_rearrangement(norm.rearrangement);
+  change_set::delta_map tmp = norm.deltas;
+  for (change_set::delta_map::const_iterator i = tmp.begin();
+       i != tmp.end(); ++i)
+    {
+      if (delta_entry_src(i) == delta_entry_dst(i))
+        norm.deltas.erase(delta_entry_path(i));
+    }
+}
+
+
+// begin stuff related to concatenation
+
+static void
+index_entries(path_state const & state,
+              std::map<file_path, tid> & files,
+              std::map<file_path, tid> & dirs)
+{
+  for (path_state::const_iterator i = state.begin();
+       i != state.end(); ++i)
+    {
+      file_path full;
+      path_item item = path_state_item(i);
+      get_full_path(state, path_state_tid(i), full);
+
+      if (null_name(path_item_name(item)))
+        continue;
+
+      switch (path_item_type(item))
+        {
+        case ptype_directory:
+          dirs.insert(std::make_pair(full, path_state_tid(i)));
+          break;
+
+        case ptype_file:
+          files.insert(std::make_pair(full, path_state_tid(i)));
+          break;
+        }
+    }
+}
+
+// this takes every (p1,t1) entry in b and, if (p1,t2) exists in a,
+// inserts (t1,t2) in the rename set.  in other words, it constructs the
+// renumbering from b->a
+static void
+extend_renumbering_from_path_identities(std::map<file_path, tid> const & a,
+                                        std::map<file_path, tid> const & b,
+                                        state_renumbering & renumbering)
+{
+  for (std::map<file_path, tid>::const_iterator i = b.begin();
+       i != b.end(); ++i)
+    {
+      I(!null_name(i->first));
+      std::map<file_path, tid>::const_iterator j = a.find(i->first);
+      if (j == a.end())
+        continue;
+      I(renumbering.find(i->second) == renumbering.end());
+      renumbering.insert(std::make_pair(i->second, j->second));
+    }
+}
+
+static void
+extend_state(path_state const & src,
+             path_state & dst)
+{
+  for (path_state::const_iterator i = src.begin();
+       i != src.end(); ++i)
+    {
+      if (dst.find(path_state_tid(i)) == dst.end())
+        dst.insert(*i);
+    }
+}
+
+static void
+ensure_tids_disjoint(path_analysis const & a,
+                     path_analysis const & b)
+{
+  for (path_state::const_iterator i = a.first.begin();
+       i != a.first.end(); ++i)
+    {
+      I(b.first.find(path_state_tid(i)) == b.first.end());
+    }
+  for (path_state::const_iterator i = b.first.begin();
+       i != b.first.end(); ++i)
+    {
+      I(a.first.find(path_state_tid(i)) == a.first.end());
+    }
+}
+
+static void
+extract_killed(path_analysis const & a,
+               std::set<file_path> & killed)
+{
+  killed.clear();
+  directory_map first_map, second_map;
+
+  build_directory_map(a.first, first_map);
+  build_directory_map(a.second, second_map);
+
+  for (directory_map::const_iterator i = first_map.begin();
+       i != first_map.end(); ++i)
+    {
+      tid dir_tid = i->first;
+      directory_map::const_iterator j = second_map.find(dir_tid);
+      I(j != second_map.end());
+
+      // a path P = DIR/LEAF is "killed" by a path_analysis iff the
+      // directory node named DIR in the post-state contains LEAF in the
+      // pre-state, and does not contain LEAF in the post-state
+
+      boost::shared_ptr<directory_node> first_node = i->second;
+      boost::shared_ptr<directory_node> second_node = j->second;
+
+      for (directory_node::const_iterator p = first_node->begin();
+           p != first_node->end(); ++p)
+        {
+          path_component first_name = directory_entry_name(p);
+          directory_node::const_iterator q = second_node->find(first_name);
+          if (q == second_node->end())
+            {
+              // found a killed entry
+              std::vector<path_component> killed_name;
+              file_path killed_path;
+              get_full_path(a.second, dir_tid, killed_name);
+              killed_name.push_back(first_name);
+              compose_path(killed_name, killed_path);
+              killed.insert(killed_path);
+            }
+        }
+    }
+}
+
+static void
+check_delta_entries_not_directories(path_analysis const & pa,
+                                    change_set::delta_map const & dels)
+{
+  directory_map dmap;
+  build_directory_map(pa.second, dmap);
+  for (change_set::delta_map::const_iterator i = dels.begin();
+       i != dels.end(); ++i)
+    {
+      tid delta_tid;
+      if (lookup_path(delta_entry_path(i), dmap, delta_tid))
+        {
+          path_state::const_iterator j = pa.second.find(delta_tid);
+          I(j != pa.second.end());
+          I(path_item_type(path_state_item(j)) == ptype_file);
+        }
+    }
+}
+
+static void
+concatenate_disjoint_analyses(path_analysis const & a,
+                              path_analysis const & b,
+                              std::set<file_path> const & a_killed,
+                              path_analysis & concatenated)
+{
+  std::map<file_path, tid> a_second_files, a_second_dirs;
+  std::map<file_path, tid> b_first_files, b_first_dirs;
+  path_analysis a_tmp(a), b_tmp(b);
+  state_renumbering renumbering;
+
+  // the trick here is that a.second and b.first supposedly refer to the
+  // same state-of-the-world, so all we need to do is:
+  //
+  //   - confirm that both analyses have disjoint tids
+  //   - work out which tids in b to identify with tids in a
+  //   - renumber b
+  //
+  //   - copy a.first -> concatenated.first
+  //   - insert all elements of b.first not already in concatenated.first
+  //   - copy b.second -> concatenated.second
+  //   - insert all elements of a.second not already in concatenated.second
+
+  ensure_tids_disjoint(a_tmp, b_tmp);
+
+  index_entries(a_tmp.second, a_second_files, a_second_dirs);
+  index_entries(b_tmp.first, b_first_files, b_first_dirs);
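+
+  // concretely: if a.second lists file "x" under tid 7 and b.first lists
+  // the same path "x" under tid 12, extend_renumbering_from_path_identities
+  // below records 12 -> 7, so that after apply_state_renumbering both
+  // analyses refer to "x" by tid 7.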
+
+  {
+    std::set<file_path>
+      a_second_file_set, a_second_dir_set,
+      b_first_file_set, b_first_dir_set;
+
+    extract_first(a_second_files, a_second_file_set);
+    extract_first(a_second_dirs, a_second_dir_set);
+    extract_first(b_first_files, b_first_file_set);
+    extract_first(b_first_dirs, b_first_dir_set);
+
+    // check that there are no entry-type mismatches
+    check_sets_disjoint(a_second_file_set, b_first_dir_set);
+    check_sets_disjoint(a_second_dir_set, b_first_file_set);
+
+    // check that there's no use of killed entries
+    check_sets_disjoint(a_killed, b_first_dir_set);
+    check_sets_disjoint(a_killed, b_first_file_set);
+  }
+
+  extend_renumbering_from_path_identities(a_second_files, b_first_files, renumbering);
+  extend_renumbering_from_path_identities(a_second_dirs, b_first_dirs, renumbering);
+
+  // dump_analysis("state A", a_tmp);
+  // dump_analysis("state B", b_tmp);
+  // dump_renumbering("concatenation", renumbering);
+  apply_state_renumbering(renumbering, b_tmp);
+
+  concatenated.first = a_tmp.first;
+  concatenated.second = b_tmp.second;
+
+  extend_state(b_tmp.first, concatenated.first);
+  extend_state(a_tmp.second, concatenated.second);
+
+  sanity_check_path_analysis(concatenated);
+}
+
+void
+concatenate_rearrangements(change_set::path_rearrangement const & a,
+                           change_set::path_rearrangement const & b,
+                           change_set::path_rearrangement & concatenated)
+{
+  a.check_sane();
+  b.check_sane();
+  concatenated = change_set::path_rearrangement();
+
+  tid_source ts;
+  path_analysis a_analysis, b_analysis, concatenated_analysis;
+
+  analyze_rearrangement(a, a_analysis, ts);
+  analyze_rearrangement(b, b_analysis, ts);
+
+  std::set<file_path> a_killed;
+  extract_killed(a_analysis, a_killed);
+
+  concatenate_disjoint_analyses(a_analysis,
+                                b_analysis,
+                                a_killed,
+                                concatenated_analysis);
+
+  compose_rearrangement(concatenated_analysis,
+                        concatenated);
+
+  concatenated.check_sane();
+}
+
+void
+concatenate_change_sets(change_set const & a,
+                        change_set const & b,
+                        change_set & concatenated)
+{
+  a.check_sane();
+  b.check_sane();
+
+  L(F("concatenating change sets\n"));
+
+  tid_source ts;
+  path_analysis a_analysis, b_analysis, concatenated_analysis;
+
+  analyze_rearrangement(a.rearrangement, a_analysis, ts);
+  analyze_rearrangement(b.rearrangement, b_analysis, ts);
+
+  std::set<file_path> a_killed;
+  extract_killed(a_analysis, a_killed);
+
+  concatenate_disjoint_analyses(a_analysis,
+                                b_analysis,
+                                a_killed,
+                                concatenated_analysis);
+
+  compose_rearrangement(concatenated_analysis,
+                        concatenated.rearrangement);
+
+  // now process the deltas
+
+  concatenated.deltas.clear();
+  directory_map a_dst_map, b_src_map;
+  L(F("concatenating %d and %d deltas\n")
+    % a.deltas.size() % b.deltas.size());
+  build_directory_map(a_analysis.second, a_dst_map);
+  build_directory_map(b_analysis.first, b_src_map);
+
+  // first rename a's deltas under the rearrangement of b
+  for (change_set::delta_map::const_iterator del = a.deltas.begin();
+       del != a.deltas.end(); ++del)
+    {
+      file_path new_pth;
+      L(F("processing delta on %s\n") % delta_entry_path(del));
+
+      // work out the name of the entry in b.second
+      reconstruct_path(delta_entry_path(del), b_src_map, b_analysis.second, new_pth);
+      L(F("delta on %s in first changeset renamed to %s\n")
+        % delta_entry_path(del) % new_pth);
+
+      if (b.rearrangement.has_deleted_file(delta_entry_path(del)))
+        // the delta should be removed if the file is going to be deleted
+        L(F("discarding delta [%s]->[%s] for deleted file '%s'\n")
+          % delta_entry_src(del) % delta_entry_dst(del) % delta_entry_path(del));
+      else
+        concatenated.deltas.insert(std::make_pair(new_pth,
+                                                  std::make_pair(delta_entry_src(del),
+                                                                 delta_entry_dst(del))));
+    }
+
+  // next fuse any deltas id1->id2 and id2->id3 to id1->id3
+  for (change_set::delta_map::const_iterator del = b.deltas.begin();
+       del != b.deltas.end(); ++del)
+    {
+
+      file_path del_pth = delta_entry_path(del);
+      change_set::delta_map::const_iterator existing =
+        concatenated.deltas.find(del_pth);
+      if (existing != concatenated.deltas.end())
+        {
+          L(F("fusing deltas on %s : %s -> %s and %s -> %s\n")
+            % del_pth
+            % delta_entry_src(existing)
+            % delta_entry_dst(existing)
+            % delta_entry_src(del)
+            % delta_entry_dst(del));
+          I(delta_entry_dst(existing) == delta_entry_src(del));
+          std::pair<file_id, file_id> fused = std::make_pair(delta_entry_src(existing),
+                                                             delta_entry_dst(del));
+          concatenated.deltas.erase(del_pth);
+          concatenated.deltas.insert(std::make_pair(del_pth, fused));
+        }
+      else
+        {
+          L(F("delta on %s in second changeset copied forward\n") % del_pth);
+          // in general we don't want deltas on deleted files.  however, if
+          // a file has been deleted then re-added, then a delta is valid
+          // (it applies to the newly-added file)
+          if (!b.rearrangement.has_deleted_file(del_pth)
+              || b.rearrangement.has_added_file(del_pth)
+              || b.rearrangement.has_renamed_file_dst(del_pth))
+            concatenated.deltas.insert(*del);
+        }
+    }
+
+  normalize_change_set(concatenated);
+  concatenated.check_sane();
+
+  L(F("finished concatenation\n"));
+}
+
+// end stuff related to concatenation
+
+
+// begin stuff related to merging
+
+
+static void
+extend_renumbering_via_added_files(path_analysis const & a,
+                                   path_analysis const & b,
+                                   state_renumbering & existing_renumbering,
+                                   state_renumbering & renumbering)
+{
+  directory_map a_second_map;
+  build_directory_map(a.second, a_second_map);
+
+  for (path_state::const_iterator i = b.first.begin();
+       i != b.first.end(); ++i)
+    {
+      path_item item = path_state_item(i);
+      if (path_item_type(item) == ptype_file && null_name(path_item_name(item)))
+        {
+          path_state::const_iterator j = b.second.find(path_state_tid(i));
+          I(j != b.second.end());
+          path_component leaf_name = path_item_name(path_state_item(j));
+
+          I(path_item_type(path_state_item(j)) == ptype_file);
+          if (!null_name(leaf_name))
+            {
+              tid added_parent_tid = path_item_parent(path_state_item(j));
+              state_renumbering::const_iterator ren = existing_renumbering.find(added_parent_tid);
+              if (ren != existing_renumbering.end())
+                added_parent_tid = ren->second;
+              directory_map::const_iterator dirent = a_second_map.find(added_parent_tid);
+              if (dirent != a_second_map.end())
+                {
+                  boost::shared_ptr<directory_node> node = dirent->second;
+                  directory_node::const_iterator entry = node->find(leaf_name);
+                  if (entry != node->end() && directory_entry_type(entry) == ptype_file)
+                    {
+                      I(renumbering.find(path_state_tid(i)) == renumbering.end());
+                      renumbering.insert(std::make_pair(path_state_tid(i),
+                                                        directory_entry_tid(entry)));
+                    }
+                }
+            }
+        }
+    }
+}
+
+static bool
+find_item(tid t, path_state const & ps,
+          path_item & item)
+{
+  path_state::const_iterator i = ps.find(t);
+  if (i == ps.end())
+    return false;
+  item = path_state_item(i);
+  return true;
+}
+
+static bool
+find_items(tid t, path_analysis const & pa,
+           path_item & first, path_item & second)
+{
+  if (find_item(t, pa.first, first))
+    {
+      I(find_item(t, pa.second, second));
+      I(path_item_type(first) == path_item_type(second));
+      return true;
+    }
+  else
+    {
+      I(!find_item(t, pa.second, second));
+      return false;
+    }
+}
+
+static void
+resolve_conflict(tid t, ptype ty,
+                 path_analysis const & a_tmp,
+                 path_analysis const & b_tmp,
+                 path_item & resolved,
+                 path_state & resolved_conflicts,
+                 app_state & app)
+{
+  path_state::const_iterator i = resolved_conflicts.find(t);
+
+  path_item a_item, b_item;
+  find_item(t, a_tmp.second, a_item);
+  find_item(t, b_tmp.second, b_item);
+
+  file_path anc, a, b, res;
+  get_full_path(a_tmp.first, t, anc);
+  get_full_path(a_tmp.second, t, a);
+  get_full_path(b_tmp.second, t, b);
+
+  if (i != resolved_conflicts.end())
+    {
+      resolved = path_state_item(i);
+    }
+  else if (null_name(path_item_name(a_item)) &&
+           !null_name(path_item_name(b_item)))
+    {
+      L(F("delete of %s dominates rename to %s\n") % anc % b);
+      resolved = a_item;
+      resolved_conflicts.insert(std::make_pair(t, resolved));
+    }
+  else if (null_name(path_item_name(b_item)) &&
+           !null_name(path_item_name(a_item)))
+    {
+      L(F("delete of %s dominates rename to %s\n") % anc % a);
+      resolved = b_item;
+      resolved_conflicts.insert(std::make_pair(t, resolved));
+    }
+  else
+    {
+      switch (ty)
+        {
+        case ptype_file:
+          N(app.lua.hook_resolve_file_conflict(anc, a, b, res),
+            F("unable to resolve file conflict '%s' -> '%s' vs. '%s'") % anc % a % b);
+          break;
+        case ptype_directory:
+          N(app.lua.hook_resolve_dir_conflict(anc, a, b, res),
+            F("unable to resolve dir conflict '%s' -> '%s' vs. '%s'") % anc % a % b);
+          break;
+        }
+
+      N((res == a || res == b),
+        F("illegal conflict resolution '%s', wanted '%s' or '%s'\n") % res % a % b);
+
+      if (res == a)
+        I(find_item(t, a_tmp.second, resolved));
+      else
+        I(find_item(t, b_tmp.second, resolved));
+
+      resolved_conflicts.insert(std::make_pair(t, resolved));
+    }
+}
+
+static void
+ensure_no_rename_clobbers(path_analysis const & a,
+                          path_analysis const & b)
+{
+  // there is a special non-mergable pair of changes which we need
+  // to identify here:
+  //
+  //   tid i : x -> y   in change A
+  //   tid j : z -> x   in change B
+  //
+  // on the surface it looks like it ought to be mergable, since there is
+  // no conflict in the tids.  except for one problem: B effectively
+  // clobbered i with j.
+  // there is nothing you can append to change B to
+  // revive the identity of i; in fact you risk having i and j identified
+  // if you form the naive merge concatenation BA.  indeed, since A and B
+  // both supposedly start in the same state (in which i occupies name x),
+  // it really ought not to be possible to form B; you should have to
+  // accompany it with some sort of statement about the fate of i.
+  //
+  // as it stands, we're going to fault when we see this.  if it turns out
+  // that there's a legal way of constructing such changes, one option is
+  // to synthesize a delete of i in B; essentially read "z->x" as an
+  // implicit "delete x first if it exists in post-state".
+  //
+  // however, for the time being this is a fault because we believe they
+  // should be globally illegal clobbers.
+
+  directory_map b_first_map, b_second_map;
+  build_directory_map(b.first, b_first_map);
+  build_directory_map(b.second, b_second_map);
+  tid a_tid, b_tid;
+
+  for (path_state::const_iterator i = a.first.begin();
+       i != a.first.end(); ++i)
+    {
+      file_path anc_path, a_second_path;
+      a_tid = path_state_tid(i);
+      get_full_path(a.first, a_tid, anc_path);
+
+      if (!lookup_path(anc_path, b_first_map, b_tid))
+        {
+          file_path b_second_path;
+          reconstruct_path(anc_path, b_first_map, b.second, b_second_path);
+
+          N(!lookup_path(b_second_path, b_second_map, b_tid),
+            (F("tid %d (%s) clobbered tid %d (%s)\n")
+             % b_tid % b_second_path
+             % a_tid % anc_path));
+        }
+    }
+
+}
+
+static void
+project_missing_changes(path_analysis const & a_tmp,
+                        path_analysis const & b_tmp,
+                        path_analysis & b_merged,
+                        path_state & resolved_conflicts,
+                        app_state & app)
+{
+
+  // for each tid t adjusted in a:
+  //   - if t exists in b:
+  //     - if the change to t in b == change in a, skip
+  //     - else resolve conflict
+  //       - if conflict resolved in favour of a, append to merged
+  //       - if resolved in favour of b, skip
+  //   - else (no t in b) insert a's change to t in merged
+
+  for (path_state::const_iterator i = a_tmp.first.begin();
+       i != a_tmp.first.end(); ++i)
+    {
+      tid t = path_state_tid(i);
+      path_item a_first_item, a_second_item;
+      path_item b_first_item, b_second_item;
+      I(find_items(t, a_tmp, a_first_item, a_second_item));
+      if (find_items(t, b_tmp, b_first_item, b_second_item))
+        {
+          I(a_first_item == b_first_item);
+          if (a_second_item == b_second_item)
+            {
+              L(F("skipping common change on %s (tid %d)\n")
+                % path_item_name(a_first_item) % t);
+            }
+          else if (a_first_item == a_second_item)
+            {
+              L(F("skipping neutral change of %s -> %s (tid %d)\n")
+                % path_item_name(a_first_item)
+                % path_item_name(a_second_item)
+                % t);
+            }
+          else if (b_first_item == b_second_item)
+            {
+              L(F("propagating change on %s -> %s (tid %d)\n")
+                % path_item_name(b_first_item)
+                % path_item_name(b_second_item)
+                % t);
+              b_merged.first.insert(std::make_pair(t, b_second_item));
+              b_merged.second.insert(std::make_pair(t, a_second_item));
+            }
+          else
+            {
+              // conflict
+              path_item resolved;
+              resolve_conflict(t, path_item_type(a_first_item), a_tmp, b_tmp,
+                               resolved, resolved_conflicts, app);
+
+              if (resolved == a_second_item)
+                {
+                  L(F("conflict detected, resolved in A's favour\n"));
+                  b_merged.first.insert(std::make_pair(t, b_second_item));
+                  b_merged.second.insert(std::make_pair(t, a_second_item));
+                }
+              else
+                {
+                  L(F("conflict detected, resolved in B's favour\n"));
+                }
+            }
+        }
+      else
+        {
+          // there was no entry in b at all for this tid, copy it
+          b_merged.first.insert(std::make_pair(t, a_first_item));
+          b_merged.second.insert(std::make_pair(t, a_second_item));
+        }
+    }
+
+  // now drive through b.second's view of the directory structure, in case
+  // some intermediate b-only directories showed up in the preimages of
+  // A-favoured conflicts.
+  extend_state(b_tmp.second, b_merged.first);
+  extend_state(b_merged.first, b_merged.second);
+}
+
+static void
+rebuild_analysis(path_analysis const & src,
+                 path_analysis & dst,
+                 tid_source & ts)
+{
+  state_renumbering renumbering;
+
+  for (path_state::const_iterator i = src.first.begin();
+       i != src.first.end(); ++i)
+    renumbering.insert(std::make_pair(path_state_tid(i), ts.next()));
+
+  dst = src;
+  apply_state_renumbering(renumbering, dst);
+}
+
+static void
+merge_disjoint_analyses(path_analysis const & a,
+                        path_analysis const & b,
+                        path_analysis & a_renumbered,
+                        path_analysis & b_renumbered,
+                        path_analysis & a_merged,
+                        path_analysis & b_merged,
+                        tid_source & ts,
+                        app_state & app)
+{
+  // we have anc->a and anc->b and we want to construct a->merged and
+  // b->merged, leading to the eventual identity
+  // concatenate(a,a_merged) == concatenate(b,b_merged).
+
+  path_analysis a_tmp(a), b_tmp(b);
+  state_renumbering renumbering;
+
+  ensure_tids_disjoint(a_tmp, b_tmp);
+
+  // fault on a particular class of mal-formed changesets
+  ensure_no_rename_clobbers(a_tmp, b_tmp);
+  ensure_no_rename_clobbers(b_tmp, a_tmp);
+
+  // a.first and b.first refer to the same state-of-the-world.
+  //
+  // we begin by driving all the entries in a.first into b.first and vice
+  // versa.
+
+  {
+    directory_map a_first_map, b_first_map;
+    build_directory_map(a_tmp.first, a_first_map);
+    build_directory_map(b_tmp.first, b_first_map);
+    ensure_entries_exist(a_tmp.first, b_first_map, b_tmp.first, ts);
+    ensure_entries_exist(b_tmp.first, a_first_map, a_tmp.first, ts);
+  }
+
+  // we then drive any of the new arrivals in a.first to a.second, and
+  // likewise on b
+
+  {
+    directory_map a_second_map, b_second_map;
+    build_directory_map(a_tmp.second, a_second_map);
+    build_directory_map(b_tmp.second, b_second_map);
+    ensure_entries_exist(a_tmp.first, a_second_map, a_tmp.second, ts);
+    ensure_entries_exist(b_tmp.first, b_second_map, b_tmp.second, ts);
+  }
+
+  // we then index, identify, and renumber all the immediately apparent
+  // entries in each side.
+
+  {
+    std::map<file_path, tid> a_first_files, a_first_dirs;
+    std::map<file_path, tid> b_first_files, b_first_dirs;
+    index_entries(a_tmp.first, a_first_files, a_first_dirs);
+    index_entries(b_tmp.first, b_first_files, b_first_dirs);
+    extend_renumbering_from_path_identities(a_first_files, b_first_files, renumbering);
+    extend_renumbering_from_path_identities(a_first_dirs, b_first_dirs, renumbering);
+  }
+
+  // once renumbered, b_tmp will have moved a fair bit closer to a_tmp, in
+  // terms of tids.  there is still one set of files we haven't accounted
+  // for, however: files added in a and b.
+
+  {
+    state_renumbering aux_renumbering;
+    extend_renumbering_via_added_files(a_tmp, b_tmp, renumbering, aux_renumbering);
+    for (state_renumbering::const_iterator i = aux_renumbering.begin();
+         i != aux_renumbering.end(); ++i)
+      {
+        I(renumbering.find(i->first) == renumbering.end());
+        renumbering.insert(*i);
+      }
+  }
+
+  // renumbering now contains a *complete* renumbering of b->a,
+  // so we reset a_tmp and b_tmp, and renumber b_tmp under this
+  // scheme.
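+  //
+  // concretely: if ancestor file "foo" ended up as tid 3 in a_tmp but as
+  // tid 9 in b_tmp, the renumbering carries 9 -> 3; once it is applied,
+  // both analyses call "foo" tid 3, and project_missing_changes can
+  // compare the two sides' changes entry by entry.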
+
+  a_tmp = a;
+  b_tmp = b;
+  apply_state_renumbering(renumbering, b_tmp);
+
+  a_renumbered = a_tmp;
+  b_renumbered = b_tmp;
+
+  // now we're ready to merge (and resolve conflicts)
+  path_state resolved_conflicts;
+  project_missing_changes(a_tmp, b_tmp, b_merged, resolved_conflicts, app);
+  project_missing_changes(b_tmp, a_tmp, a_merged, resolved_conflicts, app);
+
+  {
+    // now check: the merge analyses, when concatenated with their
+    // predecessors, should lead to the same composite rearrangement
+
+    tid_source ts_tmp;
+    path_analysis anc_a_check, a_merge_check, a_check;
+    path_analysis anc_b_check, b_merge_check, b_check;
+    change_set::path_rearrangement a_re, b_re;
+
+    rebuild_analysis(a, anc_a_check, ts_tmp);
+    rebuild_analysis(b, anc_b_check, ts_tmp);
+    rebuild_analysis(a_merged, a_merge_check, ts_tmp);
+    rebuild_analysis(b_merged, b_merge_check, ts_tmp);
+
+    std::set<file_path> anc_a_killed, anc_b_killed;
+    extract_killed(anc_a_check, anc_a_killed);
+    extract_killed(anc_b_check, anc_b_killed);
+
+    concatenate_disjoint_analyses(anc_a_check, a_merge_check, anc_a_killed, a_check);
+    concatenate_disjoint_analyses(anc_b_check, b_merge_check, anc_b_killed, b_check);
+    compose_rearrangement(a_check, a_re);
+    compose_rearrangement(b_check, b_re);
+    I(a_re == b_re);
+  }
+
+}
+
+static void
+merge_deltas(file_path const & anc_path,
+             file_path const & left_path,
+             file_path const & right_path,
+             file_path const & path_in_merged,
+             std::map<file_path, file_id> & merge_finalists,
+             file_id const & anc,
+             file_id const & left,
+             file_id const & right,
+             file_id & finalist,
+             merge_provider & merger)
+{
+  std::map<file_path, file_id>::const_iterator i = merge_finalists.find(path_in_merged);
+  if (i != merge_finalists.end())
+    {
+      L(F("reusing merge resolution '%s' : '%s' -> '%s'\n")
+        % path_in_merged % anc % i->second);
+      finalist = i->second;
+    }
+  else
+    {
+      if (null_id(anc))
+        {
+          N(merger.try_to_merge_files(left_path, right_path, path_in_merged, left, right, finalist),
+            F("merge of '%s' : '%s' vs. '%s' (no common ancestor) failed")
+            % path_in_merged % left % right);
+        }
+      else
+        {
+          N(merger.try_to_merge_files(anc_path, left_path, right_path, path_in_merged,
+                                      anc, left, right, finalist),
+            F("merge of '%s' : '%s' -> '%s' vs '%s' failed")
+            % path_in_merged % anc % left % right);
+        }
+
+      L(F("merge of '%s' : '%s' -> '%s' vs '%s' resolved to '%s'\n")
+        % path_in_merged % anc % left % right % finalist);
+
+      merge_finalists.insert(std::make_pair(path_in_merged, finalist));
+    }
+}
+
+static void
+project_missing_deltas(change_set const & a,
+                       change_set const & b,
+                       path_analysis const & a_analysis,
+                       path_analysis const & b_analysis,
+                       path_analysis const & a_merged_analysis,
+                       change_set & b_merged,
+                       merge_provider & merger,
+                       std::map<file_path, file_id> & merge_finalists)
+{
+  directory_map a_second_map, b_first_map, a_merged_first_map;
+  build_directory_map(a_analysis.second, a_second_map);
+  build_directory_map(b_analysis.first, b_first_map);
+  build_directory_map(a_merged_analysis.first, a_merged_first_map);
+
+  for (change_set::delta_map::const_iterator i = a.deltas.begin();
+       i != a.deltas.end(); ++i)
+    {
+      file_path path_in_merged, path_in_anc, path_in_b_second;
+
+      // we have a fork like this:
+      //
+      //
+      //            +--> [a2]
+      //  [a1==b1]
+      //            +--> [b2]
+      //
+      // and we have a delta applied to a file in a2.  we want to
+      // figure out what to call this delta's path in b2.  this means
+      // reconstructing it in a1==b1, then reconstructing it *again*
+      // in b2.
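+      //
+      // concretely: if a2 carries a delta on "foo" which a left in place,
+      // while b renamed "foo" -> "baz", the delta's path reconstructs to
+      // "foo" in a1==b1 and to "baz" in b2, so the copy below lands in
+      // b_merged under the name "baz".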
+
+      // first work out what the path in a.first == b.first is
+      reconstruct_path(delta_entry_path(i), a_second_map,
+                       a_analysis.first, path_in_anc);
+
+      // then work out what the path in b.second is
+      reconstruct_path(path_in_anc, b_first_map,
+                       b_analysis.second, path_in_b_second);
+
+      // then work out what the path in merged is
+      reconstruct_path(delta_entry_path(i), a_merged_first_map,
+                       a_merged_analysis.second, path_in_merged);
+
+      // now check to see if there was a delta on the b.second name in b
+      change_set::delta_map::const_iterator j = b.deltas.find(path_in_b_second);
+
+      if (j == b.deltas.end())
+        {
+          // if not, copy ours over using the merged name
+          L(F("merge is copying delta '%s' : '%s' -> '%s'\n")
+            % path_in_merged % delta_entry_src(i) % delta_entry_dst(i));
+          I(b.deltas.find(path_in_merged) == b.deltas.end());
+          if (b.rearrangement.has_deleted_file(path_in_merged))
+            // if the file was deleted on the other fork of the merge, then
+            // we don't want to keep this delta.
+            L(F("skipping delta '%s'->'%s' on deleted file '%s'\n")
+              % delta_entry_src(i) % delta_entry_dst(i) % path_in_merged);
+          else
+            b_merged.apply_delta(path_in_merged, delta_entry_src(i), delta_entry_dst(i));
+        }
+      else
+        {
+          // if so, either...
+
+          if (!(delta_entry_src(i) == delta_entry_src(j)))
+            {
+              // This is a bit of a corner case where a file was added then
+              // deleted on one of the forks.  The src for the addition fork
+              // will be null_id, but the src for the other fork will be the
+              // ancestor file's id.
+
+              // if neither of the forks involved a file addition delta
+              // (null_id to something) then something bad happened.
+              I(null_id(delta_entry_src(i)) || null_id(delta_entry_src(j)));
+
+              if (null_id(delta_entry_src(i)))
+                {
+                  // ... use the delta from 'a'
+                  // 'a' change_set included a delta []->[...], ie file added.
+                  // We want to follow this fork so it gets added to the
+                  // b_merged changeset
+                  L(F("propagating new file addition delta on '%s' : '%s' -> '%s'\n")
+                    % path_in_merged
+                    % delta_entry_src(j)
+                    % delta_entry_dst(i));
+                  b_merged.apply_delta(path_in_merged, delta_entry_src(i), delta_entry_dst(i));
+                }
+              else if (null_id(delta_entry_src(j)))
+                {
+                  // ... ignore the delta
+                  // 'b' change_set included a delta []->[...], ie file added.
+                  // We don't need to add it to the b_merged changeset, since
+                  // any delta in 'a' will be ignored (as 'b' includes
+                  // deletions).
+                  L(F("skipping new file addition delta on '%s' : '' -> '%s'\n")
+                    % path_in_merged
+                    % delta_entry_dst(j));
+                }
+            }
+          else if (delta_entry_dst(i) == delta_entry_dst(j))
+            {
+              // ... absorb identical deltas
+              L(F("skipping common delta '%s' : '%s' -> '%s'\n")
+                % path_in_merged % delta_entry_src(i) % delta_entry_dst(i));
+            }
+
+          else if (delta_entry_src(i) == delta_entry_dst(i))
+            {
+              L(F("skipping neutral delta on '%s' : %s -> %s\n")
+                % delta_entry_path(i)
+                % delta_entry_src(i)
+                % delta_entry_dst(i));
+            }
+
+          else if (delta_entry_src(j) == delta_entry_dst(j))
+            {
+              L(F("propagating unperturbed delta on '%s' : '%s' -> '%s'\n")
+                % delta_entry_path(i)
+                % delta_entry_src(i)
+                % delta_entry_dst(i));
+              b_merged.apply_delta(path_in_merged, delta_entry_dst(j), delta_entry_dst(i));
+            }
+
+          else
+            {
+              // ... or resolve conflict
+              L(F("merging delta '%s' : '%s' -> '%s' vs. '%s'\n")
+                % path_in_merged % delta_entry_src(i) % delta_entry_dst(i) % delta_entry_dst(j));
+              file_id finalist;
+
+              merge_deltas(path_in_anc,
+                           delta_entry_path(i),  // left_path
+                           delta_entry_path(j),  // right_path
+                           path_in_merged,
+                           merge_finalists,
+                           delta_entry_src(i),   // anc
+                           delta_entry_dst(i),   // left
+                           delta_entry_dst(j),   // right
+                           finalist, merger);
+              L(F("resolved merge to '%s' : '%s' -> '%s'\n")
+                % path_in_merged % delta_entry_src(i) % finalist);
+
+              // if the conflict resolved to something other than the
+              // existing post-state of b, add a new entry to the deltas of
+              // b finishing the job.
+              if (!(finalist == delta_entry_dst(j)))
+                b_merged.apply_delta(path_in_merged, delta_entry_dst(j), finalist);
+            }
+        }
+    }
+}
+
+
+void
+merge_change_sets(change_set const & a,
+                  change_set const & b,
+                  change_set & a_merged,
+                  change_set & b_merged,
+                  merge_provider & merger,
+                  app_state & app)
+{
+  a.check_sane();
+  b.check_sane();
+
+  L(F("merging change sets\n"));
+
+  tid_source ts;
+  path_analysis
+    a_analysis, b_analysis,
+    a_renumbered, b_renumbered,
+    a_merged_analysis, b_merged_analysis;
+
+  analyze_rearrangement(a.rearrangement, a_analysis, ts);
+  analyze_rearrangement(b.rearrangement, b_analysis, ts);
+
+  merge_disjoint_analyses(a_analysis, b_analysis,
+                          a_renumbered, b_renumbered,
+                          a_merged_analysis, b_merged_analysis,
+                          ts, app);
+
+  compose_rearrangement(a_merged_analysis,
+                        a_merged.rearrangement);
+
+  compose_rearrangement(b_merged_analysis,
+                        b_merged.rearrangement);
+
+  std::map<file_path, file_id> merge_finalists;
+
+  project_missing_deltas(a, b,
+                         a_renumbered, b_renumbered,
+                         a_merged_analysis,
+                         b_merged,
+                         merger, merge_finalists);
+
+  project_missing_deltas(b, a,
+                         b_renumbered, a_renumbered,
+                         b_merged_analysis,
+                         a_merged,
+                         merger, merge_finalists);
+
+  {
+    // confirmation step
+    change_set a_check, b_check;
+    // dump_change_set("a", a);
+    // dump_change_set("a_merged", a_merged);
+    // dump_change_set("b", b);
+    // dump_change_set("b_merged", b_merged);
+    concatenate_change_sets(a, a_merged, a_check);
+    concatenate_change_sets(b, b_merged, b_check);
+    // dump_change_set("a_check", a_check);
+    // dump_change_set("b_check", b_check);
+    I(a_check == b_check);
+  }
+
+  normalize_change_set(a_merged);
+  normalize_change_set(b_merged);
+
+  a_merged.check_sane();
+  b_merged.check_sane();
+
+  L(F("finished merge\n"));
+}
+
+// end stuff related to merging
+
+void
+invert_change_set(change_set const & a2b,
+                  manifest_map const & a_map,
+                  change_set & b2a)
+{
+  a2b.check_sane();
+  tid_source ts;
+  path_analysis a2b_analysis, b2a_analysis;
+
+  analyze_rearrangement(a2b.rearrangement, a2b_analysis, ts);
+
+  L(F("inverting change set\n"));
+  b2a_analysis.first = a2b_analysis.second;
+  b2a_analysis.second = a2b_analysis.first;
+  compose_rearrangement(b2a_analysis, b2a.rearrangement);
+
+  b2a.deltas.clear();
+
+  // existing deltas are in "b space"
+  for (path_state::const_iterator b = b2a_analysis.first.begin();
+       b != b2a_analysis.first.end(); ++b)
+    {
+      path_state::const_iterator a = b2a_analysis.second.find(path_state_tid(b));
+      I(a != b2a_analysis.second.end());
+      if (path_item_type(path_state_item(b)) == ptype_file)
+        {
+          file_path b_pth, a_pth;
+          get_full_path(b2a_analysis.first, path_state_tid(b), b_pth);
+
+          if (null_name(path_item_name(path_state_item(b))) &&
+              !null_name(path_item_name(path_state_item(a))))
+            {
+              // b->a represents an add in "a space"
+              get_full_path(b2a_analysis.second, path_state_tid(a), a_pth);
+              manifest_map::const_iterator i = a_map.find(a_pth);
+              I(i != a_map.end());
+              b2a.deltas.insert(std::make_pair(a_pth,
+                                               std::make_pair(file_id(),
+                                                              manifest_entry_id(i))));
+              L(F("converted 'delete %s' to 'add as %s' in inverse\n")
+                % a_pth
+                % manifest_entry_id(i));
+            }
+          else if (!null_name(path_item_name(path_state_item(b))) &&
+                   null_name(path_item_name(path_state_item(a))))
+            {
+              // b->a represents a del from "b space"
+              get_full_path(b2a_analysis.first, path_state_tid(b), b_pth);
+              L(F("converted add %s to delete in inverse\n") % b_pth);
+            }
+          else
+            {
+              get_full_path(b2a_analysis.first, path_state_tid(b), b_pth);
+              get_full_path(b2a_analysis.second, path_state_tid(a), a_pth);
+              change_set::delta_map::const_iterator del = a2b.deltas.find(b_pth);
+              if (del == a2b.deltas.end())
+                continue;
+              file_id src_id(delta_entry_src(del)), dst_id(delta_entry_dst(del));
+              L(F("converting delta %s -> %s on %s\n")
+                % src_id % dst_id % b_pth);
+              L(F("inverse is delta %s -> %s on %s\n")
+                % dst_id % src_id % a_pth);
+              b2a.deltas.insert(std::make_pair(a_pth, std::make_pair(dst_id, src_id)));
+            }
+        }
+    }
+
+  // some deltas might not have been renamed, however.  these we just
+  // invert the direction on
+  for (change_set::delta_map::const_iterator del = a2b.deltas.begin();
+       del != a2b.deltas.end(); ++del)
+    {
+      // check to make sure this isn't the image of an add (now a delete)
+      if (null_id(delta_entry_src(del)))
+        continue;
+      // check to make sure this isn't one of the already-moved deltas
+      if (b2a.deltas.find(delta_entry_path(del)) != b2a.deltas.end())
+        continue;
+      b2a.deltas.insert(std::make_pair(delta_entry_path(del),
+                                       std::make_pair(delta_entry_dst(del),
+                                                      delta_entry_src(del))));
+    }
+  normalize_change_set(b2a);
+  b2a.check_sane();
+}
+
+void
+move_files_to_tmp_bottom_up(tid t,
+                            local_path const & temporary_root,
+                            path_state const & state,
+                            directory_map const & dmap)
+{
+  directory_map::const_iterator dirent = dmap.find(t);
+  if (dirent != dmap.end())
+    {
+      boost::shared_ptr<directory_node> node = dirent->second;
+      for (directory_node::const_iterator entry = node->begin();
+           entry != node->end(); ++entry)
+        {
+          tid child = directory_entry_tid(entry);
+          file_path path;
+          path_item item;
+
+          find_item(child, state, item);
+
+          if (null_name(path_item_name(item)))
+            continue;
+
+          // recursively move all sub-entries
+          if (path_item_type(item) == ptype_directory)
+            move_files_to_tmp_bottom_up(child, temporary_root, state, dmap);
+
+          get_full_path(state, child, path);
+
+          local_path src(path());
+          local_path dst((mkpath(temporary_root())
+                          / mkpath(boost::lexical_cast<std::string>(child))).string());
+
+          P(F("moving %s -> %s\n") % src % dst);
+          switch (path_item_type(item))
+            {
+            case ptype_file:
+              if (file_exists(src))
+                move_file(src, dst);
+              break;
+            case ptype_directory:
+              if (directory_exists(src))
+                move_dir(src, dst);
+              break;
+            }
+        }
+    }
+}
+
+void
+move_files_from_tmp_top_down(tid t,
+                             local_path const & temporary_root,
+                             path_state const & state,
+                             directory_map const & dmap)
+{
+  directory_map::const_iterator dirent = dmap.find(t);
+  if (dirent != dmap.end())
+    {
+      boost::shared_ptr<directory_node> node = dirent->second;
+      for (directory_node::const_iterator entry = node->begin();
+           entry != node->end(); ++entry)
+        {
+          tid child = directory_entry_tid(entry);
+          file_path path;
+          path_item item;
+
+          find_item(child, state, item);
+
+          if (null_name(path_item_name(item)))
+            continue;
+
+          get_full_path(state, child, path);
+
+          local_path src((mkpath(temporary_root())
+                          / mkpath(boost::lexical_cast<std::string>(child))).string());
+          local_path dst(path());
+
+          switch (path_item_type(item))
+            {
+            case ptype_file:
+              if (file_exists(src))
+                {
+                  P(F("moving file %s -> %s\n") % src % dst);
+                  make_dir_for(path);
+                  move_file(src, dst);
+                }
+              break;
+            case ptype_directory:
+              if (directory_exists(src))
+                {
+                  P(F("moving dir %s -> %s\n") % src % dst);
+                  make_dir_for(path);
+                  move_dir(src, dst);
+                }
+              break;
+            }
+
+          // recursively move all sub-entries
+          if (path_item_type(item) == ptype_directory)
+            move_files_from_tmp_top_down(child, temporary_root, state, dmap);
+        }
+    }
+}
+
+
+void
+apply_rearrangement_to_filesystem(change_set::path_rearrangement const & re,
+                                  local_path const & temporary_root)
+{
+  re.check_sane();
+  tid_source ts;
+  path_analysis analysis;
+  directory_map first_dmap, second_dmap;
+
+  analyze_rearrangement(re, analysis, ts);
+  build_directory_map(analysis.first, first_dmap);
+  build_directory_map(analysis.second, second_dmap);
+
+  if (analysis.first.empty())
+    return;
+
+  move_files_to_tmp_bottom_up(root_tid, temporary_root,
+                              analysis.first, first_dmap);
+
+  move_files_from_tmp_top_down(root_tid, temporary_root,
+                               analysis.second, second_dmap);
+}
+
+// application stuff
+
+void
+apply_path_rearrangement(path_set const & old_ps,
+                         change_set::path_rearrangement const & pr,
+                         path_set & new_ps)
+{
+  pr.check_sane();
+  change_set::path_rearrangement a, b, c;
+  a.added_files = old_ps;
+  concatenate_rearrangements(a, pr, c);
+  new_ps = c.added_files;
+}
+
+void
+build_pure_addition_change_set(manifest_map const & man,
+                               change_set & cs)
+{
+  for (manifest_map::const_iterator i = man.begin(); i != man.end(); ++i)
+    cs.add_file(manifest_entry_path(i), manifest_entry_id(i));
+  cs.check_sane();
+}
+
+// this function takes the rearrangement sitting in cs and "completes" the
+// changeset by filling in all the deltas
+
+void
+complete_change_set(manifest_map const & m_old,
+                    manifest_map const & m_new,
+                    change_set & cs)
+{
+  cs.rearrangement.check_sane();
+  tid_source ts;
+  path_analysis analysis;
+  directory_map first_dmap, second_dmap;
+
+  analyze_rearrangement(cs.rearrangement, analysis, ts);
+  build_directory_map(analysis.first, first_dmap);
+  build_directory_map(analysis.second, second_dmap);
+
+  std::set<file_path> paths;
+  extract_path_set(m_new, paths);
+
+  for (std::set<file_path>::const_iterator i = cs.rearrangement.added_files.begin();
+       i != cs.rearrangement.added_files.end(); ++i)
+    {
+      manifest_map::const_iterator j = m_new.find(*i);
+      I(j != m_new.end());
+      cs.deltas.insert(std::make_pair(*i,
+                                      std::make_pair(null_ident,
+                                                     manifest_entry_id(j))));
+      paths.erase(*i);
+    }
+
+  for (std::set<file_path>::const_iterator i = paths.begin();
+       i != paths.end(); ++i)
+    {
+      file_path old_path;
+      reconstruct_path(*i, second_dmap, analysis.first, old_path);
+      manifest_map::const_iterator j = m_old.find(old_path);
+      manifest_map::const_iterator k = m_new.find(*i);
+      I(j != m_old.end());
+      I(k != m_new.end());
+      if (!(manifest_entry_id(j) == manifest_entry_id(k)))
+        cs.deltas.insert(std::make_pair(*i, std::make_pair(manifest_entry_id(j),
+                                                           manifest_entry_id(k))));
+    }
+
+  cs.check_sane();
+}
+
+
+void
+apply_change_set(manifest_map const & old_man,
+                 change_set const & cs,
+                 manifest_map & new_man)
+{
+  cs.check_sane();
+  change_set a, b;
+  build_pure_addition_change_set(old_man, a);
+  concatenate_change_sets(a, cs, b);
+
+  // If the composed change_set still has renames or deletions in it, then
+  // they referred to things that weren't in the original manifest, and
+  // this change_set should never have been applied to this manifest in the
+  // first place.
+  I(b.rearrangement.deleted_files.empty());
+  I(b.rearrangement.renamed_files.empty());
+  // Furthermore, all deltas should be add deltas
+  for (change_set::delta_map::const_iterator i = b.deltas.begin();
+       i != b.deltas.end(); ++i)
+    {
+      I(null_id(delta_entry_src(i)));
+      I(b.rearrangement.added_files.find(delta_entry_path(i))
+        != b.rearrangement.added_files.end());
+    }
+
+  new_man.clear();
+  for (std::set<file_path>::const_iterator i = b.rearrangement.added_files.begin();
+       i != b.rearrangement.added_files.end(); ++i)
+    {
+      change_set::delta_map::const_iterator d = b.deltas.find(*i);
+      I(d != b.deltas.end());
+      new_man.insert(std::make_pair(*i, delta_entry_dst(d)));
+    }
+}
+
+// quick, optimistic and destructive version
+void
+apply_path_rearrangement(change_set::path_rearrangement const & pr,
+                         path_set & ps)
+{
+  pr.check_sane();
+  if (pr.renamed_files.empty()
+      && pr.renamed_dirs.empty()
+      && pr.deleted_dirs.empty())
+    {
+      // fast path for simple drop/add file operations
+      for (std::set<file_path>::const_iterator i = pr.deleted_files.begin();
+           i != pr.deleted_files.end(); ++i)
+        {
+          ps.erase(*i);
+        }
+      for (std::set<file_path>::const_iterator i = pr.added_files.begin();
+           i != pr.added_files.end(); ++i)
+        {
+          ps.insert(*i);
+        }
+    }
+  else
+    {
+      // fall back to the slow way
+      path_set tmp;
+      apply_path_rearrangement(ps, pr, tmp);
+      ps = tmp;
+    }
+}
+
+// quick, optimistic and destructive version
+file_path
+apply_change_set_inverse(change_set const & cs,
+                         file_path const & file_in_second)
+{
+  cs.check_sane();
+  tid_source ts;
+  path_analysis analysis;
+  directory_map second_dmap;
+  file_path file_in_first;
+
+  analyze_rearrangement(cs.rearrangement, analysis, ts);
+  build_directory_map(analysis.second, second_dmap);
+  reconstruct_path(file_in_second, second_dmap, analysis.first, file_in_first);
+  return file_in_first;
+}
+
+// quick, optimistic and destructive version
+void
+apply_change_set(change_set const & cs,
+                 manifest_map & man)
+{
+  cs.check_sane();
+  if (cs.rearrangement.renamed_files.empty()
+      && cs.rearrangement.renamed_dirs.empty()
+      && cs.rearrangement.deleted_dirs.empty())
+    {
+      // fast path for simple drop/add/delta file operations
+      for (std::set<file_path>::const_iterator i = cs.rearrangement.deleted_files.begin();
+           i != cs.rearrangement.deleted_files.end(); ++i)
+        {
+          man.erase(*i);
+        }
+      for (change_set::delta_map::const_iterator i = cs.deltas.begin();
+           i != cs.deltas.end(); ++i)
+        {
+          if (!null_id(delta_entry_dst(i)))
+            man[delta_entry_path(i)] = delta_entry_dst(i);
+        }
+    }
+  else
+    {
+      // fall back to the slow way
+      manifest_map tmp;
+      apply_change_set(man, cs, tmp);
+      man = tmp;
+    }
+}
+
+
+// i/o stuff
+
+namespace
+{
+  namespace syms
+  {
+    std::string const patch("patch");
+    std::string const from("from");
+    std::string const to("to");
+    std::string const add_file("add_file");
+    std::string const delete_file("delete_file");
+    std::string const delete_dir("delete_dir");
+    std::string const rename_file("rename_file");
+    std::string const rename_dir("rename_dir");
+  }
+}
+
+static void
+parse_path_rearrangement(basic_io::parser & parser,
+                         change_set & cs)
+{
+  while (parser.symp())
+    {
+      std::string t1, t2;
+      if (parser.symp(syms::add_file))
+        {
+          parser.sym();
+          parser.str(t1);
+          cs.add_file(file_path(t1));
+        }
+      else if (parser.symp(syms::delete_file))
+        {
+          parser.sym();
+          parser.str(t1);
+          cs.delete_file(file_path(t1));
+        }
+      else if (parser.symp(syms::delete_dir))
+        {
+          parser.sym();
+          parser.str(t1);
+          cs.delete_dir(file_path(t1));
+        }
+      else if (parser.symp(syms::rename_file))
+        {
+          parser.sym();
+          parser.str(t1);
+          parser.esym(syms::to);
+          parser.str(t2);
+          cs.rename_file(file_path(t1),
+                         file_path(t2));
+        }
+      else if (parser.symp(syms::rename_dir))
+        {
+          parser.sym();
+          parser.str(t1);
+          parser.esym(syms::to);
+          parser.str(t2);
+          cs.rename_dir(file_path(t1),
+                        file_path(t2));
+        }
+      else
+        break;
+    }
+  cs.rearrangement.check_sane();
+}
+
+
+void
+print_path_rearrangement(basic_io::printer & printer,
+                         change_set::path_rearrangement const & pr)
+{
+
+  pr.check_sane();
+  for (std::set<file_path>::const_iterator i = pr.deleted_files.begin();
+       i != pr.deleted_files.end(); ++i)
+    {
+      basic_io::stanza st;
+      st.push_str_pair(syms::delete_file, (*i)());
+      printer.print_stanza(st);
+    }
+
+  for (std::set<file_path>::const_iterator i = pr.deleted_dirs.begin();
+       i != pr.deleted_dirs.end(); ++i)
+    {
+      basic_io::stanza st;
+      st.push_str_pair(syms::delete_dir, (*i)());
+      printer.print_stanza(st);
+    }
+
+  for (std::map<file_path, file_path>::const_iterator i = pr.renamed_files.begin();
+       i != pr.renamed_files.end(); ++i)
+    {
+      basic_io::stanza st;
+      st.push_str_pair(syms::rename_file, i->first());
+      st.push_str_pair(syms::to, i->second());
+      printer.print_stanza(st);
+    }
+
+  for (std::map<file_path, file_path>::const_iterator i = pr.renamed_dirs.begin();
+       i != pr.renamed_dirs.end(); ++i)
+    {
+      basic_io::stanza st;
+      st.push_str_pair(syms::rename_dir, i->first());
+      st.push_str_pair(syms::to, i->second());
+      printer.print_stanza(st);
+    }
+
+  for (std::set<file_path>::const_iterator i = pr.added_files.begin();
+       i != pr.added_files.end(); ++i)
+    {
+      basic_io::stanza st;
+      st.push_str_pair(syms::add_file, (*i)());
+      printer.print_stanza(st);
+    }
+}
+
+void
+parse_change_set(basic_io::parser & parser,
+                 change_set & cs)
+{
+  clear_change_set(cs);
+
+  parse_path_rearrangement(parser, cs);
+
+  while (parser.symp(syms::patch))
+    {
+      std::string path, src, dst;
+      parser.sym();
+      parser.str(path);
+      parser.esym(syms::from);
+      parser.hex(src);
+      parser.esym(syms::to);
+      parser.hex(dst);
+      cs.deltas.insert(std::make_pair(file_path(path),
+                                      std::make_pair(file_id(src),
+                                                     file_id(dst))));
+    }
+  cs.check_sane();
+}
+
+void
+print_change_set(basic_io::printer & printer,
+                 change_set const & cs)
+{
+  cs.check_sane();
+  print_path_rearrangement(printer, cs.rearrangement);
+
+  for (change_set::delta_map::const_iterator i = cs.deltas.begin();
+       i != cs.deltas.end(); ++i)
+    {
+      basic_io::stanza st;
+      st.push_str_pair(syms::patch, i->first());
+      st.push_hex_pair(syms::from, i->second.first.inner()());
+      st.push_hex_pair(syms::to, i->second.second.inner()());
+      printer.print_stanza(st);
+    }
+}
+
+void
+read_path_rearrangement(data const & dat,
+                        change_set::path_rearrangement & re)
+{
+  std::istringstream iss(dat());
+  basic_io::input_source src(iss, "path_rearrangement");
+  basic_io::tokenizer tok(src);
+  basic_io::parser pars(tok);
+  change_set cs;
+  parse_path_rearrangement(pars, cs);
+  re = cs.rearrangement;
+  I(src.lookahead == EOF);
+  re.check_sane();
+}
+
+void
+read_change_set(data const & dat,
+                change_set & cs)
+{
+  std::istringstream iss(dat());
+  basic_io::input_source src(iss, "change_set");
+  basic_io::tokenizer tok(src);
+  basic_io::parser pars(tok);
+  parse_change_set(pars, cs);
+  I(src.lookahead == EOF);
+  cs.check_sane();
+}
+
+void
+write_change_set(change_set const & cs,
+                 data & dat)
+{
+  cs.check_sane();
+  std::ostringstream oss;
+  basic_io::printer pr(oss);
+  print_change_set(pr, cs);
+  dat = data(oss.str());
data(oss.str()); +} + +void +write_path_rearrangement(change_set::path_rearrangement const & re, + data & dat) +{ + re.check_sane(); + std::ostringstream oss; + basic_io::printer pr(oss); + print_path_rearrangement(pr, re); + dat = data(oss.str()); +} + +#ifdef BUILD_UNIT_TESTS +#include "unit_tests.hh" +#include "sanity.hh" + +static void dump_change_set(std::string const & ctx, + change_set const & cs) +{ + data tmp; + write_change_set(cs, tmp); + L(F("[begin changeset %s]\n") % ctx); + L(F("%s") % tmp); + L(F("[end changeset %s]\n") % ctx); +} + +static void +spin_change_set(change_set const & cs) +{ + data tmp1; + change_set cs1; + write_change_set(cs, tmp1); + dump_change_set("normalized", cs); + read_change_set(tmp1, cs1); + for (int i = 0; i < 5; ++i) + { + data tmp2; + change_set cs2; + write_change_set(cs1, tmp2); + BOOST_CHECK(tmp1 == tmp2); + read_change_set(tmp2, cs2); + BOOST_CHECK(cs1.rearrangement == cs2.rearrangement); + BOOST_CHECK(cs1.deltas == cs2.deltas); + cs1 = cs2; + } +} + +static void +disjoint_merge_test(std::string const & ab_str, + std::string const & ac_str) +{ + change_set ab, ac, bm, cm; + + app_state app; + + L(F("beginning disjoint_merge_test\n")); + + read_change_set(data(ab_str), ab); + read_change_set(data(ac_str), ac); + + manifest_map dummy; + + merge_provider merger(app, dummy, dummy, dummy); + merge_change_sets(ab, ac, bm, cm, merger, app); + + dump_change_set("ab", ab); + dump_change_set("ac", ac); + dump_change_set("bm", bm); + dump_change_set("cm", cm); + + BOOST_CHECK(bm.rearrangement == ac.rearrangement); + BOOST_CHECK(cm.rearrangement == ab.rearrangement); + + L(F("finished disjoint_merge_test\n")); +} + +static void +disjoint_merge_tests() +{ + disjoint_merge_test + ("rename_file \"foo\"\n" + " to \"bar\"\n", + + "rename_file \"apple\"\n" + " to \"orange\"\n"); + + disjoint_merge_test + ("rename_file \"foo/a.txt\"\n" + " to \"bar/b.txt\"\n", + + "rename_file \"bar/c.txt\"\n" + " to \"baz/d.txt\"\n"); + + disjoint_merge_test + ("patch \"foo/file.txt\"\n" + " from [c6a4a6196bb4a744207e1a6e90273369b8c2e925]\n" + " to [fe18ec0c55cbc72e4e51c58dc13af515a2f3a892]\n", + + "rename_file \"foo/file.txt\"\n" + " to \"foo/apple.txt\"\n"); + + disjoint_merge_test + ( + "rename_file \"apple.txt\"\n" + " to \"pear.txt\"\n" + "\n" + "patch \"foo.txt\"\n" + " from [c6a4a6196bb4a744207e1a6e90273369b8c2e925]\n" + " to [fe18ec0c55cbc72e4e51c58dc13af515a2f3a892]\n", + + "rename_file \"foo.txt\"\n" + " to \"bar.txt\"\n" + "\n" + "patch \"apple.txt\"\n" + " from [fe18ec0c55cbc72e4e51c58dc13af515a2f3a892]\n" + " to [435e816c30263c9184f94e7c4d5aec78ea7c028a]\n"); +} + +static void +basic_change_set_test() +{ + try + { + + change_set cs; + cs.delete_file(file_path("usr/lib/zombie")); + cs.add_file(file_path("usr/bin/cat"), + file_id(hexenc("adc83b19e793491b1c6ea0fd8b46cd9f32e592fc"))); + cs.add_file(file_path("usr/local/bin/dog"), + file_id(hexenc("adc83b19e793491b1c6ea0fd8b46cd9f32e592fc"))); + cs.rename_file(file_path("usr/local/bin/dog"), file_path("usr/bin/dog")); + cs.rename_file(file_path("usr/bin/cat"), file_path("usr/local/bin/chicken")); + cs.add_file(file_path("usr/lib/libc.so"), + file_id(hexenc("435e816c30263c9184f94e7c4d5aec78ea7c028a"))); + cs.rename_dir(file_path("usr/lib"), file_path("usr/local/lib")); + cs.apply_delta(file_path("usr/local/bin/chicken"), + file_id(hexenc("c6a4a6196bb4a744207e1a6e90273369b8c2e925")), + file_id(hexenc("fe18ec0c55cbc72e4e51c58dc13af515a2f3a892"))); + spin_change_set(cs); + } + catch (informative_failure & exn) + { + 
L(F("informative failure: %s\n") % exn.what); + } + catch (std::runtime_error & exn) + { + L(F("runtime error: %s\n") % exn.what()); + } +} + +static void +neutralize_change_test() +{ + try + { + + change_set cs1, cs2, csa; + cs1.add_file(file_path("usr/lib/zombie"), + file_id(hexenc("adc83b19e793491b1c6ea0fd8b46cd9f32e592fc"))); + cs1.rename_file(file_path("usr/lib/apple"), + file_path("usr/lib/orange")); + cs1.rename_dir(file_path("usr/lib/moose"), + file_path("usr/lib/squirrel")); + + dump_change_set("neutralize target", cs1); + + cs2.delete_file(file_path("usr/lib/zombie")); + cs2.rename_file(file_path("usr/lib/orange"), + file_path("usr/lib/apple")); + cs2.rename_dir(file_path("usr/lib/squirrel"), + file_path("usr/lib/moose")); + + dump_change_set("neutralizer", cs2); + + concatenate_change_sets(cs1, cs2, csa); + + dump_change_set("neutralized", csa); + + tid_source ts; + path_analysis analysis; + analyze_rearrangement(csa.rearrangement, analysis, ts); + + BOOST_CHECK(analysis.first.empty()); + BOOST_CHECK(analysis.second.empty()); + } + catch (informative_failure & exn) + { + L(F("informative failure: %s\n") % exn.what); + } + catch (std::runtime_error & exn) + { + L(F("runtime error: %s\n") % exn.what()); + } +} + +static void +non_interfering_change_test() +{ + try + { + + change_set cs1, cs2, csa; + cs1.delete_file(file_path("usr/lib/zombie")); + cs1.rename_file(file_path("usr/lib/orange"), + file_path("usr/lib/apple")); + cs1.rename_dir(file_path("usr/lib/squirrel"), + file_path("usr/lib/moose")); + + dump_change_set("non-interference A", cs1); + + cs2.add_file(file_path("usr/lib/zombie"), + file_id(hexenc("adc83b19e793491b1c6ea0fd8b46cd9f32e592fc"))); + cs2.rename_file(file_path("usr/lib/pear"), + file_path("usr/lib/orange")); + cs2.rename_dir(file_path("usr/lib/spy"), + file_path("usr/lib/squirrel")); + + dump_change_set("non-interference B", cs2); + + concatenate_change_sets(cs1, cs2, csa); + + dump_change_set("non-interference combined", csa); + + tid_source ts; + path_analysis analysis; + analyze_rearrangement(csa.rearrangement, analysis, ts); + + BOOST_CHECK(analysis.first.size() == 8); + BOOST_CHECK(analysis.second.size() == 8); + } + catch (informative_failure & exn) + { + L(F("informative failure: %s\n") % exn.what); + } + catch (std::runtime_error & exn) + { + L(F("runtime error: %s\n") % exn.what()); + } +} + +static const file_id fid_null; +static const file_id fid1 = file_id(hexenc("aaaa3831e5eb74e6cd50b94f9e99e6a14d98d702")); +static const file_id fid2 = file_id(hexenc("bbbb3831e5eb74e6cd50b94f9e99e6a14d98d702")); +static const file_id fid3 = file_id(hexenc("cccc3831e5eb74e6cd50b94f9e99e6a14d98d702")); + +typedef enum { in_a, in_b } which_t; +struct bad_concatenate_change_test +{ + change_set a; + change_set b; + change_set combined; + change_set concat; + bool do_combine; + std::string ident; + bad_concatenate_change_test(char const *file, int line) : + do_combine(false), + ident((F("%s:%d") % file % line).str()) + { + L(F("BEGINNING concatenation test %s\n") % ident); + } + + ~bad_concatenate_change_test() + { + L(F("FINISHING concatenation test %s\n") % ident); + } + + change_set & getit(which_t which) + { + if (which == in_a) + return a; + return b; + } + // Call combine() if you want to make sure that the things that are bad when + // concatenated are also bad when all stuck together into a single + // changeset. 
+ void combine() { do_combine = true; } + void add_file(which_t which, std::string const & path, file_id fid = fid1) + { + getit(which).add_file(file_path(path), fid); + if (do_combine) + combined.add_file(file_path(path), fid); + } + void apply_delta(which_t which, std::string const & path, + file_id from_fid, + file_id to_fid) + { + getit(which).apply_delta(file_path(path), from_fid, to_fid); + if (do_combine) + combined.apply_delta(file_path(path), from_fid, to_fid); + } + void delete_file(which_t which, std::string const & path) + { + getit(which).delete_file(file_path(path)); + if (do_combine) + combined.delete_file(file_path(path)); + } + void delete_dir(which_t which, std::string const & path) + { + getit(which).delete_dir(file_path(path)); + if (do_combine) + combined.delete_dir(file_path(path)); + } + void rename_file(which_t which, + std::string const & path1, std::string const & path2) + { + getit(which).rename_file(file_path(path1), file_path(path2)); + if (do_combine) + combined.rename_file(file_path(path1), file_path(path2)); + } + void rename_dir(which_t which, + std::string const & path1, std::string const & path2) + { + getit(which).rename_dir(file_path(path1), file_path(path2)); + if (do_combine) + combined.rename_dir(file_path(path1), file_path(path2)); + } + void run() + { + L(F("RUNNING bad_concatenate_change_test %s\n") % ident); + try + { + dump_change_set("a", a); + dump_change_set("b", b); + } + catch (std::logic_error e) + { + L(F("skipping change_set printing, one or both are not sane\n")); + } + BOOST_CHECK_THROW(concatenate_change_sets(a, b, concat), + std::logic_error); + try { dump_change_set("concat", concat); } + catch (std::logic_error e) { L(F("concat change_set is insane\n")); } + if (do_combine) + { + L(F("Checking combined change set\n")); + change_set empty_cs, combined_concat; + BOOST_CHECK_THROW(concatenate_change_sets(combined, + empty_cs, + combined_concat), + std::logic_error); + try { dump_change_set("combined_concat", combined_concat); } + catch (std::logic_error e) { L(F("combined_concat is insane\n")); } + } + } + void run_both() + { + run(); + L(F("RUNNING bad_concatenate_change_test %s again backwards\n") % ident); + BOOST_CHECK_THROW(concatenate_change_sets(a, b, concat), + std::logic_error); + } +}; + +// We also do a number of just "bad change set" tests here, leaving one of +// them empty; this is because our main line of defense against bad +// change_sets, check_sane_history, does its checking by doing +// concatenations, so it's doing concatenations that we want to be sure does +// sanity checking. 
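+// (editorial illustration, not part of the original source: each case
+// below follows the pattern
+//
+//   bad_concatenate_change_test t(__FILE__, __LINE__);
+//   t.add_file(in_a, "target");   // first changeset adds "target"
+//   t.add_file(in_b, "target");   // second changeset adds it again
+//   t.run();                      // concatenation must throw std::logic_error
+//
+// i.e. two changesets that are individually sane but whose composition
+// must be rejected by concatenate_change_sets.)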
+static void +bad_concatenate_change_tests() +{ + // Files/directories can't be dropped on top of each other: + BOOST_CHECKPOINT("on top"); + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.add_file(in_a, "target"); + t.add_file(in_b, "target"); + t.run(); + } + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.combine(); + t.rename_file(in_a, "foo", "target"); + t.rename_file(in_b, "bar", "target"); + t.run(); + } + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.combine(); + t.rename_dir(in_a, "foo", "target"); + t.rename_dir(in_b, "bar", "target"); + t.run(); + } + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.combine(); + t.rename_file(in_a, "foo", "target"); + t.rename_dir(in_b, "bar", "target"); + t.run_both(); + } + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.combine(); + t.add_file(in_a, "target"); + t.rename_file(in_b, "foo", "target"); + t.run_both(); + } + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.combine(); + t.add_file(in_a, "target"); + t.rename_dir(in_b, "foo", "target"); + t.run_both(); + } + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.combine(); + t.add_file(in_a, "target/subfile"); + t.add_file(in_b, "target"); + t.run_both(); + } + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.combine(); + t.add_file(in_a, "target/subfile"); + t.rename_file(in_b, "foo", "target"); + t.run_both(); + } + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.add_file(in_a, "target/subfile"); + t.rename_dir(in_b, "foo", "target"); + t.run_both(); + } + // You can only delete something once + BOOST_CHECKPOINT("delete once"); + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.delete_file(in_a, "target"); + t.delete_file(in_b, "target"); + t.run(); + } + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.combine(); + t.delete_file(in_a, "target"); + t.delete_dir(in_b, "target"); + t.run_both(); + } + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.delete_dir(in_a, "target"); + t.delete_dir(in_b, "target"); + t.run(); + } + // You can't delete something that's not there anymore + BOOST_CHECKPOINT("delete after rename"); + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.combine(); + t.delete_file(in_a, "target"); + t.rename_file(in_b, "target", "foo"); + t.run_both(); + } + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.combine(); + t.delete_dir(in_a, "target"); + t.rename_file(in_b, "target", "foo"); + t.run_both(); + } + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.combine(); + t.delete_dir(in_a, "target"); + t.rename_dir(in_b, "target", "foo"); + t.run_both(); + } + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.combine(); + t.delete_file(in_a, "target"); + t.rename_dir(in_b, "target", "foo"); + t.run_both(); + } + // Files/directories can't be split in two + BOOST_CHECKPOINT("splitting files/dirs"); + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.rename_file(in_a, "target", "foo"); + t.rename_file(in_b, "target", "bar"); + t.run(); + } + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.rename_dir(in_a, "target", "foo"); + t.rename_dir(in_b, "target", "bar"); + t.run(); + } + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.combine(); + t.rename_dir(in_a, "target", "foo"); + t.rename_file(in_b, "target", "bar"); + t.run_both(); + } + // Files and directories are different + BOOST_CHECKPOINT("files != dirs"); + { + bad_concatenate_change_test t(__FILE__, __LINE__); + 
t.add_file(in_a, "target"); + t.delete_dir(in_b, "target"); + t.run(); + } + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.add_file(in_a, "target/subfile"); + t.delete_file(in_b, "target"); + t.run(); + } + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.add_file(in_a, "target/subfile"); + t.rename_file(in_b, "target", "foo"); + t.run(); + } + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.rename_file(in_a, "foo", "target"); + t.delete_dir(in_b, "target"); + t.run(); + } + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.combine(); + t.apply_delta(in_a, "target", fid1, fid2); + t.delete_dir(in_b, "target"); + t.run_both(); + } + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.rename_dir(in_a, "foo", "target"); + t.delete_file(in_b, "target"); + t.run(); + } + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.combine(); + t.rename_dir(in_a, "foo", "target"); + t.apply_delta(in_b, "target", fid1, fid2); + t.run_both(); + } + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.combine(); + t.apply_delta(in_a, "target", fid1, fid2); + t.rename_dir(in_b, "target", "bar"); + t.run_both(); + } + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.rename_file(in_a, "foo", "target"); + t.rename_dir(in_b, "target", "bar"); + t.run(); + } + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.rename_dir(in_a, "foo", "target"); + t.rename_file(in_b, "target", "bar"); + t.run(); + } + // Directories can't be patched, and patches can't be directoried... + BOOST_CHECKPOINT("can't patch dirs or vice versa"); + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.combine(); + t.add_file(in_a, "target/subfile"); + t.apply_delta(in_b, "target", fid_null, fid1); + t.run_both(); + } + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.combine(); + t.add_file(in_a, "target/subfile"); + t.apply_delta(in_b, "target", fid1, fid2); + t.run_both(); + } + // Deltas must be consistent + BOOST_CHECKPOINT("consistent deltas"); + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.apply_delta(in_a, "target", fid1, fid2); + t.apply_delta(in_b, "target", fid3, fid1); + t.run(); + } + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.add_file(in_a, "target", fid1); + t.apply_delta(in_b, "target", fid2, fid3); + t.run(); + } + // Can't have a null source id if it's not an add + BOOST_CHECKPOINT("null id on non-add"); + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.apply_delta(in_a, "target", fid_null, fid1); + t.run(); + } + // Can't have drop + delta with no add + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.combine(); + t.delete_file(in_a, "target"); + t.apply_delta(in_b, "target", fid1, fid2); + t.run(); + } + // Can't have a null destination id, ever, with or without a delete_file + BOOST_CHECKPOINT("no null destinations"); + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.delete_file(in_a, "target"); + t.apply_delta(in_a, "target", fid1, fid_null); + t.run(); + } + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.apply_delta(in_a, "target", fid1, fid_null); + t.run(); + } + // Can't have a patch with src == dst + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.apply_delta(in_a, "target", fid1, fid1); + t.run(); + } +} + +// FIXME: Things that should be added, but can't be trivially because they +// assert too early: +// anything repeated -- multiple adds, multiple deletes, multiple deltas +// including in one changeset, for both files and dirs +// 
(probably should put these in strings, and do BOOST_CHECK_THROWS in the +// parser?) + +// FIXME: also need tests for the invariants in apply_manifest (and any +// invariants that should be there but aren't, of course) + +void +add_change_set_tests(test_suite * suite) +{ + I(suite); + suite->add(BOOST_TEST_CASE(&basic_change_set_test)); + suite->add(BOOST_TEST_CASE(&neutralize_change_test)); + suite->add(BOOST_TEST_CASE(&non_interfering_change_test)); + suite->add(BOOST_TEST_CASE(&disjoint_merge_tests)); + suite->add(BOOST_TEST_CASE(&bad_concatenate_change_tests)); +} + + +#endif // BUILD_UNIT_TESTS ============================================================ --- tests/(minor)_test_a_merge_3/merge.diff3 adc1ca256e9313dd387448ffcd5cf7572eb58d8e +++ tests/(minor)_test_a_merge_3/merge.diff3 adc1ca256e9313dd387448ffcd5cf7572eb58d8e @@ -0,0 +1,3563 @@ +// copyright (C) 2004 graydon hoare +// all rights reserved. +// licensed to the public under the terms of the GNU GPL (>= 2) +// see the file COPYING for details + +// this is how you "ask for" the C99 constant constructor macros. *and* +// you have to do so before any other files accidentally include +// stdint.h. awesome. +#define __STDC_CONSTANT_MACROS + +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "basic_io.hh" +#include "change_set.hh" +#include "constants.hh" +#include "diff_patch.hh" +#include "file_io.hh" +#include "interner.hh" +#include "numeric_vocab.hh" +#include "sanity.hh" +#include "smap.hh" + +// our analyses in this file happen on one of two families of +// related structures: a path_analysis or a directory_map. +// +// a path_analysis corresponds exactly to a normalized +// path_rearrangement; they are two ways of writing the +// same information +// +// the path_analysis stores two path_states. each path_state is a map from +// transient identifiers (tids) to items. each item represents a semantic +// element of a filesystem which has a type (file or directory), a name, +// and a parent link (another tid). tids should be unique across a +// path_analysis. 
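+//
+// (editorial illustration, not part of the original source: a single
+// rename "foo" -> "bar" analyzes to two path_states sharing one tid,
+// roughly
+//
+//   first  = { 1 -> (parent: root_tid, type: ptype_file, name: "foo") }
+//   second = { 1 -> (parent: root_tid, type: ptype_file, name: "bar") }
+//
+// the rearrangement can then be read back out by walking each tid and
+// comparing its full path in the two states.)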
+
+typedef enum { ptype_directory, ptype_file } ptype;
+typedef u32 tid;
+static tid root_tid = 0;
+
+struct
+tid_source
+{
+  tid ctr;
+  tid_source() : ctr(root_tid + 1) {}
+  tid next() { I(ctr != UINT32_C(0xffffffff)); return ctr++; }
+};
+
+typedef u32 path_component;
+
+struct
+path_component_maker
+{
+  path_component make(std::string const & s)
+  {
+    bool is_new;
+    path_component pc = intern.intern(s, is_new);
+    // sanity check new ones
+    if (is_new)
+      {
+        // must be a valid file_path
+        file_path tmp_file_path = file_path(s);
+        // must contain exactly 0 or 1 components
+        fs::path tmp_fs_path = mkpath(s);
+        I(null_name(s) || ++(tmp_fs_path.begin()) == tmp_fs_path.end());
+      }
+    return pc;
+  }
+  std::string lookup(path_component pc) const
+  {
+    return intern.lookup(pc);
+  }
+private:
+  interner<path_component> intern;
+};
+
+static path_component_maker the_path_component_maker;
+
+static path_component
+make_null_component()
+{
+  static path_component null_pc = the_path_component_maker.make("");
+  return null_pc;
+}
+
+static bool
+null_name(path_component pc)
+{
+  return pc == make_null_component();
+}
+
+struct
+path_item
+{
+  tid parent;
+  ptype ty;
+  path_component name;
+  path_item() {}
+  path_item(tid p, ptype t, path_component n);
+  path_item(path_item const & other);
+  path_item const & operator=(path_item const & other);
+  bool operator==(path_item const & other) const;
+};
+
+
+template <typename T> struct identity
+{
+  size_t operator()(T const & v) const
+  {
+    return static_cast<size_t>(v);
+  }
+};
+
+typedef smap<tid, path_item> path_state;
+typedef smap<tid, tid> state_renumbering;
+typedef std::pair<path_state, path_state> path_analysis;
+
+// nulls and tests
+
+static file_id null_ident;
+
+// a directory_map is a more "normal" representation of a directory tree,
+// which you can traverse more conveniently from root to tip
+//
+// tid -> [ name -> (ptype, tid),
+//          name -> (ptype, tid),
+//          ... ]
+//
+// tid -> [ name -> (ptype, tid),
+//          name -> (ptype, tid),
+//          ... ]
+
+typedef smap< path_component, std::pair<ptype, tid> > directory_node;
+
+typedef smap<tid, boost::shared_ptr<directory_node> > directory_map;
+
+static path_component
+directory_entry_name(directory_node::const_iterator const & i)
+{
+  return i->first;
+}
+
+static ptype
+directory_entry_type(directory_node::const_iterator const & i)
+{
+  return i->second.first;
+}
+
+static tid
+directory_entry_tid(directory_node::const_iterator const & i)
+{
+  return i->second.second;
+}
+
+void
+change_set::add_file(file_path const & a)
+{
+  I(rearrangement.added_files.find(a) == rearrangement.added_files.end());
+  rearrangement.added_files.insert(a);
+}
+
+void
+change_set::add_file(file_path const & a, file_id const & ident)
+{
+  I(rearrangement.added_files.find(a) == rearrangement.added_files.end());
+  I(deltas.find(a) == deltas.end());
+  rearrangement.added_files.insert(a);
+  deltas.insert(std::make_pair(a, std::make_pair(null_ident, ident)));
+}
+
+void
+change_set::apply_delta(file_path const & path,
+                        file_id const & src,
+                        file_id const & dst)
+{
+  I(deltas.find(path) == deltas.end());
+  deltas.insert(std::make_pair(path, std::make_pair(src, dst)));
+}
+
+void
+change_set::delete_file(file_path const & d)
+{
+  I(rearrangement.deleted_files.find(d) == rearrangement.deleted_files.end());
+  rearrangement.deleted_files.insert(d);
+}
+
+void
+change_set::delete_dir(file_path const & d)
+{
+  I(rearrangement.deleted_dirs.find(d) == rearrangement.deleted_dirs.end());
+  rearrangement.deleted_dirs.insert(d);
+}
+
+void
+change_set::rename_file(file_path const & a, file_path const & b)
+{
+  I(rearrangement.renamed_files.find(a) == rearrangement.renamed_files.end());
+  rearrangement.renamed_files.insert(std::make_pair(a,b));
+}
+
+void
+change_set::rename_dir(file_path const & a, file_path const & b)
+{
+  I(rearrangement.renamed_dirs.find(a) == rearrangement.renamed_dirs.end());
+  rearrangement.renamed_dirs.insert(std::make_pair(a,b));
+}
+
+
+bool
+change_set::path_rearrangement::operator==(path_rearrangement const & other) const
+{
+  return deleted_files == other.deleted_files &&
+    deleted_dirs == other.deleted_dirs &&
+    renamed_files == other.renamed_files &&
+    renamed_dirs == other.renamed_dirs &&
+    added_files == other.added_files;
+}
+
+bool
+change_set::path_rearrangement::empty() const
+{
+  return deleted_files.empty() &&
+    deleted_dirs.empty() &&
+    renamed_files.empty() &&
+    renamed_dirs.empty() &&
+    added_files.empty();
+}
+
+bool
+change_set::path_rearrangement::has_added_file(file_path const & file) const
+{
+  return added_files.find(file) != added_files.end();
+}
+
+bool
+change_set::path_rearrangement::has_deleted_file(file_path const & file) const
+{
+  return deleted_files.find(file) != deleted_files.end();
+}
+
+bool
+change_set::path_rearrangement::has_renamed_file_dst(file_path const & file) const
+{
+  // FIXME: this is inefficient, but improvements would require a different
+  // structure for renamed_files (or perhaps a second reverse map). For now
+  // we'll assume that few files will be renamed per changeset.
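+  // (editorial note, not part of the original source: the reverse map
+  // suggested above would be a second std::map<file_path, file_path>
+  // keyed on rename destinations and kept in step with renamed_files,
+  // turning this lookup into an O(log n) find instead of the linear
+  // scan below.)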
+  for (std::map<file_path, file_path>::const_iterator rf = renamed_files.begin();
+       rf != renamed_files.end(); ++rf)
+    if (rf->second == file)
+      return true;
+  return false;
+}
+
+bool
+change_set::path_rearrangement::has_renamed_file_src(file_path const & file) const
+{
+  return renamed_files.find(file) != renamed_files.end();
+}
+
+bool
+change_set::empty() const
+{
+  return deltas.empty() && rearrangement.empty();
+}
+
+bool
+change_set::operator==(change_set const & other) const
+{
+  return rearrangement == other.rearrangement &&
+    deltas == other.deltas;
+}
+
+
+// simple accessors
+
+inline tid const &
+path_item_parent(path_item const & p)
+{
+  return p.parent;
+}
+
+inline ptype const &
+path_item_type(path_item const & p)
+{
+  return p.ty;
+}
+
+inline path_component
+path_item_name(path_item const & p)
+{
+  return p.name;
+}
+
+inline tid
+path_state_tid(path_state::const_iterator i)
+{
+  return i->first;
+}
+
+inline path_item const &
+path_state_item(path_state::const_iterator i)
+{
+  return i->second;
+}
+
+
+
+// structure dumping
+/*
+
+static void
+dump_renumbering(std::string const & s,
+                 state_renumbering const & r)
+{
+  L(F("BEGIN dumping renumbering '%s'\n") % s);
+  for (state_renumbering::const_iterator i = r.begin();
+       i != r.end(); ++i)
+    {
+      L(F("%d -> %d\n") % i->first % i->second);
+    }
+  L(F("END dumping renumbering '%s'\n") % s);
+}
+
+static void
+dump_state(std::string const & s,
+           path_state const & st)
+{
+  L(F("BEGIN dumping state '%s'\n") % s);
+  for (path_state::const_iterator i = st.begin();
+       i != st.end(); ++i)
+    {
+      L(F("state '%s': tid %d, parent %d, type %s, name %s\n")
+        % s
+        % path_state_tid(i)
+        % path_item_parent(path_state_item(i))
+        % (path_item_type(path_state_item(i)) == ptype_directory ? "dir" : "file")
+        % the_path_component_maker.lookup(path_item_name(path_state_item(i))));
+    }
+  L(F("END dumping state '%s'\n") % s);
+}
+
+static void
+dump_analysis(std::string const & s,
+              path_analysis const & t)
+{
+  L(F("BEGIN dumping tree '%s'\n") % s);
+  dump_state(s + " first", t.first);
+  dump_state(s + " second", t.second);
+  L(F("END dumping tree '%s'\n") % s);
+}
+
+*/
+
+
+// sanity checking
+
+static void
+check_sets_disjoint(std::set<file_path> const & a,
+                    std::set<file_path> const & b)
+{
+  std::set<file_path> isect;
+  std::set_intersection(a.begin(), a.end(),
+                        b.begin(), b.end(),
+                        std::inserter(isect, isect.begin()));
+  if (!global_sanity.relaxed)
+    {
+      I(isect.empty());
+    }
+}
+
+change_set::path_rearrangement::path_rearrangement(path_rearrangement const & other)
+{
+  other.check_sane();
+  deleted_files = other.deleted_files;
+  deleted_dirs = other.deleted_dirs;
+  renamed_files = other.renamed_files;
+  renamed_dirs = other.renamed_dirs;
+  added_files = other.added_files;
+}
+
+change_set::path_rearrangement const &
+change_set::path_rearrangement::operator=(path_rearrangement const & other)
+{
+  other.check_sane();
+  deleted_files = other.deleted_files;
+  deleted_dirs = other.deleted_dirs;
+  renamed_files = other.renamed_files;
+  renamed_dirs = other.renamed_dirs;
+  added_files = other.added_files;
+  return *this;
+}
+
+static void
+extract_pairs_and_insert(std::map<file_path, file_path> const & in,
+                         std::set<file_path> & firsts,
+                         std::set<file_path> & seconds)
+{
+  for (std::map<file_path, file_path>::const_iterator i = in.begin();
+       i != in.end(); ++i)
+    {
+      firsts.insert(i->first);
+      seconds.insert(i->second);
+    }
+}
+
+template <typename K, typename V>
+static void
+extract_first(std::map<K, V> const & m, std::set<K> & s)
+{
+  s.clear();
+  for (typename std::map<K, V>::const_iterator i = m.begin();
+       i != m.end(); ++i)
+    {
+      s.insert(i->first);
+    }
+}
+
+static void
+extract_killed(path_analysis const & a,
+               std::set<file_path> & killed);
+
+
+static void
+check_no_deltas_on_killed_files(path_analysis const & pa,
+                                change_set::delta_map const & del)
+{
+  std::set<file_path> killed;
+  std::set<file_path> delta_paths;
+
+  extract_killed(pa, killed);
+  extract_first(del, delta_paths);
+  check_sets_disjoint(killed, delta_paths);
+}
+
+static void
+check_delta_entries_not_directories(path_analysis const & pa,
+                                    change_set::delta_map const & dels);
+
+void
+analyze_rearrangement(change_set::path_rearrangement const & pr,
+                      path_analysis & pa,
+                      tid_source & ts);
+
+void
+sanity_check_path_analysis(path_analysis const & pr);
+
+void
+change_set::path_rearrangement::check_sane() const
+{
+  delta_map del;
+  this->check_sane(del);
+}
+
+void
+change_set::path_rearrangement::check_sane(delta_map const & deltas) const
+{
+  tid_source ts;
+  path_analysis pa;
+  analyze_rearrangement(*this, pa, ts);
+  sanity_check_path_analysis (pa);
+
+  check_no_deltas_on_killed_files(pa, deltas);
+  check_delta_entries_not_directories(pa, deltas);
+
+  // FIXME: extend this as you manage to think of more invariants
+  // which are cheap enough to check at this level.
+  std::set<file_path> renamed_srcs, renamed_dsts;
+  extract_pairs_and_insert(renamed_files, renamed_srcs, renamed_dsts);
+  extract_pairs_and_insert(renamed_dirs, renamed_srcs, renamed_dsts);
+
+  // Files cannot be split nor joined by renames.
+  I(renamed_files.size() + renamed_dirs.size() == renamed_srcs.size());
+  I(renamed_files.size() + renamed_dirs.size() == renamed_dsts.size());
+
+  check_sets_disjoint(deleted_files, deleted_dirs);
+  check_sets_disjoint(deleted_files, renamed_srcs);
+  check_sets_disjoint(deleted_dirs, renamed_srcs);
+
+  check_sets_disjoint(added_files, renamed_dsts);
+}
+
+change_set::change_set(change_set const & other)
+{
+  other.check_sane();
+  rearrangement = other.rearrangement;
+  deltas = other.deltas;
+}
+
+change_set const &change_set::operator=(change_set const & other)
+{
+  other.check_sane();
+  rearrangement = other.rearrangement;
+  deltas = other.deltas;
+  return *this;
+}
+
+void
+change_set::check_sane() const
+{
+  // FIXME: extend this as you manage to think of more invariants
+  // which are cheap enough to check at this level.
+
+  rearrangement.check_sane(this->deltas);
+
+  for (std::set<file_path>::const_iterator i = rearrangement.added_files.begin();
+       i != rearrangement.added_files.end(); ++i)
+    {
+      delta_map::const_iterator j = deltas.find(*i);
+      if (!global_sanity.relaxed)
+        {
+          I(j != deltas.end());
+          I(null_id(delta_entry_src(j)));
+          I(!null_id(delta_entry_dst(j)));
+        }
+    }
+
+  for (delta_map::const_iterator i = deltas.begin();
+       i != deltas.end(); ++i)
+    {
+      if (!global_sanity.relaxed)
+        {
+          I(!null_name(delta_entry_path(i)));
+          I(!null_id(delta_entry_dst(i)));
+          I(!(delta_entry_src(i) == delta_entry_dst(i)));
+          if (null_id(delta_entry_src(i)))
+            I(rearrangement.added_files.find(delta_entry_path(i))
+              != rearrangement.added_files.end());
+        }
+    }
+
+}
+
+static void
+sanity_check_path_item(path_item const & pi)
+{
+}
+
+static void
+confirm_proper_tree(path_state const & ps)
+{
+  smap<tid, bool> confirmed;
+  I(ps.find(root_tid) == ps.end());
+  for (path_state::const_iterator i = ps.begin(); i != ps.end(); ++i)
+    {
+      tid curr = i->first;
+      path_item item = i->second;
+      smap<tid, bool> ancs;
+
+      while (confirmed.find(curr) == confirmed.end())
+        {
+          sanity_check_path_item(item);
+          I(ancs.find(curr) == ancs.end());
+          ancs.insert(std::make_pair(curr,true));
+          if (path_item_parent(item) == root_tid)
+            break;
+          else
+            {
+              curr = path_item_parent(item);
+              path_state::const_iterator j = ps.find(curr);
+              I(j != ps.end());
+
+              // if we're null, our parent must also be null
+              if (null_name(item.name))
+                I(null_name(path_state_item(j).name));
+
+              item = path_state_item(j);
+              I(path_item_type(item) == ptype_directory);
+            }
+        }
+      std::copy(ancs.begin(), ancs.end(),
+                inserter(confirmed, confirmed.begin()));
+    }
+  I(confirmed.find(root_tid) == confirmed.end());
+}
+
+static void
+confirm_unique_entries_in_directories(path_state const & ps)
+{
+  smap<std::pair<tid, path_component>, bool> entries;
+  for (path_state::const_iterator i = ps.begin(); i != ps.end(); ++i)
+    {
+      if (null_name(path_item_name(i->second)))
+        {
+          I(path_item_parent(i->second) == root_tid);
+          continue;
+        }
+
+      std::pair<tid, path_component> p = std::make_pair(path_item_parent(i->second),
+                                                        path_item_name(i->second));
+      I(entries.find(p) == entries.end());
+      entries.insert(std::make_pair(p,true));
+    }
+}
+
+static void
+sanity_check_path_state(path_state const & ps)
+{
+  confirm_proper_tree(ps);
+  confirm_unique_entries_in_directories(ps);
+}
+
+path_item::path_item(tid p, ptype t, path_component n)
+  : parent(p), ty(t), name(n)
+{
+  sanity_check_path_item(*this);
+}
+
+path_item::path_item(path_item const & other)
+  : parent(other.parent), ty(other.ty), name(other.name)
+{
+  sanity_check_path_item(*this);
+}
+
+path_item const & path_item::operator=(path_item const & other)
+{
+  parent = other.parent;
+  ty = other.ty;
+  name = other.name;
+  sanity_check_path_item(*this);
+  return *this;
+}
+
+bool path_item::operator==(path_item const & other) const
+{
+  return this->parent == other.parent &&
+    this->ty == other.ty &&
+    this->name == other.name;
+}
+
+
+static void
+check_states_agree(path_state const & p1,
+                   path_state const & p2)
+{
+  path_analysis pa;
+  pa.first = p1;
+  pa.second = p2;
+  // dump_analysis("agreement", pa);
+  for (path_state::const_iterator i = p1.begin(); i != p1.end(); ++i)
+    {
+      path_state::const_iterator j = p2.find(i->first);
+      I(j != p2.end());
+      I(path_item_type(i->second) == path_item_type(j->second));
+      // I(!
(null_name(path_item_name(i->second)) + // && + // null_name(path_item_name(j->second)))); + } +} + +void +sanity_check_path_analysis(path_analysis const & pr) +{ + sanity_check_path_state(pr.first); + sanity_check_path_state(pr.second); + check_states_agree(pr.first, pr.second); + check_states_agree(pr.second, pr.first); +} + + +// construction helpers + +static boost::shared_ptr +new_dnode() +{ + return boost::shared_ptr(new directory_node()); +} + +static boost::shared_ptr +dnode(directory_map & dir, tid t) +{ + boost::shared_ptr node; + directory_map::const_iterator dirent = dir.find(t); + if (dirent == dir.end()) + { + node = new_dnode(); + dir.insert(std::make_pair(t, node)); + } + else + node = dirent->second; + return node; +} + + +// This function takes a vector of path components and joins them into a +// single file_path. Valid input may be a single-element vector whose sole +// element is the empty path component (""); this represents the null path, +// which we use to represent non-existent files. Alternatively, input may be +// a multi-element vector, in which case all elements of the vector are +// required to be non-null. The following are valid inputs (with strings +// replaced by their interned version, of course): +// - [""] +// - ["foo"] +// - ["foo", "bar"] +// The following are not: +// - [] +// - ["foo", ""] +// - ["", "bar"] +static void +compose_path(std::vector const & names, + file_path & path) +{ + try + { + std::vector::const_iterator i = names.begin(); + I(i != names.end()); + fs::path p = mkpath(the_path_component_maker.lookup(*i)); + ++i; + if (names.size() > 1) + I(!null_name(*i)); + for ( ; i != names.end(); ++i) + { + I(!null_name(*i)); + p /= mkpath(the_path_component_maker.lookup(*i)); + } + path = file_path(p.string()); + } + catch (std::runtime_error &e) + { + throw informative_failure(e.what()); + } +} + +static void +get_full_path(path_state const & state, + tid t, + std::vector & pth) +{ + std::vector tmp; + while(t != root_tid) + { + path_state::const_iterator i = state.find(t); + I(i != state.end()); + tmp.push_back(path_item_name(i->second)); + t = path_item_parent(i->second); + } + pth.clear(); + std::copy(tmp.rbegin(), tmp.rend(), inserter(pth, pth.begin())); +} + +static void +get_full_path(path_state const & state, + tid t, + file_path & pth) +{ + std::vector tmp; + get_full_path(state, t, tmp); + // L(F("got %d-entry path for tid %d\n") % tmp.size() % t); + compose_path(tmp, pth); +} + +static void +clear_rearrangement(change_set::path_rearrangement & pr) +{ + pr.deleted_files.clear(); + pr.deleted_dirs.clear(); + pr.renamed_files.clear(); + pr.renamed_dirs.clear(); + pr.added_files.clear(); +} + +static void +clear_change_set(change_set & cs) +{ + clear_rearrangement(cs.rearrangement); + cs.deltas.clear(); +} + +static void +compose_rearrangement(path_analysis const & pa, + change_set::path_rearrangement & pr) +{ + clear_rearrangement(pr); + + for (path_state::const_iterator i = pa.first.begin(); + i != pa.first.end(); ++i) + { + tid curr(path_state_tid(i)); + std::vector old_name, new_name; + file_path old_path, new_path; + + path_state::const_iterator j = pa.second.find(curr); + I(j != pa.second.end()); + path_item old_item(path_state_item(i)); + path_item new_item(path_state_item(j)); + + // compose names + if (!null_name(path_item_name(old_item))) + { + get_full_path(pa.first, curr, old_name); + compose_path(old_name, old_path); + } + + if (!null_name(path_item_name(new_item))) + { + get_full_path(pa.second, curr, new_name); + 
compose_path(new_name, new_path); + } + + if (old_path == new_path) + { + L(F("skipping preserved %s %d : '%s'\n") + % (path_item_type(old_item) == ptype_directory ? "directory" : "file") + % curr % old_path); + continue; + } + + L(F("analyzing %s %d : '%s' -> '%s'\n") + % (path_item_type(old_item) == ptype_directory ? "directory" : "file") + % curr % old_path % new_path); + + if (null_name(path_item_name(old_item))) + { + // an addition (which must be a file, not a directory) + I(! null_name(path_item_name(new_item))); + I(path_item_type(new_item) != ptype_directory); + pr.added_files.insert(new_path); + } + else if (null_name(path_item_name(new_item))) + { + // a deletion + I(! null_name(path_item_name(old_item))); + switch (path_item_type(new_item)) + { + case ptype_directory: + pr.deleted_dirs.insert(old_path); + break; + case ptype_file: + pr.deleted_files.insert(old_path); + break; + } + } + else + { + // a generic rename + switch (path_item_type(new_item)) + { + case ptype_directory: + pr.renamed_dirs.insert(std::make_pair(old_path, new_path)); + break; + case ptype_file: + pr.renamed_files.insert(std::make_pair(old_path, new_path)); + break; + } + } + } +} + + + + +// +// this takes a path of the form +// +// "p[0]/p[1]/.../p[n-1]/p[n]" +// +// and fills in a vector of paths corresponding to p[0] ... p[n-1], +// along with a separate "leaf path" for element p[n]. +// + +static void +split_path(file_path const & p, + std::vector & components) +{ + components.clear(); + fs::path tmp = mkpath(p()); + for (fs::path::iterator i = tmp.begin(); i != tmp.end(); ++i) + components.push_back(the_path_component_maker.make(*i)); +} + +static void +split_path(file_path const & p, + std::vector & prefix, + path_component & leaf_path) +{ + split_path(p, prefix); + I(prefix.size() > 0); + leaf_path = prefix.back(); + prefix.pop_back(); +} + +static bool +lookup_path(std::vector const & pth, + directory_map const & dir, + tid & t) +{ + t = root_tid; + for (std::vector::const_iterator i = pth.begin(); + i != pth.end(); ++i) + { + directory_map::const_iterator dirent = dir.find(t); + if (dirent != dir.end()) + { + boost::shared_ptr node = dirent->second; + directory_node::const_iterator entry = node->find(*i); + if (entry == node->end()) + return false; + t = directory_entry_tid(entry); + } + else + return false; + } + return true; +} + +static bool +lookup_path(file_path const & pth, + directory_map const & dir, + tid & t) +{ + std::vector vec; + split_path(pth, vec); + return lookup_path(vec, dir, t); +} + +static tid +ensure_entry(directory_map & dmap, + path_state & state, + tid dir_tid, + ptype entry_ty, + path_component entry, + tid_source & ts) +{ + I(! 
null_name(entry)); + + if (dir_tid != root_tid) + { + path_state::const_iterator parent = state.find(dir_tid); + I( parent != state.end()); + + // if our parent is null, we immediately become null too, and attach to + // the root node (where all null entries reside) + if (null_name(path_item_name(path_state_item(parent)))) + { + tid new_tid = ts.next(); + state.insert(std::make_pair(new_tid, path_item(root_tid, entry_ty, make_null_component()))); + return new_tid; + } + } + + boost::shared_ptr node = dnode(dmap, dir_tid); + directory_node::const_iterator node_entry = node->find(entry); + + if (node_entry != node->end()) + { + I(node_entry->second.first == entry_ty); + return node_entry->second.second; + } + else + { + tid new_tid = ts.next(); + state.insert(std::make_pair(new_tid, path_item(dir_tid, entry_ty, entry))); + node->insert(std::make_pair(entry, std::make_pair(entry_ty, new_tid))); + return new_tid; + } +} + +static tid +ensure_dir_in_map (std::vector pth, + directory_map & dmap, + path_state & state, + tid_source & ts) +{ + tid dir_tid = root_tid; + for (std::vector::const_iterator p = pth.begin(); + p != pth.end(); ++p) + { + dir_tid = ensure_entry(dmap, state, dir_tid, + ptype_directory, *p, ts); + } + return dir_tid; +} + +static tid +ensure_dir_in_map (file_path const & path, + directory_map & dmap, + path_state & state, + tid_source & ts) +{ + std::vector components; + split_path(path, components); + return ensure_dir_in_map(components, dmap, state, ts); +} + +static tid +ensure_file_in_map (file_path const & path, + directory_map & dmap, + path_state & state, + tid_source & ts) +{ + std::vector prefix; + path_component leaf_path; + split_path(path, prefix, leaf_path); + + I(! null_name(leaf_path)); + tid dir_tid = ensure_dir_in_map(prefix, dmap, state, ts); + return ensure_entry(dmap, state, dir_tid, ptype_file, leaf_path, ts); +} + +static void +ensure_entries_exist (path_state const & self_state, + directory_map & other_dmap, + path_state & other_state, + tid_source & ts) +{ + for (path_state::const_iterator i = self_state.begin(); + i != self_state.end(); ++i) + { + if (other_state.find(path_state_tid(i)) != other_state.end()) + continue; + + if (null_name(path_item_name(path_state_item(i)))) + continue; + + file_path full; + get_full_path(self_state, path_state_tid(i), full); + switch (path_item_type(path_state_item(i))) + { + case ptype_directory: + ensure_dir_in_map(full, other_dmap, other_state, ts); + break; + + case ptype_file: + ensure_file_in_map(full, other_dmap, other_state, ts); + break; + } + } +} + + +static void +apply_state_renumbering(state_renumbering const & renumbering, + path_state & state) +{ + sanity_check_path_state(state); + path_state tmp(state); + state.clear(); + + for (path_state::const_iterator i = tmp.begin(); i != tmp.end(); ++i) + { + path_item item = path_state_item(i); + tid t = path_state_tid(i); + + state_renumbering::const_iterator j = renumbering.find(t); + if (j != renumbering.end()) + t = j->second; + + j = renumbering.find(item.parent); + if (j != renumbering.end()) + item.parent = j->second; + + state.insert(std::make_pair(t, item)); + } + sanity_check_path_state(state); +} + +static void +apply_state_renumbering(state_renumbering const & renumbering, + path_analysis & pa) +{ + apply_state_renumbering(renumbering, pa.first); + apply_state_renumbering(renumbering, pa.second); +} + + +// this takes a path in the path space defined by input_dir and rebuilds it +// in the path space defined by output_space, including any changes to 
+// parents in the path (rather than directly to the path leaf name). it +// therefore *always* succeeds; sometimes it does nothing if there's no +// affected parent, but you always get a rebuilt path in the output space. + +static void +reconstruct_path(file_path const & input, + directory_map const & input_dir, + path_state const & output_space, + file_path & output) +{ + std::vector vec; + std::vector rebuilt; + + // L(F("reconstructing path '%s' under analysis\n") % input); + + split_path(input, vec); + + tid t = root_tid; + std::vector::const_iterator pth = vec.begin(); + while (pth != vec.end()) + { + directory_map::const_iterator dirent = input_dir.find(t); + if (dirent == input_dir.end()) + break; + + boost::shared_ptr node = dirent->second; + directory_node::const_iterator entry = node->find(*pth); + if (entry == node->end()) + break; + + { + // check to see if this is the image of an added or deleted entry + // (i.e. null name in output space), if so it terminates our + // search. + path_state::const_iterator i = output_space.find(directory_entry_tid(entry)); + I(i != output_space.end()); + if (null_name(path_item_name(path_state_item(i)))) + { + // L(F("input path element '%s' is null in output space, mapping truncated\n") % *pth); + break; + } + } + + // L(F("resolved entry '%s' in reconstruction\n") % *pth); + ++pth; + t = directory_entry_tid(entry); + + if (directory_entry_type(entry) != ptype_directory) + break; + } + + get_full_path(output_space, t, rebuilt); + + while(pth != vec.end()) + { + // L(F("copying tail entry '%s' in reconstruction\n") % *pth); + rebuilt.push_back(*pth); + ++pth; + } + + compose_path(rebuilt, output); + // L(F("reconstructed path '%s' as '%s'\n") % input % output); +} + + +static void +build_directory_map(path_state const & state, + directory_map & dir) +{ + sanity_check_path_state(state); + dir.clear(); + // L(F("building directory map for %d entries\n") % state.size()); + for (path_state::const_iterator i = state.begin(); i != state.end(); ++i) + { + tid curr = path_state_tid(i); + path_item item = path_state_item(i); + tid parent = path_item_parent(item); + path_component name = path_item_name(item); + ptype type = path_item_type(item); + // L(F("adding entry %s (%s %d) to directory node %d\n") + // % name % (type == ptype_directory ? 
"dir" : "file") % curr % parent); + dnode(dir, parent)->insert(std::make_pair(name,std::make_pair(type, curr))); + + // also, make sure to add current node if it's a directory, even if + // there are no entries in it + if (type == ptype_directory) + dnode(dir, curr); + } +} + + +void +analyze_rearrangement(change_set::path_rearrangement const & pr, + path_analysis & pa, + tid_source & ts) +{ + directory_map first_map, second_map; + state_renumbering renumbering; + std::set damaged_in_first, damaged_in_second; + + pa.first.clear(); + pa.second.clear(); + + for (std::set::const_iterator f = pr.deleted_files.begin(); + f != pr.deleted_files.end(); ++f) + { + tid x = ensure_file_in_map(*f, first_map, pa.first, ts); + pa.second.insert(std::make_pair(x, path_item(root_tid, ptype_file, make_null_component()))); + damaged_in_first.insert(x); + } + + for (std::set::const_iterator d = pr.deleted_dirs.begin(); + d != pr.deleted_dirs.end(); ++d) + { + tid x = ensure_dir_in_map(*d, first_map, pa.first, ts); + pa.second.insert(std::make_pair(x, path_item(root_tid, ptype_directory, make_null_component()))); + damaged_in_first.insert(x); + } + + for (std::map::const_iterator rf = pr.renamed_files.begin(); + rf != pr.renamed_files.end(); ++rf) + { + tid a = ensure_file_in_map(rf->first, first_map, pa.first, ts); + tid b = ensure_file_in_map(rf->second, second_map, pa.second, ts); + I(renumbering.find(a) == renumbering.end()); + renumbering.insert(std::make_pair(b,a)); + damaged_in_first.insert(a); + damaged_in_second.insert(b); + } + + for (std::map::const_iterator rd = pr.renamed_dirs.begin(); + rd != pr.renamed_dirs.end(); ++rd) + { + tid a = ensure_dir_in_map(rd->first, first_map, pa.first, ts); + tid b = ensure_dir_in_map(rd->second, second_map, pa.second, ts); + I(renumbering.find(a) == renumbering.end()); + renumbering.insert(std::make_pair(b,a)); + damaged_in_first.insert(a); + damaged_in_second.insert(b); + } + + for (std::set::const_iterator a = pr.added_files.begin(); + a != pr.added_files.end(); ++a) + { + tid x = ensure_file_in_map(*a, second_map, pa.second, ts); + pa.first.insert(std::make_pair(x, path_item(root_tid, ptype_file, make_null_component()))); + damaged_in_second.insert(x); + } + + // we now have two states which probably have a number of entries in + // common. we know already of an interesting set of entries they have in + // common: all the renamed_foo entries. for each such renamed_foo(a,b) + // entry, we made an entry in our state_renumbering of the form b->a, + // while building the states. + + // dump_analysis("analyzed", pa); + // dump_renumbering("first", renumbering); + apply_state_renumbering(renumbering, pa.second); + build_directory_map(pa.first, first_map); + build_directory_map(pa.second, second_map); + renumbering.clear(); + // dump_analysis("renumbered once", pa); + + // that only gets us half way, though: + // + // - every object which was explicitly moved (thus stayed alive) has been + // renumbered in re.second to have the same tid as in re.first + // + // - every object which was merely mentionned in passing -- say due to + // being an intermediate directory in a path -- and was not moved, still + // has differing tids in re.first and re.second (or worse, may only + // even have an *entry* in one of them) + // + // the second point here is what we need to correct: if a path didn't + // move, wasn't destroyed, and wasn't added, we want it to have the same + // tid. 
but that's a relatively easy condition to check; we've been + // keeping sets of all the objects which were damaged on each side of + // this business anyways. + + + // pass #1 makes sure that all the entries in each state *exist* within + // the other state, even if they have the wrong numbers + + ensure_entries_exist (pa.first, second_map, pa.second, ts); + ensure_entries_exist (pa.second, first_map, pa.first, ts); + + // pass #2 identifies common un-damaged elements from 2->1 and inserts + // renumberings + + for (path_state::const_iterator i = pa.second.begin(); + i != pa.second.end(); ++i) + { + tid first_tid, second_tid; + second_tid = path_state_tid(i); + file_path full; + if (pa.first.find(second_tid) != pa.first.end()) + continue; + get_full_path(pa.second, second_tid, full); + if (damaged_in_second.find(second_tid) != damaged_in_second.end()) + continue; + if (null_name(path_item_name(path_state_item(i)))) + continue; + I(lookup_path(full, first_map, first_tid)); + renumbering.insert(std::make_pair(second_tid, first_tid)); + } + + // dump_renumbering("second", renumbering); + apply_state_renumbering(renumbering, pa.second); + // dump_analysis("renumbered again", pa); + + // that should be the whole deal; if we don't have consensus at this + // point we have done something wrong. + sanity_check_path_analysis (pa); +} + +void +normalize_path_rearrangement(change_set::path_rearrangement & norm) +{ + path_analysis tmp; + tid_source ts; + + analyze_rearrangement(norm, tmp, ts); + clear_rearrangement(norm); + compose_rearrangement(tmp, norm); +} + +void +normalize_change_set(change_set & norm) +{ + normalize_path_rearrangement(norm.rearrangement); + change_set::delta_map tmp = norm.deltas; + for (change_set::delta_map::const_iterator i = tmp.begin(); + i != tmp.end(); ++i) + { + if (delta_entry_src(i) == delta_entry_dst(i)) + norm.deltas.erase(delta_entry_path(i)); + } +} + + +// begin stuff related to concatenation + +static void +index_entries(path_state const & state, + std::map & files, + std::map & dirs) +{ + for (path_state::const_iterator i = state.begin(); + i != state.end(); ++i) + { + file_path full; + path_item item = path_state_item(i); + get_full_path(state, path_state_tid(i), full); + + if (null_name(path_item_name(item))) + continue; + + switch (path_item_type(item)) + { + case ptype_directory: + dirs.insert(std::make_pair(full, path_state_tid(i))); + break; + + case ptype_file: + files.insert(std::make_pair(full, path_state_tid(i))); + break; + } + } +} + +// this takes every (p1,t1) entry in b and, if (p1,t2) it exists in a, +// inserts (t1,t2) in the rename set. in other words, it constructs the +// renumbering from b->a +static void +extend_renumbering_from_path_identities(std::map const & a, + std::map const & b, + state_renumbering & renumbering) +{ + for (std::map::const_iterator i = b.begin(); + i != b.end(); ++i) + { + I(! 
null_name(i->first)); + std::map::const_iterator j = a.find(i->first); + if (j == a.end()) + continue; + I(renumbering.find(i->second) == renumbering.end()); + renumbering.insert(std::make_pair(i->second, j->second)); + } +} + +static void +extend_state(path_state const & src, + path_state & dst) +{ + for (path_state::const_iterator i = src.begin(); + i != src.end(); ++i) + { + if (dst.find(path_state_tid(i)) == dst.end()) + dst.insert(*i); + } +} + +static void +ensure_tids_disjoint(path_analysis const & a, + path_analysis const & b) +{ + for (path_state::const_iterator i = a.first.begin(); + i != a.first.end(); ++i) + { + I(b.first.find(path_state_tid(i)) == b.first.end()); + } + for (path_state::const_iterator i = b.first.begin(); + i != b.first.end(); ++i) + { + I(a.first.find(path_state_tid(i)) == a.first.end()); + } +} + +static void +extract_killed(path_analysis const & a, + std::set & killed) + +{ + killed.clear(); + directory_map first_map, second_map; + + build_directory_map(a.first, first_map); + build_directory_map(a.second, second_map); + + for (directory_map::const_iterator i = first_map.begin(); + i != first_map.end(); ++i) + { + tid dir_tid = i->first; + directory_map::const_iterator j = second_map.find(dir_tid); + I(j != second_map.end()); + + // a path P = DIR/LEAF is "killed" by a path_analysis iff the + // directory node named DIR in the post-state contains LEAF in the + // pre-state, and does not contain LEAF in the post-state + + boost::shared_ptr first_node = i->second; + boost::shared_ptr second_node = j->second; + + for (directory_node::const_iterator p = first_node->begin(); + p != first_node->end(); ++p) + { + path_component first_name = directory_entry_name(p); + directory_node::const_iterator q = second_node->find(first_name); + if (q == second_node->end()) + { + // found a killed entry + std::vector killed_name; + file_path killed_path; + get_full_path(a.second, dir_tid, killed_name); + killed_name.push_back(first_name); + compose_path(killed_name, killed_path); + killed.insert(killed_path); + } + } + } +} + +static void +check_delta_entries_not_directories(path_analysis const & pa, + change_set::delta_map const & dels) +{ + directory_map dmap; + build_directory_map(pa.second, dmap); + for (change_set::delta_map::const_iterator i = dels.begin(); + i != dels.end(); ++i) + { + tid delta_tid; + if (lookup_path(delta_entry_path(i), dmap, delta_tid)) + { + path_state::const_iterator j = pa.second.find(delta_tid); + I(j != pa.second.end()); + I(path_item_type(path_state_item(j)) == ptype_file); + } + } +} + +static void +concatenate_disjoint_analyses(path_analysis const & a, + path_analysis const & b, + std::set const & a_killed, + path_analysis & concatenated) +{ + std::map a_second_files, a_second_dirs; + std::map b_first_files, b_first_dirs; + path_analysis a_tmp(a), b_tmp(b); + state_renumbering renumbering; + + // the trick here is that a.second and b.first supposedly refer to the + // same state-of-the-world, so all we need to do is: + // + // - confirm that both analyses have disjoint tids + // - work out which tids in b to identify with tids in a + // - renumber b + // + // - copy a.first -> concatenated.first + // - insert all elements of b.first not already in concatenated.first + // - copy b.second -> concatenated.second + // - insert all elements of a.second not already in concatenated.second + + ensure_tids_disjoint(a_tmp, b_tmp); + + index_entries(a_tmp.second, a_second_files, a_second_dirs); + index_entries(b_tmp.first, b_first_files, b_first_dirs); + 
+  {
+    std::set<file_path>
+      a_second_file_set, a_second_dir_set,
+      b_first_file_set, b_first_dir_set;
+
+    extract_first(a_second_files, a_second_file_set);
+    extract_first(a_second_dirs, a_second_dir_set);
+    extract_first(b_first_files, b_first_file_set);
+    extract_first(b_first_dirs, b_first_dir_set);
+
+    // check that there are no entry-type mismatches
+    check_sets_disjoint(a_second_file_set, b_first_dir_set);
+    check_sets_disjoint(a_second_dir_set, b_first_file_set);
+
+    // check that there's no use of killed entries
+    check_sets_disjoint(a_killed, b_first_dir_set);
+    check_sets_disjoint(a_killed, b_first_file_set);
+  }
+
+  extend_renumbering_from_path_identities(a_second_files, b_first_files, renumbering);
+  extend_renumbering_from_path_identities(a_second_dirs, b_first_dirs, renumbering);
+
+  // dump_analysis("state A", a_tmp);
+  // dump_analysis("state B", b_tmp);
+  // dump_renumbering("concatenation", renumbering);
+  apply_state_renumbering(renumbering, b_tmp);
+
+  concatenated.first = a_tmp.first;
+  concatenated.second = b_tmp.second;
+
+  extend_state(b_tmp.first, concatenated.first);
+  extend_state(a_tmp.second, concatenated.second);
+
+  sanity_check_path_analysis(concatenated);
+}
+
+void
+concatenate_rearrangements(change_set::path_rearrangement const & a,
+                           change_set::path_rearrangement const & b,
+                           change_set::path_rearrangement & concatenated)
+{
+  a.check_sane();
+  b.check_sane();
+  concatenated = change_set::path_rearrangement();
+
+  tid_source ts;
+  path_analysis a_analysis, b_analysis, concatenated_analysis;
+
+  analyze_rearrangement(a, a_analysis, ts);
+  analyze_rearrangement(b, b_analysis, ts);
+
+  std::set<file_path> a_killed;
+  extract_killed(a_analysis, a_killed);
+
+  concatenate_disjoint_analyses(a_analysis,
+                                b_analysis,
+                                a_killed,
+                                concatenated_analysis);
+
+  compose_rearrangement(concatenated_analysis,
+                        concatenated);
+
+  concatenated.check_sane();
+}
+
+void
+concatenate_change_sets(change_set const & a,
+                        change_set const & b,
+                        change_set & concatenated)
+{
+  a.check_sane();
+  b.check_sane();
+
+  L(F("concatenating change sets\n"));
+
+  tid_source ts;
+  path_analysis a_analysis, b_analysis, concatenated_analysis;
+
+  analyze_rearrangement(a.rearrangement, a_analysis, ts);
+  analyze_rearrangement(b.rearrangement, b_analysis, ts);
+
+  std::set<file_path> a_killed;
+  extract_killed(a_analysis, a_killed);
+
+  concatenate_disjoint_analyses(a_analysis,
+                                b_analysis,
+                                a_killed,
+                                concatenated_analysis);
+
+  compose_rearrangement(concatenated_analysis,
+                        concatenated.rearrangement);
+
+  // now process the deltas
+
+  concatenated.deltas.clear();
+  directory_map a_dst_map, b_src_map;
+  L(F("concatenating %d and %d deltas\n")
+    % a.deltas.size() % b.deltas.size());
+  build_directory_map(a_analysis.second, a_dst_map);
+  build_directory_map(b_analysis.first, b_src_map);
+
+  // first rename a's deltas under the rearrangement of b
+  for (change_set::delta_map::const_iterator del = a.deltas.begin();
+       del != a.deltas.end(); ++del)
+    {
+      file_path new_pth;
+      L(F("processing delta on %s\n") % delta_entry_path(del));
+
+      // work out the name this entry carries in b.second
+      reconstruct_path(delta_entry_path(del), b_src_map, b_analysis.second, new_pth);
+      L(F("delta on %s in first changeset renamed to %s\n")
+        % delta_entry_path(del) % new_pth);
+
+      if (b.rearrangement.has_deleted_file(delta_entry_path(del)))
+        // the delta should be removed if the file is going to be deleted
+        L(F("discarding delta [%s]->[%s] for deleted file '%s'\n")
+          % delta_entry_src(del) % delta_entry_dst(del) % delta_entry_path(del));
+      else
+        concatenated.deltas.insert(std::make_pair(new_pth,
+                                                  std::make_pair(delta_entry_src(del),
+                                                                 delta_entry_dst(del))));
+    }
+
+  // next fuse any deltas id1->id2 and id2->id3 to id1->id3
+  for (change_set::delta_map::const_iterator del = b.deltas.begin();
+       del != b.deltas.end(); ++del)
+    {
+
+      file_path del_pth = delta_entry_path(del);
+      change_set::delta_map::const_iterator existing =
+        concatenated.deltas.find(del_pth);
+      if (existing != concatenated.deltas.end())
+        {
+          L(F("fusing deltas on %s : %s -> %s and %s -> %s\n")
+            % del_pth
+            % delta_entry_src(existing)
+            % delta_entry_dst(existing)
+            % delta_entry_src(del)
+            % delta_entry_dst(del));
+          I(delta_entry_dst(existing) == delta_entry_src(del));
+          std::pair<file_id, file_id> fused = std::make_pair(delta_entry_src(existing),
+                                                             delta_entry_dst(del));
+          concatenated.deltas.erase(del_pth);
+          concatenated.deltas.insert(std::make_pair(del_pth, fused));
+        }
+      else
+        {
+          L(F("delta on %s in second changeset copied forward\n") % del_pth);
+          // in general don't want deltas on deleted files. however if a
+          // file has been deleted then re-added, then a delta is valid
+          // (it applies to the newly-added file)
+          if (!b.rearrangement.has_deleted_file(del_pth)
+              || b.rearrangement.has_added_file(del_pth)
+              || b.rearrangement.has_renamed_file_dst(del_pth))
+            concatenated.deltas.insert(*del);
+        }
+    }
+
+  normalize_change_set(concatenated);
+  concatenated.check_sane();
+
+  L(F("finished concatenation\n"));
+}
+
+// end stuff related to concatenation
+
+
+// begin stuff related to merging
+
+
+static void
+extend_renumbering_via_added_files(path_analysis const & a,
+                                   path_analysis const & b,
+                                   state_renumbering & existing_renumbering,
+                                   state_renumbering & renumbering)
+{
+  directory_map a_second_map;
+  build_directory_map(a.second, a_second_map);
+
+  for (path_state::const_iterator i = b.first.begin();
+       i != b.first.end(); ++i)
+    {
+      path_item item = path_state_item(i);
+      if (path_item_type(item) == ptype_file && null_name(path_item_name(item)))
+        {
+          path_state::const_iterator j = b.second.find(path_state_tid(i));
+          I(j != b.second.end());
+          path_component leaf_name = path_item_name(path_state_item(j));
+
+          I(path_item_type(path_state_item(j)) == ptype_file);
+          if (!
null_name(leaf_name)) + { + tid added_parent_tid = path_item_parent(path_state_item(j)); + state_renumbering::const_iterator ren = existing_renumbering.find(added_parent_tid); + if (ren != existing_renumbering.end()) + added_parent_tid = ren->second; + directory_map::const_iterator dirent = a_second_map.find(added_parent_tid); + if (dirent != a_second_map.end()) + { + boost::shared_ptr node = dirent->second; + directory_node::const_iterator entry = node->find(leaf_name); + if (entry != node->end() && directory_entry_type(entry) == ptype_file) + { + I(renumbering.find(path_state_tid(i)) == renumbering.end()); + renumbering.insert(std::make_pair(path_state_tid(i), + directory_entry_tid(entry))); + } + } + } + } + } +} + +static bool +find_item(tid t, path_state const & ps, + path_item & item) +{ + path_state::const_iterator i = ps.find(t); + if (i == ps.end()) + return false; + item = path_state_item(i); + return true; +} + +static bool +find_items(tid t, path_analysis const & pa, + path_item & first, path_item & second) +{ + if (find_item(t, pa.first, first)) + { + I(find_item(t, pa.second, second)); + I(path_item_type(first) == path_item_type(second)); + return true; + } + else + { + I(!find_item(t, pa.second, second)); + return false; + } +} + +static void +resolve_conflict(tid t, ptype ty, + path_analysis const & a_tmp, + path_analysis const & b_tmp, + path_item & resolved, + path_state & resolved_conflicts, + app_state & app) +{ + path_state::const_iterator i = resolved_conflicts.find(t); + + path_item a_item, b_item; + find_item(t, a_tmp.second, a_item); + find_item(t, b_tmp.second, b_item); + + file_path anc, a, b, res; + get_full_path(a_tmp.first, t, anc); + get_full_path(a_tmp.second, t, a); + get_full_path(b_tmp.second, t, b); + + if (i != resolved_conflicts.end()) + { + resolved = path_state_item(i); + } + else if (null_name(path_item_name(a_item)) && + ! null_name(path_item_name(b_item))) + { + L(F("delete of %s dominates rename to %s\n") % anc % b); + resolved = a_item; + resolved_conflicts.insert(std::make_pair(t, resolved)); + } + else if (null_name(path_item_name(b_item)) && + ! null_name(path_item_name(a_item))) + { + L(F("delete of %s dominates rename to %s\n") % anc % a); + resolved = b_item; + resolved_conflicts.insert(std::make_pair(t, resolved)); + } + else + { + switch (ty) + { + case ptype_file: + N(app.lua.hook_resolve_file_conflict(anc, a, b, res), + F("unable to resolve file conflict '%s' -> '%s' vs. '%s'") % anc % a % b); + break; + case ptype_directory: + N(app.lua.hook_resolve_dir_conflict(anc, a, b, res), + F("unable to resolve dir conflict '%s' -> '%s' vs. '%s'") % anc % a % b); + break; + } + + N((res == a || res == b), + F("illegal conflict resolution '%s', wanted '%s' or '%s'\n") % res % a % b); + + if (res == a) + I(find_item(t, a_tmp.second, resolved)); + else + I(find_item(t, b_tmp.second, resolved)); + + resolved_conflicts.insert(std::make_pair(t, resolved)); + } +} + +static void +ensure_no_rename_clobbers(path_analysis const & a, + path_analysis const & b) +{ + // there is a special non-mergable pair of changes which we need + // to identify here: + // + // tid i : x -> y in change A + // tid j : z -> x in change B + // + // on the surface it looks like it ought to be mergable, since there is + // no conflict in the tids. except for one problem: B effectively + // clobbered i with j. 
there is nothing you can append to change B to + // revive the identity of i; in fact you risk having i and j identified + // if you form the naive merge concatenation BA. indeed, since A and B + // both supposedly start in the same state (in which i occupies name x), + // it really ought not to be possible to form B; you should have to + // accompany it with some sort of statement about the fate of i. + // + // as it stands, we're going to fault when we see this. if it turns out + // that there's a legal way of constructing such changes, one option is + // to synthesize a delete of i in B; essentially read "z->x" as an + // implicit "delete x first if it exists in post-state". + // + // however, for the time being this is a fault because we believe they + // should be globally illegal clobbers. + + directory_map b_first_map, b_second_map; + build_directory_map (b.first, b_first_map); + build_directory_map (b.second, b_second_map); + tid a_tid, b_tid; + + for (path_state::const_iterator i = a.first.begin(); + i != a.first.end(); ++i) + { + file_path anc_path, a_second_path; + a_tid = path_state_tid(i); + get_full_path(a.first, a_tid, anc_path); + + if (! lookup_path(anc_path, b_first_map, b_tid)) + { + file_path b_second_path; + reconstruct_path(anc_path, b_first_map, b.second, b_second_path); + + N(! lookup_path(b_second_path, b_second_map, b_tid), + (F("tid %d (%s) clobbered tid %d (%s)\n") + % b_tid % b_second_path + % a_tid % anc_path)); + } + } + +} + +static void +project_missing_changes(path_analysis const & a_tmp, + path_analysis const & b_tmp, + path_analysis & b_merged, + path_state & resolved_conflicts, + app_state & app) +{ + + // for each tid t adjusted in a: + // - if t exists in b: + // - if the change to t in b == change in a, skip + // - else resolve conflict + // - if conflict resolved in favour of a, append to merged + // - if resolved in favour of b, skip + // - else (no t in b) insert a's change to t in merged + + for (path_state::const_iterator i = a_tmp.first.begin(); + i != a_tmp.first.end(); ++i) + { + tid t = path_state_tid(i); + path_item a_first_item, a_second_item; + path_item b_first_item, b_second_item; + I(find_items(t, a_tmp, a_first_item, a_second_item)); + if (find_items(t, b_tmp, b_first_item, b_second_item)) + { + I(a_first_item == b_first_item); + if (a_second_item == b_second_item) + { + L(F("skipping common change on %s (tid %d)\n") + % path_item_name(a_first_item) % t); + } + else if (a_first_item == a_second_item) + { + L(F("skipping neutral change of %s -> %s (tid %d)\n") + % path_item_name(a_first_item) + % path_item_name(a_second_item) + % t); + } + else if (b_first_item == b_second_item) + { + L(F("propagating change on %s -> %s (tid %d)\n") + % path_item_name(b_first_item) + % path_item_name(b_second_item) + % t); + b_merged.first.insert(std::make_pair(t, b_second_item)); + b_merged.second.insert(std::make_pair(t, a_second_item)); + } + else + { + // conflict + path_item resolved; + resolve_conflict(t, path_item_type(a_first_item), a_tmp, b_tmp, + resolved, resolved_conflicts, app); + + if (resolved == a_second_item) + { + L(F("conflict detected, resolved in A's favour\n")); + b_merged.first.insert(std::make_pair(t, b_second_item)); + b_merged.second.insert(std::make_pair(t, a_second_item)); + } + else + { + L(F("conflict detected, resolved in B's favour\n")); + } + } + } + else + { + // there was no entry in b at all for this tid, copy it + b_merged.first.insert(std::make_pair(t, a_first_item)); + b_merged.second.insert(std::make_pair(t, 
a_second_item));
+        }
+    }
+
+  // now drive through b.second's view of the directory structure, in case
+  // some intermediate b-only directories showed up the preimages of
+  // A-favoured conflicts.
+  extend_state(b_tmp.second, b_merged.first);
+  extend_state(b_merged.first, b_merged.second);
+}
+
+static void
+rebuild_analysis(path_analysis const & src,
+                 path_analysis & dst,
+                 tid_source & ts)
+{
+  state_renumbering renumbering;
+
+  for (path_state::const_iterator i = src.first.begin();
+       i != src.first.end(); ++i)
+    renumbering.insert(std::make_pair(path_state_tid(i), ts.next()));
+
+  dst = src;
+  apply_state_renumbering(renumbering, dst);
+}
+
+static void
+merge_disjoint_analyses(path_analysis const & a,
+                        path_analysis const & b,
+                        path_analysis & a_renumbered,
+                        path_analysis & b_renumbered,
+                        path_analysis & a_merged,
+                        path_analysis & b_merged,
+                        tid_source & ts,
+                        app_state & app)
+{
+  // we have anc->a and anc->b and we want to construct a->merged and
+  // b->merged, leading to the eventual identity concatenate(a,a_merged) ==
+  // concatenate(b,b_merged).
+
+  path_analysis a_tmp(a), b_tmp(b);
+  state_renumbering renumbering;
+
+  ensure_tids_disjoint(a_tmp, b_tmp);
+
+  // fault on a particular class of mal-formed changesets
+  ensure_no_rename_clobbers(a_tmp, b_tmp);
+  ensure_no_rename_clobbers(b_tmp, a_tmp);
+
+  // a.first and b.first refer to the same state-of-the-world.
+  //
+  // we begin by driving all the entries in a.first into b.first and vice
+  // versa.
+
+  {
+    directory_map a_first_map, b_first_map;
+    build_directory_map(a_tmp.first, a_first_map);
+    build_directory_map(b_tmp.first, b_first_map);
+    ensure_entries_exist(a_tmp.first, b_first_map, b_tmp.first, ts);
+    ensure_entries_exist(b_tmp.first, a_first_map, a_tmp.first, ts);
+  }
+
+  // we then drive any of the new arrivals in a.first to a.second, and
+  // likewise on b
+
+  {
+    directory_map a_second_map, b_second_map;
+    build_directory_map(a_tmp.second, a_second_map);
+    build_directory_map(b_tmp.second, b_second_map);
+    ensure_entries_exist(a_tmp.first, a_second_map, a_tmp.second, ts);
+    ensure_entries_exist(b_tmp.first, b_second_map, b_tmp.second, ts);
+  }
+
+  // we then index, identify, and renumber all the immediately apparent
+  // entries in each side.
+
+  {
+    std::map<file_path, tid> a_first_files, a_first_dirs;
+    std::map<file_path, tid> b_first_files, b_first_dirs;
+    index_entries(a_tmp.first, a_first_files, a_first_dirs);
+    index_entries(b_tmp.first, b_first_files, b_first_dirs);
+    extend_renumbering_from_path_identities(a_first_files, b_first_files, renumbering);
+    extend_renumbering_from_path_identities(a_first_dirs, b_first_dirs, renumbering);
+  }
+
+  // once renamed, b_tmp will have moved a fair bit closer to a_tmp, in
+  // terms of tids. there is still one set of files we haven't accounted
+  // for, however: files added in a and b.
+
+  {
+    state_renumbering aux_renumbering;
+    extend_renumbering_via_added_files(a_tmp, b_tmp, renumbering, aux_renumbering);
+    for (state_renumbering::const_iterator i = aux_renumbering.begin();
+         i != aux_renumbering.end(); ++i)
+      {
+        I(renumbering.find(i->first) == renumbering.end());
+        renumbering.insert(*i);
+      }
+  }
+
+  // renumbering now contains a *complete* renumbering of b->a,
+  // so we reset a_tmp and b_tmp, and renumber b_tmp under this
+  // scheme.
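+  // A small worked example of a complete renumbering (hypothetical
+  // tids, for illustration only): if a named the common file "foo"
+  // with tid 2 while b named it with tid 9, and both sides added an
+  // identical file "bar" (tid 4 in a, tid 10 in b), the path-identity
+  // pass contributes { 9 -> 2 } and the added-files pass contributes
+  // { 10 -> 4 }. After the renumbering below, a_tmp and b_tmp express
+  // their two edits in a single shared tid namespace.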
+ + a_tmp = a; + b_tmp = b; + apply_state_renumbering(renumbering, b_tmp); + + a_renumbered = a_tmp; + b_renumbered = b_tmp; + + // now we're ready to merge (and resolve conflicts) + path_state resolved_conflicts; + project_missing_changes(a_tmp, b_tmp, b_merged, resolved_conflicts, app); + project_missing_changes(b_tmp, a_tmp, a_merged, resolved_conflicts, app); + + { + // now check: the merge analyses, when concatenated with their + // predecessors, should lead to the same composite rearrangement + + tid_source ts_tmp; + path_analysis anc_a_check, a_merge_check, a_check; + path_analysis anc_b_check, b_merge_check, b_check; + change_set::path_rearrangement a_re, b_re; + + rebuild_analysis(a, anc_a_check, ts_tmp); + rebuild_analysis(b, anc_b_check, ts_tmp); + rebuild_analysis(a_merged, a_merge_check, ts_tmp); + rebuild_analysis(b_merged, b_merge_check, ts_tmp); + + std::set anc_a_killed, anc_b_killed; + extract_killed(anc_a_check, anc_a_killed); + extract_killed(anc_b_check, anc_b_killed); + + concatenate_disjoint_analyses(anc_a_check, a_merge_check, anc_a_killed, a_check); + concatenate_disjoint_analyses(anc_b_check, b_merge_check, anc_b_killed, b_check); + compose_rearrangement(a_check, a_re); + compose_rearrangement(b_check, b_re); + I(a_re == b_re); + } + +} + +static void +merge_deltas(file_path const & anc_path, + file_path const & left_path, + file_path const & right_path, + file_path const & path_in_merged, + std::map & merge_finalists, + file_id const & anc, + file_id const & left, + file_id const & right, + file_id & finalist, + merge_provider & merger) +{ + std::map::const_iterator i = merge_finalists.find(path_in_merged); + if (i != merge_finalists.end()) + { + L(F("reusing merge resolution '%s' : '%s' -> '%s'\n") + % path_in_merged % anc % i->second); + finalist = i->second; + } + else + { + if (null_id(anc)) + { + N(merger.try_to_merge_files(left_path, right_path, path_in_merged, left, right, finalist), + F("merge of '%s' : '%s' vs. '%s' (no common ancestor) failed") + % path_in_merged % left % right); + } + else + { + N(merger.try_to_merge_files(anc_path, left_path, right_path, path_in_merged, + anc, left, right, finalist), + F("merge of '%s' : '%s' -> '%s' vs '%s' failed") + % path_in_merged % anc % left % right); + } + + L(F("merge of '%s' : '%s' -> '%s' vs '%s' resolved to '%s'\n") + % path_in_merged % anc % left % right % finalist); + + merge_finalists.insert(std::make_pair(path_in_merged, finalist)); + } +} + +static void +project_missing_deltas(change_set const & a, + change_set const & b, + path_analysis const & a_analysis, + path_analysis const & b_analysis, + path_analysis const & a_merged_analysis, + change_set & b_merged, + merge_provider & merger, + std::map & merge_finalists) +{ + directory_map a_second_map, b_first_map, a_merged_first_map; + build_directory_map(a_analysis.second, a_second_map); + build_directory_map(b_analysis.first, b_first_map); + build_directory_map(a_merged_analysis.first, a_merged_first_map); + + for (change_set::delta_map::const_iterator i = a.deltas.begin(); + i != a.deltas.end(); ++i) + { + file_path path_in_merged, path_in_anc, path_in_b_second; + + // we have a fork like this: + // + // + // +--> [a2] + // [a1==b1] + // +--> [b2] + // + // and we have a delta applied to a file in a2. we want to + // figure out what to call this delta's path in b2. this means + // reconstructing it in a1==b1, then reconstructing it *again* + // in b2. 
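+      // A concrete illustration of the two reconstructions (the paths
+      // here are hypothetical): if a renamed the directory "lib" to
+      // "src", a delta on "src/x.c" in a2 reconstructs through a1==b1
+      // to "lib/x.c"; and if b independently renamed "lib" to "core",
+      // a second reconstruction yields "core/x.c", the name this delta
+      // must carry in b2.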
+ + // first work out what the path in a.first == b.first is + reconstruct_path(delta_entry_path(i), a_second_map, + a_analysis.first, path_in_anc); + + // first work out what the path in b.second is + reconstruct_path(path_in_anc, b_first_map, + b_analysis.second, path_in_b_second); + + // then work out what the path in merged is + reconstruct_path(delta_entry_path(i), a_merged_first_map, + a_merged_analysis.second, path_in_merged); + + // now check to see if there was a delta on the b.second name in b + change_set::delta_map::const_iterator j = b.deltas.find(path_in_b_second); + + if (j == b.deltas.end()) + { + // if not, copy ours over using the merged name + L(F("merge is copying delta '%s' : '%s' -> '%s'\n") + % path_in_merged % delta_entry_src(i) % delta_entry_dst(i)); + I(b.deltas.find(path_in_merged) == b.deltas.end()); + if (b.rearrangement.has_deleted_file(path_in_merged)) + // if the file was deleted on the other fork of the merge, then + // we don't want to keep this delta. + L(F("skipping delta '%s'->'%s' on deleted file '%s'\n") + % delta_entry_src(i) % delta_entry_dst(i) % path_in_merged); + else + b_merged.apply_delta(path_in_merged, delta_entry_src(i), delta_entry_dst(i)); + } + else + { + // if so, either... + + if (!(delta_entry_src(i) == delta_entry_src(j))) + { + // This is a bit of a corner case where a file was added then deleted on one + // of the forks. The src for the addition fork will be null_id, but the src + // for the other fork will be the ancestor file's id. + + // if neither of the forks involved a file addition delta (null_id to something) + // then something bad happened. + I(null_id(delta_entry_src(i)) || null_id(delta_entry_src(j))); + + if (null_id(delta_entry_src(i))) + { + // ... use the delta from 'a' + // 'a' change_set included a delta []->[...], ie file added. We want to + // follow this fork so it gets added to the b_merged changeset + L(F("propagating new file addition delta on '%s' : '%s' -> '%s'\n") + % path_in_merged + % delta_entry_src(j) + % delta_entry_dst(i)); + b_merged.apply_delta(path_in_merged, delta_entry_src(i), delta_entry_dst(i)); + } + else if (null_id(delta_entry_src(j))) + { + // ... ignore the delta + // 'b' change_set included a delta []->[...], ie file added. We don't need + // to add it to the b_merged changeset, since any delta in 'a' will be + // ignored (as 'b' includes deletions). + L(F("skipping new file addition delta on '%s' : '' -> '%s'\n") + % path_in_merged + % delta_entry_dst(j)); + } + } + else if (delta_entry_dst(i) == delta_entry_dst(j)) + { + // ... absorb identical deltas + L(F("skipping common delta '%s' : '%s' -> '%s'\n") + % path_in_merged % delta_entry_src(i) % delta_entry_dst(i)); + } + + else if (delta_entry_src(i) == delta_entry_dst(i)) + { + L(F("skipping neutral delta on '%s' : %s -> %s\n") + % delta_entry_path(i) + % delta_entry_src(i) + % delta_entry_dst(i)); + } + + else if (delta_entry_src(j) == delta_entry_dst(j)) + { + L(F("propagating unperturbed delta on '%s' : '%s' -> '%s'\n") + % delta_entry_path(i) + % delta_entry_src(i) + % delta_entry_dst(i)); + b_merged.apply_delta(path_in_merged, delta_entry_dst(j), delta_entry_dst(i)); + } + + else + { + // ... or resolve conflict + L(F("merging delta '%s' : '%s' -> '%s' vs. 
'%s'\n") + % path_in_merged % delta_entry_src(i) % delta_entry_dst(i) % delta_entry_dst(j)); + file_id finalist; + + merge_deltas(path_in_anc, + delta_entry_path(i), // left_path + delta_entry_path(j), // right_path + path_in_merged, + merge_finalists, + delta_entry_src(i), // anc + delta_entry_dst(i), // left + delta_entry_dst(j), // right + finalist, merger); + L(F("resolved merge to '%s' : '%s' -> '%s'\n") + % path_in_merged % delta_entry_src(i) % finalist); + + // if the conflict resolved to something other than the + // existing post-state of b, add a new entry to the deltas of + // b finishing the job. + if (! (finalist == delta_entry_dst(j))) + b_merged.apply_delta(path_in_merged, delta_entry_dst(j), finalist); + } + } + } +} + + +void +merge_change_sets(change_set const & a, + change_set const & b, + change_set & a_merged, + change_set & b_merged, + merge_provider & merger, + app_state & app) +{ + a.check_sane(); + b.check_sane(); + + L(F("merging change sets\n")); + + tid_source ts; + path_analysis + a_analysis, b_analysis, + a_renumbered, b_renumbered, + a_merged_analysis, b_merged_analysis; + + analyze_rearrangement(a.rearrangement, a_analysis, ts); + analyze_rearrangement(b.rearrangement, b_analysis, ts); + + merge_disjoint_analyses(a_analysis, b_analysis, + a_renumbered, b_renumbered, + a_merged_analysis, b_merged_analysis, + ts, app); + + compose_rearrangement(a_merged_analysis, + a_merged.rearrangement); + + compose_rearrangement(b_merged_analysis, + b_merged.rearrangement); + + std::map merge_finalists; + + project_missing_deltas(a, b, + a_renumbered, b_renumbered, + a_merged_analysis, + b_merged, + merger, merge_finalists); + + project_missing_deltas(b, a, + b_renumbered, a_renumbered, + b_merged_analysis, + a_merged, + merger, merge_finalists); + + { + // confirmation step + change_set a_check, b_check; + // dump_change_set("a", a); + // dump_change_set("a_merged", a_merged); + // dump_change_set("b", b); + // dump_change_set("b_merged", b_merged); + concatenate_change_sets(a, a_merged, a_check); + concatenate_change_sets(b, b_merged, b_check); + // dump_change_set("a_check", a_check); + // dump_change_set("b_check", b_check); + I(a_check == b_check); + } + + normalize_change_set(a_merged); + normalize_change_set(b_merged); + + a_merged.check_sane(); + b_merged.check_sane(); + + L(F("finished merge\n")); +} + +// end stuff related to merging + +void +invert_change_set(change_set const & a2b, + manifest_map const & a_map, + change_set & b2a) +{ + a2b.check_sane(); + tid_source ts; + path_analysis a2b_analysis, b2a_analysis; + + analyze_rearrangement(a2b.rearrangement, a2b_analysis, ts); + + L(F("inverting change set\n")); + b2a_analysis.first = a2b_analysis.second; + b2a_analysis.second = a2b_analysis.first; + compose_rearrangement(b2a_analysis, b2a.rearrangement); + + b2a.deltas.clear(); + + // existing deltas are in "b space" + for (path_state::const_iterator b = b2a_analysis.first.begin(); + b != b2a_analysis.first.end(); ++b) + { + path_state::const_iterator a = b2a_analysis.second.find(path_state_tid(b)); + I(a != b2a_analysis.second.end()); + if (path_item_type(path_state_item(b)) == ptype_file) + { + file_path b_pth, a_pth; + get_full_path(b2a_analysis.first, path_state_tid(b), b_pth); + + if (null_name(path_item_name(path_state_item(b))) && + ! 
null_name(path_item_name(path_state_item(a)))) + { + // b->a represents an add in "a space" + get_full_path(b2a_analysis.second, path_state_tid(a), a_pth); + manifest_map::const_iterator i = a_map.find(a_pth); + I(i != a_map.end()); + b2a.deltas.insert(std::make_pair(a_pth, + std::make_pair(file_id(), + manifest_entry_id(i)))); + L(F("converted 'delete %s' to 'add as %s' in inverse\n") + % a_pth + % manifest_entry_id(i)); + } + else if (! null_name(path_item_name(path_state_item(b))) && + null_name(path_item_name(path_state_item(a)))) + { + // b->a represents a del from "b space" + get_full_path(b2a_analysis.first, path_state_tid(b), b_pth); + L(F("converted add %s to delete in inverse\n") % b_pth ); + } + else + { + get_full_path(b2a_analysis.first, path_state_tid(b), b_pth); + get_full_path(b2a_analysis.second, path_state_tid(a), a_pth); + change_set::delta_map::const_iterator del = a2b.deltas.find(b_pth); + if (del == a2b.deltas.end()) + continue; + file_id src_id(delta_entry_src(del)), dst_id(delta_entry_dst(del)); + L(F("converting delta %s -> %s on %s\n") + % src_id % dst_id % b_pth); + L(F("inverse is delta %s -> %s on %s\n") + % dst_id % src_id % a_pth); + b2a.deltas.insert(std::make_pair(a_pth, std::make_pair(dst_id, src_id))); + } + } + } + + // some deltas might not have been renamed, however. these we just invert the + // direction on + for (change_set::delta_map::const_iterator del = a2b.deltas.begin(); + del != a2b.deltas.end(); ++del) + { + // check to make sure this isn't the image of an add (now a delete) + if (null_id(delta_entry_src(del))) + continue; + // check to make sure this isn't one of the already-moved deltas + if (b2a.deltas.find(delta_entry_path(del)) != b2a.deltas.end()) + continue; + b2a.deltas.insert(std::make_pair(delta_entry_path(del), + std::make_pair(delta_entry_dst(del), + delta_entry_src(del)))); + } + normalize_change_set(b2a); + b2a.check_sane(); +} + +void +move_files_to_tmp_bottom_up(tid t, + local_path const & temporary_root, + path_state const & state, + directory_map const & dmap) +{ + directory_map::const_iterator dirent = dmap.find(t); + if (dirent != dmap.end()) + { + boost::shared_ptr node = dirent->second; + for (directory_node::const_iterator entry = node->begin(); + entry != node->end(); ++entry) + { + tid child = directory_entry_tid(entry); + file_path path; + path_item item; + + find_item(child, state, item); + + if (null_name(path_item_name(item))) + continue; + + // recursively move all sub-entries + if (path_item_type(item) == ptype_directory) + move_files_to_tmp_bottom_up(child, temporary_root, state, dmap); + + get_full_path(state, child, path); + + local_path src(path()); + local_path dst((mkpath(temporary_root()) + / mkpath(boost::lexical_cast(child))).string()); + + P(F("moving %s -> %s\n") % src % dst); + switch (path_item_type(item)) + { + case ptype_file: + if (file_exists(src)) + move_file(src, dst); + break; + case ptype_directory: + if (directory_exists(src)) + move_dir(src, dst); + break; + } + } + } +} + +void +move_files_from_tmp_top_down(tid t, + local_path const & temporary_root, + path_state const & state, + directory_map const & dmap) +{ + directory_map::const_iterator dirent = dmap.find(t); + if (dirent != dmap.end()) + { + boost::shared_ptr node = dirent->second; + for (directory_node::const_iterator entry = node->begin(); + entry != node->end(); ++entry) + { + tid child = directory_entry_tid(entry); + file_path path; + path_item item; + + find_item(child, state, item); + + if (null_name(path_item_name(item))) + 
continue; + + get_full_path(state, child, path); + + local_path src((mkpath(temporary_root()) + / mkpath(boost::lexical_cast(child))).string()); + local_path dst(path()); + + switch (path_item_type(item)) + { + case ptype_file: + if (file_exists(src)) + { + P(F("moving file %s -> %s\n") % src % dst); + make_dir_for(path); + move_file(src, dst); + } + break; + case ptype_directory: + if (directory_exists(src)) + { + P(F("moving dir %s -> %s\n") % src % dst); + make_dir_for(path); + move_dir(src, dst); + } + break; + } + + // recursively move all sub-entries + if (path_item_type(item) == ptype_directory) + move_files_from_tmp_top_down(child, temporary_root, state, dmap); + } + } +} + + +void +apply_rearrangement_to_filesystem(change_set::path_rearrangement const & re, + local_path const & temporary_root) +{ + re.check_sane(); + tid_source ts; + path_analysis analysis; + directory_map first_dmap, second_dmap; + + analyze_rearrangement(re, analysis, ts); + build_directory_map(analysis.first, first_dmap); + build_directory_map(analysis.second, second_dmap); + + if (analysis.first.empty()) + return; + + move_files_to_tmp_bottom_up(root_tid, temporary_root, + analysis.first, first_dmap); + + move_files_from_tmp_top_down(root_tid, temporary_root, + analysis.second, second_dmap); +} + +// application stuff + +void +apply_path_rearrangement(path_set const & old_ps, + change_set::path_rearrangement const & pr, + path_set & new_ps) +{ + pr.check_sane(); + change_set::path_rearrangement a, b, c; + a.added_files = old_ps; + concatenate_rearrangements(a, pr, c); + new_ps = c.added_files; +} + +void +build_pure_addition_change_set(manifest_map const & man, + change_set & cs) +{ + for (manifest_map::const_iterator i = man.begin(); i != man.end(); ++i) + cs.add_file(manifest_entry_path(i), manifest_entry_id(i)); + cs.check_sane(); +} + +// this function takes the rearrangement sitting in cs and "completes" the +// changeset by filling in all the deltas + +void +complete_change_set(manifest_map const & m_old, + manifest_map const & m_new, + change_set & cs) +{ + cs.rearrangement.check_sane(); + tid_source ts; + path_analysis analysis; + directory_map first_dmap, second_dmap; + + analyze_rearrangement(cs.rearrangement, analysis, ts); + build_directory_map(analysis.first, first_dmap); + build_directory_map(analysis.second, second_dmap); + + std::set paths; + extract_path_set(m_new, paths); + + for (std::set::const_iterator i = cs.rearrangement.added_files.begin(); + i != cs.rearrangement.added_files.end(); ++i) + { + manifest_map::const_iterator j = m_new.find(*i); + I(j != m_new.end()); + cs.deltas.insert(std::make_pair(*i, + std::make_pair(null_ident, + manifest_entry_id(j)))); + paths.erase(*i); + } + + for (std::set::const_iterator i = paths.begin(); + i != paths.end(); ++i) + { + file_path old_path; + reconstruct_path(*i, second_dmap, analysis.first, old_path); + manifest_map::const_iterator j = m_old.find(old_path); + manifest_map::const_iterator k = m_new.find(*i); + I(j != m_old.end()); + I(k != m_new.end()); + if (!(manifest_entry_id(j) == manifest_entry_id(k))) + cs.deltas.insert(std::make_pair(*i, std::make_pair(manifest_entry_id(j), + manifest_entry_id(k)))); + } + + cs.check_sane(); +} + + +void +apply_change_set(manifest_map const & old_man, + change_set const & cs, + manifest_map & new_man) +{ + cs.check_sane(); + change_set a, b; + build_pure_addition_change_set(old_man, a); + concatenate_change_sets(a, cs, b); + + // If the composed change_set still has renames or deletions in it, then + // 
they referred to things that weren't in the original manifest, and this + // change_set should never have been applied to this manifest in the first + // place. + I(b.rearrangement.deleted_files.empty()); + I(b.rearrangement.renamed_files.empty()); + // Furthermore, all deltas should be add deltas + for (change_set::delta_map::const_iterator i = b.deltas.begin(); + i != b.deltas.end(); ++i) + { + I(null_id(delta_entry_src(i))); + I(b.rearrangement.added_files.find(delta_entry_path(i)) + != b.rearrangement.added_files.end()); + } + + new_man.clear(); + for (std::set::const_iterator i = b.rearrangement.added_files.begin(); + i != b.rearrangement.added_files.end(); ++i) + { + change_set::delta_map::const_iterator d = b.deltas.find(*i); + I(d != b.deltas.end()); + new_man.insert(std::make_pair(*i, delta_entry_dst(d))); + } +} + +// quick, optimistic and destructive version +void +apply_path_rearrangement(change_set::path_rearrangement const & pr, + path_set & ps) +{ + pr.check_sane(); + if (pr.renamed_files.empty() + && pr.renamed_dirs.empty() + && pr.deleted_dirs.empty()) + { + // fast path for simple drop/add file operations + for (std::set::const_iterator i = pr.deleted_files.begin(); + i != pr.deleted_files.end(); ++i) + { + ps.erase(*i); + } + for (std::set::const_iterator i = pr.added_files.begin(); + i != pr.added_files.end(); ++i) + { + ps.insert(*i); + } + } + else + { + // fall back to the slow way + path_set tmp; + apply_path_rearrangement(ps, pr, tmp); + ps = tmp; + } +} + +// quick, optimistic and destructive version +file_path +apply_change_set_inverse(change_set const & cs, + file_path const & file_in_second) +{ + cs.check_sane(); + tid_source ts; + path_analysis analysis; + directory_map second_dmap; + file_path file_in_first; + + analyze_rearrangement(cs.rearrangement, analysis, ts); + build_directory_map(analysis.second, second_dmap); + reconstruct_path(file_in_second, second_dmap, analysis.first, file_in_first); + return file_in_first; +} + +// quick, optimistic and destructive version +void +apply_change_set(change_set const & cs, + manifest_map & man) +{ + cs.check_sane(); + if (cs.rearrangement.renamed_files.empty() + && cs.rearrangement.renamed_dirs.empty() + && cs.rearrangement.deleted_dirs.empty()) + { + // fast path for simple drop/add/delta file operations + for (std::set::const_iterator i = cs.rearrangement.deleted_files.begin(); + i != cs.rearrangement.deleted_files.end(); ++i) + { + man.erase(*i); + } + for (change_set::delta_map::const_iterator i = cs.deltas.begin(); + i != cs.deltas.end(); ++i) + { + if (!null_id(delta_entry_dst(i))) + man[delta_entry_path(i)] = delta_entry_dst(i); + } + } + else + { + // fall back to the slow way + manifest_map tmp; + apply_change_set(man, cs, tmp); + man = tmp; + } +} + + +// i/o stuff + +namespace +{ + namespace syms + { + std::string const patch("patch"); + std::string const from("from"); + std::string const to("to"); + std::string const add_file("add_file"); + std::string const delete_file("delete_file"); + std::string const delete_dir("delete_dir"); + std::string const rename_file("rename_file"); + std::string const rename_dir("rename_dir"); + } +} + +static void +parse_path_rearrangement(basic_io::parser & parser, + change_set & cs) +{ + while (parser.symp()) + { + std::string t1, t2; + if (parser.symp(syms::add_file)) + { + parser.sym(); + parser.str(t1); + cs.add_file(file_path(t1)); + } + else if (parser.symp(syms::delete_file)) + { + parser.sym(); + parser.str(t1); + cs.delete_file(file_path(t1)); + } + else if 
(parser.symp(syms::delete_dir)) + { + parser.sym(); + parser.str(t1); + cs.delete_dir(file_path(t1)); + } + else if (parser.symp(syms::rename_file)) + { + parser.sym(); + parser.str(t1); + parser.esym(syms::to); + parser.str(t2); + cs.rename_file(file_path(t1), + file_path(t2)); + } + else if (parser.symp(syms::rename_dir)) + { + parser.sym(); + parser.str(t1); + parser.esym(syms::to); + parser.str(t2); + cs.rename_dir(file_path(t1), + file_path(t2)); + } + else + break; + } + cs.rearrangement.check_sane(); +} + + +void +print_path_rearrangement(basic_io::printer & printer, + change_set::path_rearrangement const & pr) +{ + + pr.check_sane(); + for (std::set::const_iterator i = pr.deleted_files.begin(); + i != pr.deleted_files.end(); ++i) + { + basic_io::stanza st; + st.push_str_pair(syms::delete_file, (*i)()); + printer.print_stanza(st); + } + + for (std::set::const_iterator i = pr.deleted_dirs.begin(); + i != pr.deleted_dirs.end(); ++i) + { + basic_io::stanza st; + st.push_str_pair(syms::delete_dir, (*i)()); + printer.print_stanza(st); + } + + for (std::map::const_iterator i = pr.renamed_files.begin(); + i != pr.renamed_files.end(); ++i) + { + basic_io::stanza st; + st.push_str_pair(syms::rename_file, i->first()); + st.push_str_pair(syms::to, i->second()); + printer.print_stanza(st); + } + + for (std::map::const_iterator i = pr.renamed_dirs.begin(); + i != pr.renamed_dirs.end(); ++i) + { + basic_io::stanza st; + st.push_str_pair(syms::rename_dir, i->first()); + st.push_str_pair(syms::to, i->second()); + printer.print_stanza(st); + } + + for (std::set::const_iterator i = pr.added_files.begin(); + i != pr.added_files.end(); ++i) + { + basic_io::stanza st; + st.push_str_pair(syms::add_file, (*i)()); + printer.print_stanza(st); + } +} + +void +parse_change_set(basic_io::parser & parser, + change_set & cs) +{ + clear_change_set(cs); + + parse_path_rearrangement(parser, cs); + + while (parser.symp(syms::patch)) + { + std::string path, src, dst; + parser.sym(); + parser.str(path); + parser.esym(syms::from); + parser.hex(src); + parser.esym(syms::to); + parser.hex(dst); + cs.deltas.insert(std::make_pair(file_path(path), + std::make_pair(file_id(src), + file_id(dst)))); + } + cs.check_sane(); +} + +void +print_change_set(basic_io::printer & printer, + change_set const & cs) +{ + cs.check_sane(); + print_path_rearrangement(printer, cs.rearrangement); + + for (change_set::delta_map::const_iterator i = cs.deltas.begin(); + i != cs.deltas.end(); ++i) + { + basic_io::stanza st; + st.push_str_pair(syms::patch, i->first()); + st.push_hex_pair(syms::from, i->second.first.inner()()); + st.push_hex_pair(syms::to, i->second.second.inner()()); + printer.print_stanza(st); + } +} + +void +read_path_rearrangement(data const & dat, + change_set::path_rearrangement & re) +{ + std::istringstream iss(dat()); + basic_io::input_source src(iss, "path_rearrangement"); + basic_io::tokenizer tok(src); + basic_io::parser pars(tok); + change_set cs; + parse_path_rearrangement(pars, cs); + re = cs.rearrangement; + I(src.lookahead == EOF); + re.check_sane(); +} + +void +read_change_set(data const & dat, + change_set & cs) +{ + std::istringstream iss(dat()); + basic_io::input_source src(iss, "change_set"); + basic_io::tokenizer tok(src); + basic_io::parser pars(tok); + parse_change_set(pars, cs); + I(src.lookahead == EOF); + cs.check_sane(); +} + +void +write_change_set(change_set const & cs, + data & dat) +{ + cs.check_sane(); + std::ostringstream oss; + basic_io::printer pr(oss); + print_change_set(pr, cs); + dat = 
data(oss.str()); +} + +void +write_path_rearrangement(change_set::path_rearrangement const & re, + data & dat) +{ + re.check_sane(); + std::ostringstream oss; + basic_io::printer pr(oss); + print_path_rearrangement(pr, re); + dat = data(oss.str()); +} + +#ifdef BUILD_UNIT_TESTS +#include "unit_tests.hh" +#include "sanity.hh" + +static void dump_change_set(std::string const & ctx, + change_set const & cs) +{ + data tmp; + write_change_set(cs, tmp); + L(F("[begin changeset %s]\n") % ctx); + L(F("%s") % tmp); + L(F("[end changeset %s]\n") % ctx); +} + +static void +spin_change_set(change_set const & cs) +{ + data tmp1; + change_set cs1; + write_change_set(cs, tmp1); + dump_change_set("normalized", cs); + read_change_set(tmp1, cs1); + for (int i = 0; i < 5; ++i) + { + data tmp2; + change_set cs2; + write_change_set(cs1, tmp2); + BOOST_CHECK(tmp1 == tmp2); + read_change_set(tmp2, cs2); + BOOST_CHECK(cs1.rearrangement == cs2.rearrangement); + BOOST_CHECK(cs1.deltas == cs2.deltas); + cs1 = cs2; + } +} + +static void +disjoint_merge_test(std::string const & ab_str, + std::string const & ac_str) +{ + change_set ab, ac, bm, cm; + + app_state app; + + L(F("beginning disjoint_merge_test\n")); + + read_change_set(data(ab_str), ab); + read_change_set(data(ac_str), ac); + + manifest_map dummy; + + merge_provider merger(app, dummy, dummy, dummy); + merge_change_sets(ab, ac, bm, cm, merger, app); + + dump_change_set("ab", ab); + dump_change_set("ac", ac); + dump_change_set("bm", bm); + dump_change_set("cm", cm); + + BOOST_CHECK(bm.rearrangement == ac.rearrangement); + BOOST_CHECK(cm.rearrangement == ab.rearrangement); + + L(F("finished disjoint_merge_test\n")); +} + +static void +disjoint_merge_tests() +{ + disjoint_merge_test + ("rename_file \"foo\"\n" + " to \"bar\"\n", + + "rename_file \"apple\"\n" + " to \"orange\"\n"); + + disjoint_merge_test + ("rename_file \"foo/a.txt\"\n" + " to \"bar/b.txt\"\n", + + "rename_file \"bar/c.txt\"\n" + " to \"baz/d.txt\"\n"); + + disjoint_merge_test + ("patch \"foo/file.txt\"\n" + " from [c6a4a6196bb4a744207e1a6e90273369b8c2e925]\n" + " to [fe18ec0c55cbc72e4e51c58dc13af515a2f3a892]\n", + + "rename_file \"foo/file.txt\"\n" + " to \"foo/apple.txt\"\n"); + + disjoint_merge_test + ( + "rename_file \"apple.txt\"\n" + " to \"pear.txt\"\n" + "\n" + "patch \"foo.txt\"\n" + " from [c6a4a6196bb4a744207e1a6e90273369b8c2e925]\n" + " to [fe18ec0c55cbc72e4e51c58dc13af515a2f3a892]\n", + + "rename_file \"foo.txt\"\n" + " to \"bar.txt\"\n" + "\n" + "patch \"apple.txt\"\n" + " from [fe18ec0c55cbc72e4e51c58dc13af515a2f3a892]\n" + " to [435e816c30263c9184f94e7c4d5aec78ea7c028a]\n"); +} + +static void +basic_change_set_test() +{ + try + { + + change_set cs; + cs.delete_file(file_path("usr/lib/zombie")); + cs.add_file(file_path("usr/bin/cat"), + file_id(hexenc("adc83b19e793491b1c6ea0fd8b46cd9f32e592fc"))); + cs.add_file(file_path("usr/local/bin/dog"), + file_id(hexenc("adc83b19e793491b1c6ea0fd8b46cd9f32e592fc"))); + cs.rename_file(file_path("usr/local/bin/dog"), file_path("usr/bin/dog")); + cs.rename_file(file_path("usr/bin/cat"), file_path("usr/local/bin/chicken")); + cs.add_file(file_path("usr/lib/libc.so"), + file_id(hexenc("435e816c30263c9184f94e7c4d5aec78ea7c028a"))); + cs.rename_dir(file_path("usr/lib"), file_path("usr/local/lib")); + cs.apply_delta(file_path("usr/local/bin/chicken"), + file_id(hexenc("c6a4a6196bb4a744207e1a6e90273369b8c2e925")), + file_id(hexenc("fe18ec0c55cbc72e4e51c58dc13af515a2f3a892"))); + spin_change_set(cs); + } + catch (informative_failure & exn) + { + 
L(F("informative failure: %s\n") % exn.what); + } + catch (std::runtime_error & exn) + { + L(F("runtime error: %s\n") % exn.what()); + } +} + +static void +neutralize_change_test() +{ + try + { + + change_set cs1, cs2, csa; + cs1.add_file(file_path("usr/lib/zombie"), + file_id(hexenc("adc83b19e793491b1c6ea0fd8b46cd9f32e592fc"))); + cs1.rename_file(file_path("usr/lib/apple"), + file_path("usr/lib/orange")); + cs1.rename_dir(file_path("usr/lib/moose"), + file_path("usr/lib/squirrel")); + + dump_change_set("neutralize target", cs1); + + cs2.delete_file(file_path("usr/lib/zombie")); + cs2.rename_file(file_path("usr/lib/orange"), + file_path("usr/lib/apple")); + cs2.rename_dir(file_path("usr/lib/squirrel"), + file_path("usr/lib/moose")); + + dump_change_set("neutralizer", cs2); + + concatenate_change_sets(cs1, cs2, csa); + + dump_change_set("neutralized", csa); + + tid_source ts; + path_analysis analysis; + analyze_rearrangement(csa.rearrangement, analysis, ts); + + BOOST_CHECK(analysis.first.empty()); + BOOST_CHECK(analysis.second.empty()); + } + catch (informative_failure & exn) + { + L(F("informative failure: %s\n") % exn.what); + } + catch (std::runtime_error & exn) + { + L(F("runtime error: %s\n") % exn.what()); + } +} + +static void +non_interfering_change_test() +{ + try + { + + change_set cs1, cs2, csa; + cs1.delete_file(file_path("usr/lib/zombie")); + cs1.rename_file(file_path("usr/lib/orange"), + file_path("usr/lib/apple")); + cs1.rename_dir(file_path("usr/lib/squirrel"), + file_path("usr/lib/moose")); + + dump_change_set("non-interference A", cs1); + + cs2.add_file(file_path("usr/lib/zombie"), + file_id(hexenc("adc83b19e793491b1c6ea0fd8b46cd9f32e592fc"))); + cs2.rename_file(file_path("usr/lib/pear"), + file_path("usr/lib/orange")); + cs2.rename_dir(file_path("usr/lib/spy"), + file_path("usr/lib/squirrel")); + + dump_change_set("non-interference B", cs2); + + concatenate_change_sets(cs1, cs2, csa); + + dump_change_set("non-interference combined", csa); + + tid_source ts; + path_analysis analysis; + analyze_rearrangement(csa.rearrangement, analysis, ts); + + BOOST_CHECK(analysis.first.size() == 8); + BOOST_CHECK(analysis.second.size() == 8); + } + catch (informative_failure & exn) + { + L(F("informative failure: %s\n") % exn.what); + } + catch (std::runtime_error & exn) + { + L(F("runtime error: %s\n") % exn.what()); + } +} + +static const file_id fid_null; +static const file_id fid1 = file_id(hexenc("aaaa3831e5eb74e6cd50b94f9e99e6a14d98d702")); +static const file_id fid2 = file_id(hexenc("bbbb3831e5eb74e6cd50b94f9e99e6a14d98d702")); +static const file_id fid3 = file_id(hexenc("cccc3831e5eb74e6cd50b94f9e99e6a14d98d702")); + +typedef enum { in_a, in_b } which_t; +struct bad_concatenate_change_test +{ + change_set a; + change_set b; + change_set combined; + change_set concat; + bool do_combine; + std::string ident; + bad_concatenate_change_test(char const *file, int line) : + do_combine(false), + ident((F("%s:%d") % file % line).str()) + { + L(F("BEGINNING concatenation test %s\n") % ident); + } + + ~bad_concatenate_change_test() + { + L(F("FINISHING concatenation test %s\n") % ident); + } + + change_set & getit(which_t which) + { + if (which == in_a) + return a; + return b; + } + // Call combine() if you want to make sure that the things that are bad when + // concatenated are also bad when all stuck together into a single + // changeset. 
+ void combine() { do_combine = true; } + void add_file(which_t which, std::string const & path, file_id fid = fid1) + { + getit(which).add_file(file_path(path), fid); + if (do_combine) + combined.add_file(file_path(path), fid); + } + void apply_delta(which_t which, std::string const & path, + file_id from_fid, + file_id to_fid) + { + getit(which).apply_delta(file_path(path), from_fid, to_fid); + if (do_combine) + combined.apply_delta(file_path(path), from_fid, to_fid); + } + void delete_file(which_t which, std::string const & path) + { + getit(which).delete_file(file_path(path)); + if (do_combine) + combined.delete_file(file_path(path)); + } + void delete_dir(which_t which, std::string const & path) + { + getit(which).delete_dir(file_path(path)); + if (do_combine) + combined.delete_dir(file_path(path)); + } + void rename_file(which_t which, + std::string const & path1, std::string const & path2) + { + getit(which).rename_file(file_path(path1), file_path(path2)); + if (do_combine) + combined.rename_file(file_path(path1), file_path(path2)); + } + void rename_dir(which_t which, + std::string const & path1, std::string const & path2) + { + getit(which).rename_dir(file_path(path1), file_path(path2)); + if (do_combine) + combined.rename_dir(file_path(path1), file_path(path2)); + } + void run() + { + L(F("RUNNING bad_concatenate_change_test %s\n") % ident); + try + { + dump_change_set("a", a); + dump_change_set("b", b); + } + catch (std::logic_error e) + { + L(F("skipping change_set printing, one or both are not sane\n")); + } + BOOST_CHECK_THROW(concatenate_change_sets(a, b, concat), + std::logic_error); + try { dump_change_set("concat", concat); } + catch (std::logic_error e) { L(F("concat change_set is insane\n")); } + if (do_combine) + { + L(F("Checking combined change set\n")); + change_set empty_cs, combined_concat; + BOOST_CHECK_THROW(concatenate_change_sets(combined, + empty_cs, + combined_concat), + std::logic_error); + try { dump_change_set("combined_concat", combined_concat); } + catch (std::logic_error e) { L(F("combined_concat is insane\n")); } + } + } + void run_both() + { + run(); + L(F("RUNNING bad_concatenate_change_test %s again backwards\n") % ident); + BOOST_CHECK_THROW(concatenate_change_sets(a, b, concat), + std::logic_error); + } +}; + +// We also do a number of just "bad change set" tests here, leaving one of +// them empty; this is because our main line of defense against bad +// change_sets, check_sane_history, does its checking by doing +// concatenations, so it's doing concatenations that we want to be sure does +// sanity checking. 
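+// As a preview of the fixture's shape, the first case below reads like
+// this (the path "target" is just an arbitrary test name): two
+// changesets that each add "target" cannot be concatenated, and the
+// harness checks that the attempt faults rather than producing a
+// change_set:
+//
+//   bad_concatenate_change_test t(__FILE__, __LINE__);
+//   t.add_file(in_a, "target");  // first changeset adds "target"
+//   t.add_file(in_b, "target");  // so the second may not add it again
+//   t.run();                     // BOOST_CHECK_THROW(..., std::logic_error)
+//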
+static void +bad_concatenate_change_tests() +{ + // Files/directories can't be dropped on top of each other: + BOOST_CHECKPOINT("on top"); + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.add_file(in_a, "target"); + t.add_file(in_b, "target"); + t.run(); + } + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.combine(); + t.rename_file(in_a, "foo", "target"); + t.rename_file(in_b, "bar", "target"); + t.run(); + } + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.combine(); + t.rename_dir(in_a, "foo", "target"); + t.rename_dir(in_b, "bar", "target"); + t.run(); + } + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.combine(); + t.rename_file(in_a, "foo", "target"); + t.rename_dir(in_b, "bar", "target"); + t.run_both(); + } + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.combine(); + t.add_file(in_a, "target"); + t.rename_file(in_b, "foo", "target"); + t.run_both(); + } + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.combine(); + t.add_file(in_a, "target"); + t.rename_dir(in_b, "foo", "target"); + t.run_both(); + } + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.combine(); + t.add_file(in_a, "target/subfile"); + t.add_file(in_b, "target"); + t.run_both(); + } + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.combine(); + t.add_file(in_a, "target/subfile"); + t.rename_file(in_b, "foo", "target"); + t.run_both(); + } + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.add_file(in_a, "target/subfile"); + t.rename_dir(in_b, "foo", "target"); + t.run_both(); + } + // You can only delete something once + BOOST_CHECKPOINT("delete once"); + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.delete_file(in_a, "target"); + t.delete_file(in_b, "target"); + t.run(); + } + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.combine(); + t.delete_file(in_a, "target"); + t.delete_dir(in_b, "target"); + t.run_both(); + } + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.delete_dir(in_a, "target"); + t.delete_dir(in_b, "target"); + t.run(); + } + // You can't delete something that's not there anymore + BOOST_CHECKPOINT("delete after rename"); + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.combine(); + t.delete_file(in_a, "target"); + t.rename_file(in_b, "target", "foo"); + t.run_both(); + } + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.combine(); + t.delete_dir(in_a, "target"); + t.rename_file(in_b, "target", "foo"); + t.run_both(); + } + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.combine(); + t.delete_dir(in_a, "target"); + t.rename_dir(in_b, "target", "foo"); + t.run_both(); + } + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.combine(); + t.delete_file(in_a, "target"); + t.rename_dir(in_b, "target", "foo"); + t.run_both(); + } + // Files/directories can't be split in two + BOOST_CHECKPOINT("splitting files/dirs"); + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.rename_file(in_a, "target", "foo"); + t.rename_file(in_b, "target", "bar"); + t.run(); + } + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.rename_dir(in_a, "target", "foo"); + t.rename_dir(in_b, "target", "bar"); + t.run(); + } + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.combine(); + t.rename_dir(in_a, "target", "foo"); + t.rename_file(in_b, "target", "bar"); + t.run_both(); + } + // Files and directories are different + BOOST_CHECKPOINT("files != dirs"); + { + bad_concatenate_change_test t(__FILE__, __LINE__); + 
t.add_file(in_a, "target"); + t.delete_dir(in_b, "target"); + t.run(); + } + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.add_file(in_a, "target/subfile"); + t.delete_file(in_b, "target"); + t.run(); + } + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.add_file(in_a, "target/subfile"); + t.rename_file(in_b, "target", "foo"); + t.run(); + } + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.rename_file(in_a, "foo", "target"); + t.delete_dir(in_b, "target"); + t.run(); + } + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.combine(); + t.apply_delta(in_a, "target", fid1, fid2); + t.delete_dir(in_b, "target"); + t.run_both(); + } + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.rename_dir(in_a, "foo", "target"); + t.delete_file(in_b, "target"); + t.run(); + } + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.combine(); + t.rename_dir(in_a, "foo", "target"); + t.apply_delta(in_b, "target", fid1, fid2); + t.run_both(); + } + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.combine(); + t.apply_delta(in_a, "target", fid1, fid2); + t.rename_dir(in_b, "target", "bar"); + t.run_both(); + } + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.rename_file(in_a, "foo", "target"); + t.rename_dir(in_b, "target", "bar"); + t.run(); + } + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.rename_dir(in_a, "foo", "target"); + t.rename_file(in_b, "target", "bar"); + t.run(); + } + // Directories can't be patched, and patches can't be directoried... + BOOST_CHECKPOINT("can't patch dirs or vice versa"); + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.combine(); + t.add_file(in_a, "target/subfile"); + t.apply_delta(in_b, "target", fid_null, fid1); + t.run_both(); + } + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.combine(); + t.add_file(in_a, "target/subfile"); + t.apply_delta(in_b, "target", fid1, fid2); + t.run_both(); + } + // Deltas must be consistent + BOOST_CHECKPOINT("consistent deltas"); + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.apply_delta(in_a, "target", fid1, fid2); + t.apply_delta(in_b, "target", fid3, fid1); + t.run(); + } + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.add_file(in_a, "target", fid1); + t.apply_delta(in_b, "target", fid2, fid3); + t.run(); + } + // Can't have a null source id if it's not an add + BOOST_CHECKPOINT("null id on non-add"); + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.apply_delta(in_a, "target", fid_null, fid1); + t.run(); + } + // Can't have drop + delta with no add + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.combine(); + t.delete_file(in_a, "target"); + t.apply_delta(in_b, "target", fid1, fid2); + t.run(); + } + // Can't have a null destination id, ever, with or without a delete_file + BOOST_CHECKPOINT("no null destinations"); + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.delete_file(in_a, "target"); + t.apply_delta(in_a, "target", fid1, fid_null); + t.run(); + } + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.apply_delta(in_a, "target", fid1, fid_null); + t.run(); + } + // Can't have a patch with src == dst + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.apply_delta(in_a, "target", fid1, fid1); + t.run(); + } +} + +// FIXME: Things that should be added, but can't be trivially because they +// assert too early: +// anything repeated -- multiple adds, multiple deletes, multiple deltas +// including in one changeset, for both files and dirs +// 
(probably should put these in strings, and do BOOST_CHECK_THROWS in the +// parser?) + +// FIXME: also need tests for the invariants in apply_manifest (and any +// invariants that should be there but aren't, of course) + +void +add_change_set_tests(test_suite * suite) +{ + I(suite); + suite->add(BOOST_TEST_CASE(&basic_change_set_test)); + suite->add(BOOST_TEST_CASE(&neutralize_change_test)); + suite->add(BOOST_TEST_CASE(&non_interfering_change_test)); + suite->add(BOOST_TEST_CASE(&disjoint_merge_tests)); + suite->add(BOOST_TEST_CASE(&bad_concatenate_change_tests)); +} + + +#endif // BUILD_UNIT_TESTS ============================================================ --- tests/(minor)_test_a_merge_3/right 63ad35cd3955bfa681b76b31d7f2fd745e84f654 +++ tests/(minor)_test_a_merge_3/right 63ad35cd3955bfa681b76b31d7f2fd745e84f654 @@ -0,0 +1,3532 @@ +// copyright (C) 2004 graydon hoare +// all rights reserved. +// licensed to the public under the terms of the GNU GPL (>= 2) +// see the file COPYING for details + +// this is how you "ask for" the C99 constant constructor macros. *and* +// you have to do so before any other files accidentally include +// stdint.h. awesome. +#define __STDC_CONSTANT_MACROS + +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "basic_io.hh" +#include "change_set.hh" +#include "constants.hh" +#include "diff_patch.hh" +#include "file_io.hh" +#include "interner.hh" +#include "numeric_vocab.hh" +#include "sanity.hh" +#include "smap.hh" + +// our analyses in this file happen on one of two families of +// related structures: a path_analysis or a directory_map. +// +// a path_analysis corresponds exactly to a normalized +// path_rearrangement; they are two ways of writing the +// same information +// +// the path_analysis stores two path_states. each path_state is a map from +// transient identifiers (tids) to items. each item represents a semantic +// element of a filesystem which has a type (file or directory), a name, +// and a parent link (another tid). tids should be unique across a +// path_analysis. 
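+//
+// for example (tids invented here purely for illustration), the single
+// rearrangement "rename_file a/x -> b/y" could be analyzed as:
+//
+//   first:   1 -> (root_tid, directory, "a")
+//            2 -> (root_tid, directory, "b")
+//            3 -> (1,        file,      "x")
+//
+//   second:  1 -> (root_tid, directory, "a")
+//            2 -> (root_tid, directory, "b")
+//            3 -> (2,        file,      "y")
+//
+// the renamed file keeps tid 3 in both states; only its name and parent
+// link differ, which is the whole content of the rename.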
+
+typedef enum { ptype_directory, ptype_file } ptype;
+typedef u32 tid;
+static tid root_tid = 0;
+
+struct
+tid_source
+{
+  tid ctr;
+  tid_source() : ctr(root_tid + 1) {}
+  tid next() { I(ctr != UINT32_C(0xffffffff)); return ctr++; }
+};
+
+typedef u32 path_component;
+
+struct
+path_component_maker
+{
+  path_component make(std::string const & s)
+  {
+    bool is_new;
+    path_component pc = intern.intern(s, is_new);
+    // sanity check new ones
+    if (is_new)
+      {
+        // must be a valid file_path
+        file_path tmp_file_path = file_path(s);
+        // must contain exactly 0 or 1 components
+        fs::path tmp_fs_path = mkpath(s);
+        I(null_name(s) || ++(tmp_fs_path.begin()) == tmp_fs_path.end());
+      }
+    return pc;
+  }
+  std::string lookup(path_component pc) const
+  {
+    return intern.lookup(pc);
+  }
+private:
+  interner<path_component> intern;
+};
+
+static path_component_maker the_path_component_maker;
+
+static path_component
+make_null_component()
+{
+  static path_component null_pc = the_path_component_maker.make("");
+  return null_pc;
+}
+
+static bool
+null_name(path_component pc)
+{
+  return pc == make_null_component();
+}
+
+struct
+path_item
+{
+  tid parent;
+  ptype ty;
+  path_component name;
+  path_item() {}
+  path_item(tid p, ptype t, path_component n);
+  path_item(path_item const & other);
+  path_item const & operator=(path_item const & other);
+  bool operator==(path_item const & other) const;
+};
+
+
+template <typename T> struct identity
+{
+  size_t operator()(T const & v) const
+  {
+    return static_cast<size_t>(v);
+  }
+};
+
+typedef smap<tid, path_item> path_state;
+typedef smap<tid, tid> state_renumbering;
+typedef std::pair<path_state, path_state> path_analysis;
+
+// nulls and tests
+
+static file_id null_ident;
+
+// a directory_map is a more "normal" representation of a directory tree,
+// which you can traverse more conveniently from root to tip
+//
+// tid -> [ name -> (ptype, tid),
+//          name -> (ptype, tid),
+//          ... ]
+//
+// tid -> [ name -> (ptype, tid),
+//          name -> (ptype, tid),
+//          ...
]
+
+typedef smap<path_component, std::pair<ptype, tid> > directory_node;
+typedef smap<tid, boost::shared_ptr<directory_node> > directory_map;
+
+static path_component
+directory_entry_name(directory_node::const_iterator const & i)
+{
+  return i->first;
+}
+
+static ptype
+directory_entry_type(directory_node::const_iterator const & i)
+{
+  return i->second.first;
+}
+
+static tid
+directory_entry_tid(directory_node::const_iterator const & i)
+{
+  return i->second.second;
+}
+
+void
+change_set::add_file(file_path const & a)
+{
+  I(rearrangement.added_files.find(a) == rearrangement.added_files.end());
+  rearrangement.added_files.insert(a);
+}
+
+void
+change_set::add_file(file_path const & a, file_id const & ident)
+{
+  I(rearrangement.added_files.find(a) == rearrangement.added_files.end());
+  I(deltas.find(a) == deltas.end());
+  rearrangement.added_files.insert(a);
+  deltas.insert(std::make_pair(a, std::make_pair(null_ident, ident)));
+}
+
+void
+change_set::apply_delta(file_path const & path,
+                        file_id const & src,
+                        file_id const & dst)
+{
+  I(deltas.find(path) == deltas.end());
+  deltas.insert(std::make_pair(path, std::make_pair(src, dst)));
+}
+
+void
+change_set::delete_file(file_path const & d)
+{
+  I(rearrangement.deleted_files.find(d) == rearrangement.deleted_files.end());
+  rearrangement.deleted_files.insert(d);
+}
+
+void
+change_set::delete_dir(file_path const & d)
+{
+  I(rearrangement.deleted_dirs.find(d) == rearrangement.deleted_dirs.end());
+  rearrangement.deleted_dirs.insert(d);
+}
+
+void
+change_set::rename_file(file_path const & a, file_path const & b)
+{
+  I(rearrangement.renamed_files.find(a) == rearrangement.renamed_files.end());
+  rearrangement.renamed_files.insert(std::make_pair(a,b));
+}
+
+void
+change_set::rename_dir(file_path const & a, file_path const & b)
+{
+  I(rearrangement.renamed_dirs.find(a) == rearrangement.renamed_dirs.end());
+  rearrangement.renamed_dirs.insert(std::make_pair(a,b));
+}
+
+
+bool
+change_set::path_rearrangement::operator==(path_rearrangement const & other) const
+{
+  return deleted_files == other.deleted_files &&
+    deleted_dirs == other.deleted_dirs &&
+    renamed_files == other.renamed_files &&
+    renamed_dirs == other.renamed_dirs &&
+    added_files == other.added_files;
+}
+
+bool
+change_set::path_rearrangement::empty() const
+{
+  return deleted_files.empty() &&
+    deleted_dirs.empty() &&
+    renamed_files.empty() &&
+    renamed_dirs.empty() &&
+    added_files.empty();
+}
+
+bool
+change_set::path_rearrangement::has_added_file(file_path const & file) const
+{
+  return added_files.find(file) != added_files.end();
+}
+
+bool
+change_set::path_rearrangement::has_deleted_file(file_path const & file) const
+{
+  return deleted_files.find(file) != deleted_files.end();
+}
+
+bool
+change_set::path_rearrangement::has_renamed_file_dst(file_path const & file) const
+{
+  // FIXME: this is inefficient, but improvements would require a different
+  // structure for renamed_files (or perhaps a second reverse map). For now
+  // we'll assume that few files will be renamed per changeset.
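+  //
+  // a reverse index would make this lookup logarithmic instead of linear,
+  // at the cost of keeping two maps in sync. as a sketch only
+  // (renamed_files_by_dst is a hypothetical member, not part of this
+  // structure):
+  //
+  //   std::map<file_path, file_path> renamed_files_by_dst;
+  //   ...
+  //   return renamed_files_by_dst.find(file) != renamed_files_by_dst.end();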
+  for (std::map<file_path, file_path>::const_iterator rf = renamed_files.begin();
+       rf != renamed_files.end(); ++rf)
+    if (rf->second == file)
+      return true;
+  return false;
+}
+
+bool
+change_set::path_rearrangement::has_renamed_file_src(file_path const & file) const
+{
+  return renamed_files.find(file) != renamed_files.end();
+}
+
+bool
+change_set::empty() const
+{
+  return deltas.empty() && rearrangement.empty();
+}
+
+bool
+change_set::operator==(change_set const & other) const
+{
+  return rearrangement == other.rearrangement &&
+    deltas == other.deltas;
+}
+
+
+// simple accessors
+
+inline tid const &
+path_item_parent(path_item const & p)
+{
+  return p.parent;
+}
+
+inline ptype const &
+path_item_type(path_item const & p)
+{
+  return p.ty;
+}
+
+inline path_component
+path_item_name(path_item const & p)
+{
+  return p.name;
+}
+
+inline tid
+path_state_tid(path_state::const_iterator i)
+{
+  return i->first;
+}
+
+inline path_item const &
+path_state_item(path_state::const_iterator i)
+{
+  return i->second;
+}
+
+
+
+// structure dumping
+/*
+
+static void
+dump_renumbering(std::string const & s,
+                 state_renumbering const & r)
+{
+  L(F("BEGIN dumping renumbering '%s'\n") % s);
+  for (state_renumbering::const_iterator i = r.begin();
+       i != r.end(); ++i)
+    {
+      L(F("%d -> %d\n") % i->first % i->second);
+    }
+  L(F("END dumping renumbering '%s'\n") % s);
+}
+
+static void
+dump_state(std::string const & s,
+           path_state const & st)
+{
+  L(F("BEGIN dumping state '%s'\n") % s);
+  for (path_state::const_iterator i = st.begin();
+       i != st.end(); ++i)
+    {
+      L(F("state '%s': tid %d, parent %d, type %s, name %s\n")
+        % s
+        % path_state_tid(i)
+        % path_item_parent(path_state_item(i))
+        % (path_item_type(path_state_item(i)) == ptype_directory ? "dir" : "file")
+        % the_path_component_maker.lookup(path_item_name(path_state_item(i))));
+    }
+  L(F("END dumping state '%s'\n") % s);
+}
+
+static void
+dump_analysis(std::string const & s,
+              path_analysis const & t)
+{
+  L(F("BEGIN dumping tree '%s'\n") % s);
+  dump_state(s + " first", t.first);
+  dump_state(s + " second", t.second);
+  L(F("END dumping tree '%s'\n") % s);
+}
+
+*/
+
+
+// sanity checking
+
+static void
+check_sets_disjoint(std::set<file_path> const & a,
+                    std::set<file_path> const & b)
+{
+  std::set<file_path> isect;
+  std::set_intersection(a.begin(), a.end(),
+                        b.begin(), b.end(),
+                        std::inserter(isect, isect.begin()));
+  if (!global_sanity.relaxed)
+    {
+      I(isect.empty());
+    }
+}
+
+change_set::path_rearrangement::path_rearrangement(path_rearrangement const & other)
+{
+  other.check_sane();
+  deleted_files = other.deleted_files;
+  deleted_dirs = other.deleted_dirs;
+  renamed_files = other.renamed_files;
+  renamed_dirs = other.renamed_dirs;
+  added_files = other.added_files;
+}
+
+change_set::path_rearrangement const &
+change_set::path_rearrangement::operator=(path_rearrangement const & other)
+{
+  other.check_sane();
+  deleted_files = other.deleted_files;
+  deleted_dirs = other.deleted_dirs;
+  renamed_files = other.renamed_files;
+  renamed_dirs = other.renamed_dirs;
+  added_files = other.added_files;
+  return *this;
+}
+
+static void
+extract_pairs_and_insert(std::map<file_path, file_path> const & in,
+                         std::set<file_path> & firsts,
+                         std::set<file_path> & seconds)
+{
+  for (std::map<file_path, file_path>::const_iterator i = in.begin();
+       i != in.end(); ++i)
+    {
+      firsts.insert(i->first);
+      seconds.insert(i->second);
+    }
+}
+
+template <typename A, typename B>
+static void
+extract_first(std::map<A, B> const & m, std::set<A> & s)
+{
+  s.clear();
+  for (typename std::map<A, B>::const_iterator i = m.begin();
+       i != m.end(); ++i)
+    {
+      s.insert(i->first);
+    }
+}
+
+static void
+extract_killed(path_analysis const & a,
+               std::set<file_path> & killed);
+
+
+static void
+check_no_deltas_on_killed_files(path_analysis const & pa,
+                                change_set::delta_map const & del)
+{
+  std::set<file_path> killed;
+  std::set<file_path> delta_paths;
+
+  extract_killed(pa, killed);
+  extract_first(del, delta_paths);
+  check_sets_disjoint(killed, delta_paths);
+}
+
+static void
+check_delta_entries_not_directories(path_analysis const & pa,
+                                    change_set::delta_map const & dels);
+
+void
+analyze_rearrangement(change_set::path_rearrangement const & pr,
+                      path_analysis & pa,
+                      tid_source & ts);
+
+void
+sanity_check_path_analysis(path_analysis const & pr);
+
+void
+change_set::path_rearrangement::check_sane() const
+{
+  delta_map del;
+  this->check_sane(del);
+}
+
+void
+change_set::path_rearrangement::check_sane(delta_map const & deltas) const
+{
+  tid_source ts;
+  path_analysis pa;
+  analyze_rearrangement(*this, pa, ts);
+  sanity_check_path_analysis (pa);
+
+  check_no_deltas_on_killed_files(pa, deltas);
+  check_delta_entries_not_directories(pa, deltas);
+
+  // FIXME: extend this as you manage to think of more invariants
+  // which are cheap enough to check at this level.
+  std::set<file_path> renamed_srcs, renamed_dsts;
+  extract_pairs_and_insert(renamed_files, renamed_srcs, renamed_dsts);
+  extract_pairs_and_insert(renamed_dirs, renamed_srcs, renamed_dsts);
+
+  // Files cannot be split nor joined by renames.
+  I(renamed_files.size() + renamed_dirs.size() == renamed_srcs.size());
+  I(renamed_files.size() + renamed_dirs.size() == renamed_dsts.size());
+
+  check_sets_disjoint(deleted_files, deleted_dirs);
+  check_sets_disjoint(deleted_files, renamed_srcs);
+  check_sets_disjoint(deleted_dirs, renamed_srcs);
+
+  check_sets_disjoint(added_files, renamed_dsts);
+}
+
+change_set::change_set(change_set const & other)
+{
+  other.check_sane();
+  rearrangement = other.rearrangement;
+  deltas = other.deltas;
+}
+
+change_set const &change_set::operator=(change_set const & other)
+{
+  other.check_sane();
+  rearrangement = other.rearrangement;
+  deltas = other.deltas;
+  return *this;
+}
+
+void
+change_set::check_sane() const
+{
+  // FIXME: extend this as you manage to think of more invariants
+  // which are cheap enough to check at this level.
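+  //
+  // the checks below already illustrate the pattern: for example, every
+  // path in added_files must carry a delta of the form [] -> [ident],
+  // since a null source id is precisely what marks an addition.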
+
+  rearrangement.check_sane(this->deltas);
+
+  for (std::set<file_path>::const_iterator i = rearrangement.added_files.begin();
+       i != rearrangement.added_files.end(); ++i)
+    {
+      delta_map::const_iterator j = deltas.find(*i);
+      if (!global_sanity.relaxed)
+        {
+          I(j != deltas.end());
+          I(null_id(delta_entry_src(j)));
+          I(!null_id(delta_entry_dst(j)));
+        }
+    }
+
+  for (delta_map::const_iterator i = deltas.begin();
+       i != deltas.end(); ++i)
+    {
+      if (!global_sanity.relaxed)
+        {
+          I(!null_name(delta_entry_path(i)));
+          I(!null_id(delta_entry_dst(i)));
+          I(!(delta_entry_src(i) == delta_entry_dst(i)));
+          if (null_id(delta_entry_src(i)))
+            I(rearrangement.added_files.find(delta_entry_path(i))
+              != rearrangement.added_files.end());
+        }
+    }
+
+}
+
+static void
+sanity_check_path_item(path_item const & pi)
+{
+}
+
+static void
+confirm_proper_tree(path_state const & ps)
+{
+  smap<tid, bool> confirmed;
+  I(ps.find(root_tid) == ps.end());
+  for (path_state::const_iterator i = ps.begin(); i != ps.end(); ++i)
+    {
+      tid curr = i->first;
+      path_item item = i->second;
+      smap<tid, bool> ancs;
+
+      while (confirmed.find(curr) == confirmed.end())
+        {
+          sanity_check_path_item(item);
+          I(ancs.find(curr) == ancs.end());
+          ancs.insert(std::make_pair(curr,true));
+          if (path_item_parent(item) == root_tid)
+            break;
+          else
+            {
+              curr = path_item_parent(item);
+              path_state::const_iterator j = ps.find(curr);
+              I(j != ps.end());
+
+              // if we're null, our parent must also be null
+              if (null_name(item.name))
+                I(null_name(path_state_item(j).name));
+
+              item = path_state_item(j);
+              I(path_item_type(item) == ptype_directory);
+            }
+        }
+      std::copy(ancs.begin(), ancs.end(),
+                inserter(confirmed, confirmed.begin()));
+    }
+  I(confirmed.find(root_tid) == confirmed.end());
+}
+
+static void
+confirm_unique_entries_in_directories(path_state const & ps)
+{
+  smap<std::pair<tid, path_component>, bool> entries;
+  for (path_state::const_iterator i = ps.begin(); i != ps.end(); ++i)
+    {
+      if (null_name(path_item_name(i->second)))
+        {
+          I(path_item_parent(i->second) == root_tid);
+          continue;
+        }
+
+      std::pair<tid, path_component> p = std::make_pair(path_item_parent(i->second),
+                                                        path_item_name(i->second));
+      I(entries.find(p) == entries.end());
+      entries.insert(std::make_pair(p,true));
+    }
+}
+
+static void
+sanity_check_path_state(path_state const & ps)
+{
+  confirm_proper_tree(ps);
+  confirm_unique_entries_in_directories(ps);
+}
+
+path_item::path_item(tid p, ptype t, path_component n)
+  : parent(p), ty(t), name(n)
+{
+  sanity_check_path_item(*this);
+}
+
+path_item::path_item(path_item const & other)
+  : parent(other.parent), ty(other.ty), name(other.name)
+{
+  sanity_check_path_item(*this);
+}
+
+path_item const & path_item::operator=(path_item const & other)
+{
+  parent = other.parent;
+  ty = other.ty;
+  name = other.name;
+  sanity_check_path_item(*this);
+  return *this;
+}
+
+bool path_item::operator==(path_item const & other) const
+{
+  return this->parent == other.parent &&
+    this->ty == other.ty &&
+    this->name == other.name;
+}
+
+
+static void
+check_states_agree(path_state const & p1,
+                   path_state const & p2)
+{
+  path_analysis pa;
+  pa.first = p1;
+  pa.second = p2;
+  // dump_analysis("agreement", pa);
+  for (path_state::const_iterator i = p1.begin(); i != p1.end(); ++i)
+    {
+      path_state::const_iterator j = p2.find(i->first);
+      I(j != p2.end());
+      I(path_item_type(i->second) == path_item_type(j->second));
+      // I(!
(null_name(path_item_name(i->second)) + // && + // null_name(path_item_name(j->second)))); + } +} + +void +sanity_check_path_analysis(path_analysis const & pr) +{ + sanity_check_path_state(pr.first); + sanity_check_path_state(pr.second); + check_states_agree(pr.first, pr.second); + check_states_agree(pr.second, pr.first); +} + + +// construction helpers + +static boost::shared_ptr +new_dnode() +{ + return boost::shared_ptr(new directory_node()); +} + +static boost::shared_ptr +dnode(directory_map & dir, tid t) +{ + boost::shared_ptr node; + directory_map::const_iterator dirent = dir.find(t); + if (dirent == dir.end()) + { + node = new_dnode(); + dir.insert(std::make_pair(t, node)); + } + else + node = dirent->second; + return node; +} + + +// This function takes a vector of path components and joins them into a +// single file_path. Valid input may be a single-element vector whose sole +// element is the empty path component (""); this represents the null path, +// which we use to represent non-existent files. Alternatively, input may be +// a multi-element vector, in which case all elements of the vector are +// required to be non-null. The following are valid inputs (with strings +// replaced by their interned version, of course): +// - [""] +// - ["foo"] +// - ["foo", "bar"] +// The following are not: +// - [] +// - ["foo", ""] +// - ["", "bar"] +static void +compose_path(std::vector const & names, + file_path & path) +{ + try + { + std::vector::const_iterator i = names.begin(); + I(i != names.end()); + fs::path p = mkpath(the_path_component_maker.lookup(*i)); + ++i; + if (names.size() > 1) + I(!null_name(*i)); + for ( ; i != names.end(); ++i) + { + I(!null_name(*i)); + p /= mkpath(the_path_component_maker.lookup(*i)); + } + path = file_path(p.string()); + } + catch (std::runtime_error &e) + { + throw informative_failure(e.what()); + } +} + +static void +get_full_path(path_state const & state, + tid t, + std::vector & pth) +{ + std::vector tmp; + while(t != root_tid) + { + path_state::const_iterator i = state.find(t); + I(i != state.end()); + tmp.push_back(path_item_name(i->second)); + t = path_item_parent(i->second); + } + pth.clear(); + std::copy(tmp.rbegin(), tmp.rend(), inserter(pth, pth.begin())); +} + +static void +get_full_path(path_state const & state, + tid t, + file_path & pth) +{ + std::vector tmp; + get_full_path(state, t, tmp); + // L(F("got %d-entry path for tid %d\n") % tmp.size() % t); + compose_path(tmp, pth); +} + +static void +clear_rearrangement(change_set::path_rearrangement & pr) +{ + pr.deleted_files.clear(); + pr.deleted_dirs.clear(); + pr.renamed_files.clear(); + pr.renamed_dirs.clear(); + pr.added_files.clear(); +} + +static void +clear_change_set(change_set & cs) +{ + clear_rearrangement(cs.rearrangement); + cs.deltas.clear(); +} + +static void +compose_rearrangement(path_analysis const & pa, + change_set::path_rearrangement & pr) +{ + clear_rearrangement(pr); + + for (path_state::const_iterator i = pa.first.begin(); + i != pa.first.end(); ++i) + { + tid curr(path_state_tid(i)); + std::vector old_name, new_name; + file_path old_path, new_path; + + path_state::const_iterator j = pa.second.find(curr); + I(j != pa.second.end()); + path_item old_item(path_state_item(i)); + path_item new_item(path_state_item(j)); + + // compose names + if (!null_name(path_item_name(old_item))) + { + get_full_path(pa.first, curr, old_name); + compose_path(old_name, old_path); + } + + if (!null_name(path_item_name(new_item))) + { + get_full_path(pa.second, curr, new_name); + 
compose_path(new_name, new_path); + } + + if (old_path == new_path) + { + L(F("skipping preserved %s %d : '%s'\n") + % (path_item_type(old_item) == ptype_directory ? "directory" : "file") + % curr % old_path); + continue; + } + + L(F("analyzing %s %d : '%s' -> '%s'\n") + % (path_item_type(old_item) == ptype_directory ? "directory" : "file") + % curr % old_path % new_path); + + if (null_name(path_item_name(old_item))) + { + // an addition (which must be a file, not a directory) + I(! null_name(path_item_name(new_item))); + I(path_item_type(new_item) != ptype_directory); + pr.added_files.insert(new_path); + } + else if (null_name(path_item_name(new_item))) + { + // a deletion + I(! null_name(path_item_name(old_item))); + switch (path_item_type(new_item)) + { + case ptype_directory: + pr.deleted_dirs.insert(old_path); + break; + case ptype_file: + pr.deleted_files.insert(old_path); + break; + } + } + else + { + // a generic rename + switch (path_item_type(new_item)) + { + case ptype_directory: + pr.renamed_dirs.insert(std::make_pair(old_path, new_path)); + break; + case ptype_file: + pr.renamed_files.insert(std::make_pair(old_path, new_path)); + break; + } + } + } +} + + + + +// +// this takes a path of the form +// +// "p[0]/p[1]/.../p[n-1]/p[n]" +// +// and fills in a vector of paths corresponding to p[0] ... p[n-1], +// along with a separate "leaf path" for element p[n]. +// + +static void +split_path(file_path const & p, + std::vector & components) +{ + components.clear(); + fs::path tmp = mkpath(p()); + for (fs::path::iterator i = tmp.begin(); i != tmp.end(); ++i) + components.push_back(the_path_component_maker.make(*i)); +} + +static void +split_path(file_path const & p, + std::vector & prefix, + path_component & leaf_path) +{ + split_path(p, prefix); + I(prefix.size() > 0); + leaf_path = prefix.back(); + prefix.pop_back(); +} + +static bool +lookup_path(std::vector const & pth, + directory_map const & dir, + tid & t) +{ + t = root_tid; + for (std::vector::const_iterator i = pth.begin(); + i != pth.end(); ++i) + { + directory_map::const_iterator dirent = dir.find(t); + if (dirent != dir.end()) + { + boost::shared_ptr node = dirent->second; + directory_node::const_iterator entry = node->find(*i); + if (entry == node->end()) + return false; + t = directory_entry_tid(entry); + } + else + return false; + } + return true; +} + +static bool +lookup_path(file_path const & pth, + directory_map const & dir, + tid & t) +{ + std::vector vec; + split_path(pth, vec); + return lookup_path(vec, dir, t); +} + +static tid +ensure_entry(directory_map & dmap, + path_state & state, + tid dir_tid, + ptype entry_ty, + path_component entry, + tid_source & ts) +{ + I(! 
null_name(entry)); + + if (dir_tid != root_tid) + { + path_state::const_iterator parent = state.find(dir_tid); + I( parent != state.end()); + + // if our parent is null, we immediately become null too, and attach to + // the root node (where all null entries reside) + if (null_name(path_item_name(path_state_item(parent)))) + { + tid new_tid = ts.next(); + state.insert(std::make_pair(new_tid, path_item(root_tid, entry_ty, make_null_component()))); + return new_tid; + } + } + + boost::shared_ptr node = dnode(dmap, dir_tid); + directory_node::const_iterator node_entry = node->find(entry); + + if (node_entry != node->end()) + { + I(node_entry->second.first == entry_ty); + return node_entry->second.second; + } + else + { + tid new_tid = ts.next(); + state.insert(std::make_pair(new_tid, path_item(dir_tid, entry_ty, entry))); + node->insert(std::make_pair(entry, std::make_pair(entry_ty, new_tid))); + return new_tid; + } +} + +static tid +ensure_dir_in_map (std::vector pth, + directory_map & dmap, + path_state & state, + tid_source & ts) +{ + tid dir_tid = root_tid; + for (std::vector::const_iterator p = pth.begin(); + p != pth.end(); ++p) + { + dir_tid = ensure_entry(dmap, state, dir_tid, + ptype_directory, *p, ts); + } + return dir_tid; +} + +static tid +ensure_dir_in_map (file_path const & path, + directory_map & dmap, + path_state & state, + tid_source & ts) +{ + std::vector components; + split_path(path, components); + return ensure_dir_in_map(components, dmap, state, ts); +} + +static tid +ensure_file_in_map (file_path const & path, + directory_map & dmap, + path_state & state, + tid_source & ts) +{ + std::vector prefix; + path_component leaf_path; + split_path(path, prefix, leaf_path); + + I(! null_name(leaf_path)); + tid dir_tid = ensure_dir_in_map(prefix, dmap, state, ts); + return ensure_entry(dmap, state, dir_tid, ptype_file, leaf_path, ts); +} + +static void +ensure_entries_exist (path_state const & self_state, + directory_map & other_dmap, + path_state & other_state, + tid_source & ts) +{ + for (path_state::const_iterator i = self_state.begin(); + i != self_state.end(); ++i) + { + if (other_state.find(path_state_tid(i)) != other_state.end()) + continue; + + if (null_name(path_item_name(path_state_item(i)))) + continue; + + file_path full; + get_full_path(self_state, path_state_tid(i), full); + switch (path_item_type(path_state_item(i))) + { + case ptype_directory: + ensure_dir_in_map(full, other_dmap, other_state, ts); + break; + + case ptype_file: + ensure_file_in_map(full, other_dmap, other_state, ts); + break; + } + } +} + + +static void +apply_state_renumbering(state_renumbering const & renumbering, + path_state & state) +{ + sanity_check_path_state(state); + path_state tmp(state); + state.clear(); + + for (path_state::const_iterator i = tmp.begin(); i != tmp.end(); ++i) + { + path_item item = path_state_item(i); + tid t = path_state_tid(i); + + state_renumbering::const_iterator j = renumbering.find(t); + if (j != renumbering.end()) + t = j->second; + + j = renumbering.find(item.parent); + if (j != renumbering.end()) + item.parent = j->second; + + state.insert(std::make_pair(t, item)); + } + sanity_check_path_state(state); +} + +static void +apply_state_renumbering(state_renumbering const & renumbering, + path_analysis & pa) +{ + apply_state_renumbering(renumbering, pa.first); + apply_state_renumbering(renumbering, pa.second); +} + + +// this takes a path in the path space defined by input_dir and rebuilds it +// in the path space defined by output_space, including any changes to 
+// parents in the path (rather than directly to the path leaf name). it +// therefore *always* succeeds; sometimes it does nothing if there's no +// affected parent, but you always get a rebuilt path in the output space. + +static void +reconstruct_path(file_path const & input, + directory_map const & input_dir, + path_state const & output_space, + file_path & output) +{ + std::vector vec; + std::vector rebuilt; + + // L(F("reconstructing path '%s' under analysis\n") % input); + + split_path(input, vec); + + tid t = root_tid; + std::vector::const_iterator pth = vec.begin(); + while (pth != vec.end()) + { + directory_map::const_iterator dirent = input_dir.find(t); + if (dirent == input_dir.end()) + break; + + boost::shared_ptr node = dirent->second; + directory_node::const_iterator entry = node->find(*pth); + if (entry == node->end()) + break; + + { + // check to see if this is the image of an added or deleted entry + // (i.e. null name in output space), if so it terminates our + // search. + path_state::const_iterator i = output_space.find(directory_entry_tid(entry)); + I(i != output_space.end()); + if (null_name(path_item_name(path_state_item(i)))) + { + // L(F("input path element '%s' is null in output space, mapping truncated\n") % *pth); + break; + } + } + + // L(F("resolved entry '%s' in reconstruction\n") % *pth); + ++pth; + t = directory_entry_tid(entry); + + if (directory_entry_type(entry) != ptype_directory) + break; + } + + get_full_path(output_space, t, rebuilt); + + while(pth != vec.end()) + { + // L(F("copying tail entry '%s' in reconstruction\n") % *pth); + rebuilt.push_back(*pth); + ++pth; + } + + compose_path(rebuilt, output); + // L(F("reconstructed path '%s' as '%s'\n") % input % output); +} + + +static void +build_directory_map(path_state const & state, + directory_map & dir) +{ + sanity_check_path_state(state); + dir.clear(); + // L(F("building directory map for %d entries\n") % state.size()); + for (path_state::const_iterator i = state.begin(); i != state.end(); ++i) + { + tid curr = path_state_tid(i); + path_item item = path_state_item(i); + tid parent = path_item_parent(item); + path_component name = path_item_name(item); + ptype type = path_item_type(item); + // L(F("adding entry %s (%s %d) to directory node %d\n") + // % name % (type == ptype_directory ? 
"dir" : "file") % curr % parent); + dnode(dir, parent)->insert(std::make_pair(name,std::make_pair(type, curr))); + + // also, make sure to add current node if it's a directory, even if + // there are no entries in it + if (type == ptype_directory) + dnode(dir, curr); + } +} + + +void +analyze_rearrangement(change_set::path_rearrangement const & pr, + path_analysis & pa, + tid_source & ts) +{ + directory_map first_map, second_map; + state_renumbering renumbering; + std::set damaged_in_first, damaged_in_second; + + pa.first.clear(); + pa.second.clear(); + + for (std::set::const_iterator f = pr.deleted_files.begin(); + f != pr.deleted_files.end(); ++f) + { + tid x = ensure_file_in_map(*f, first_map, pa.first, ts); + pa.second.insert(std::make_pair(x, path_item(root_tid, ptype_file, make_null_component()))); + damaged_in_first.insert(x); + } + + for (std::set::const_iterator d = pr.deleted_dirs.begin(); + d != pr.deleted_dirs.end(); ++d) + { + tid x = ensure_dir_in_map(*d, first_map, pa.first, ts); + pa.second.insert(std::make_pair(x, path_item(root_tid, ptype_directory, make_null_component()))); + damaged_in_first.insert(x); + } + + for (std::map::const_iterator rf = pr.renamed_files.begin(); + rf != pr.renamed_files.end(); ++rf) + { + tid a = ensure_file_in_map(rf->first, first_map, pa.first, ts); + tid b = ensure_file_in_map(rf->second, second_map, pa.second, ts); + I(renumbering.find(a) == renumbering.end()); + renumbering.insert(std::make_pair(b,a)); + damaged_in_first.insert(a); + damaged_in_second.insert(b); + } + + for (std::map::const_iterator rd = pr.renamed_dirs.begin(); + rd != pr.renamed_dirs.end(); ++rd) + { + tid a = ensure_dir_in_map(rd->first, first_map, pa.first, ts); + tid b = ensure_dir_in_map(rd->second, second_map, pa.second, ts); + I(renumbering.find(a) == renumbering.end()); + renumbering.insert(std::make_pair(b,a)); + damaged_in_first.insert(a); + damaged_in_second.insert(b); + } + + for (std::set::const_iterator a = pr.added_files.begin(); + a != pr.added_files.end(); ++a) + { + tid x = ensure_file_in_map(*a, second_map, pa.second, ts); + pa.first.insert(std::make_pair(x, path_item(root_tid, ptype_file, make_null_component()))); + damaged_in_second.insert(x); + } + + // we now have two states which probably have a number of entries in + // common. we know already of an interesting set of entries they have in + // common: all the renamed_foo entries. for each such renamed_foo(a,b) + // entry, we made an entry in our state_renumbering of the form b->a, + // while building the states. + + // dump_analysis("analyzed", pa); + // dump_renumbering("first", renumbering); + apply_state_renumbering(renumbering, pa.second); + build_directory_map(pa.first, first_map); + build_directory_map(pa.second, second_map); + renumbering.clear(); + // dump_analysis("renumbered once", pa); + + // that only gets us half way, though: + // + // - every object which was explicitly moved (thus stayed alive) has been + // renumbered in re.second to have the same tid as in re.first + // + // - every object which was merely mentionned in passing -- say due to + // being an intermediate directory in a path -- and was not moved, still + // has differing tids in re.first and re.second (or worse, may only + // even have an *entry* in one of them) + // + // the second point here is what we need to correct: if a path didn't + // move, wasn't destroyed, and wasn't added, we want it to have the same + // tid. 
but that's a relatively easy condition to check; we've been + // keeping sets of all the objects which were damaged on each side of + // this business anyways. + + + // pass #1 makes sure that all the entries in each state *exist* within + // the other state, even if they have the wrong numbers + + ensure_entries_exist (pa.first, second_map, pa.second, ts); + ensure_entries_exist (pa.second, first_map, pa.first, ts); + + // pass #2 identifies common un-damaged elements from 2->1 and inserts + // renumberings + + for (path_state::const_iterator i = pa.second.begin(); + i != pa.second.end(); ++i) + { + tid first_tid, second_tid; + second_tid = path_state_tid(i); + file_path full; + if (pa.first.find(second_tid) != pa.first.end()) + continue; + get_full_path(pa.second, second_tid, full); + if (damaged_in_second.find(second_tid) != damaged_in_second.end()) + continue; + if (null_name(path_item_name(path_state_item(i)))) + continue; + I(lookup_path(full, first_map, first_tid)); + renumbering.insert(std::make_pair(second_tid, first_tid)); + } + + // dump_renumbering("second", renumbering); + apply_state_renumbering(renumbering, pa.second); + // dump_analysis("renumbered again", pa); + + // that should be the whole deal; if we don't have consensus at this + // point we have done something wrong. + sanity_check_path_analysis (pa); +} + +void +normalize_path_rearrangement(change_set::path_rearrangement & norm) +{ + path_analysis tmp; + tid_source ts; + + analyze_rearrangement(norm, tmp, ts); + clear_rearrangement(norm); + compose_rearrangement(tmp, norm); +} + +void +normalize_change_set(change_set & norm) +{ + normalize_path_rearrangement(norm.rearrangement); + change_set::delta_map tmp = norm.deltas; + for (change_set::delta_map::const_iterator i = tmp.begin(); + i != tmp.end(); ++i) + { + if (delta_entry_src(i) == delta_entry_dst(i)) + norm.deltas.erase(delta_entry_path(i)); + } +} + + +// begin stuff related to concatenation + +static void +index_entries(path_state const & state, + std::map & files, + std::map & dirs) +{ + for (path_state::const_iterator i = state.begin(); + i != state.end(); ++i) + { + file_path full; + path_item item = path_state_item(i); + get_full_path(state, path_state_tid(i), full); + + if (null_name(path_item_name(item))) + continue; + + switch (path_item_type(item)) + { + case ptype_directory: + dirs.insert(std::make_pair(full, path_state_tid(i))); + break; + + case ptype_file: + files.insert(std::make_pair(full, path_state_tid(i))); + break; + } + } +} + +// this takes every (p1,t1) entry in b and, if (p1,t2) it exists in a, +// inserts (t1,t2) in the rename set. in other words, it constructs the +// renumbering from b->a +static void +extend_renumbering_from_path_identities(std::map const & a, + std::map const & b, + state_renumbering & renumbering) +{ + for (std::map::const_iterator i = b.begin(); + i != b.end(); ++i) + { + I(! 
null_name(i->first)); + std::map::const_iterator j = a.find(i->first); + if (j == a.end()) + continue; + I(renumbering.find(i->second) == renumbering.end()); + renumbering.insert(std::make_pair(i->second, j->second)); + } +} + +static void +extend_state(path_state const & src, + path_state & dst) +{ + for (path_state::const_iterator i = src.begin(); + i != src.end(); ++i) + { + if (dst.find(path_state_tid(i)) == dst.end()) + dst.insert(*i); + } +} + +static void +ensure_tids_disjoint(path_analysis const & a, + path_analysis const & b) +{ + for (path_state::const_iterator i = a.first.begin(); + i != a.first.end(); ++i) + { + I(b.first.find(path_state_tid(i)) == b.first.end()); + } + for (path_state::const_iterator i = b.first.begin(); + i != b.first.end(); ++i) + { + I(a.first.find(path_state_tid(i)) == a.first.end()); + } +} + +static void +extract_killed(path_analysis const & a, + std::set & killed) + +{ + killed.clear(); + directory_map first_map, second_map; + + build_directory_map(a.first, first_map); + build_directory_map(a.second, second_map); + + for (directory_map::const_iterator i = first_map.begin(); + i != first_map.end(); ++i) + { + tid dir_tid = i->first; + directory_map::const_iterator j = second_map.find(dir_tid); + I(j != second_map.end()); + + // a path P = DIR/LEAF is "killed" by a path_analysis iff the + // directory node named DIR in the post-state contains LEAF in the + // pre-state, and does not contain LEAF in the post-state + + boost::shared_ptr first_node = i->second; + boost::shared_ptr second_node = j->second; + + for (directory_node::const_iterator p = first_node->begin(); + p != first_node->end(); ++p) + { + path_component first_name = directory_entry_name(p); + directory_node::const_iterator q = second_node->find(first_name); + if (q == second_node->end()) + { + // found a killed entry + std::vector killed_name; + file_path killed_path; + get_full_path(a.second, dir_tid, killed_name); + killed_name.push_back(first_name); + compose_path(killed_name, killed_path); + killed.insert(killed_path); + } + } + } +} + +static void +check_delta_entries_not_directories(path_analysis const & pa, + change_set::delta_map const & dels) +{ + directory_map dmap; + build_directory_map(pa.second, dmap); + for (change_set::delta_map::const_iterator i = dels.begin(); + i != dels.end(); ++i) + { + tid delta_tid; + if (lookup_path(delta_entry_path(i), dmap, delta_tid)) + { + path_state::const_iterator j = pa.second.find(delta_tid); + I(j != pa.second.end()); + I(path_item_type(path_state_item(j)) == ptype_file); + } + } +} + +static void +concatenate_disjoint_analyses(path_analysis const & a, + path_analysis const & b, + std::set const & a_killed, + path_analysis & concatenated) +{ + std::map a_second_files, a_second_dirs; + std::map b_first_files, b_first_dirs; + path_analysis a_tmp(a), b_tmp(b); + state_renumbering renumbering; + + // the trick here is that a.second and b.first supposedly refer to the + // same state-of-the-world, so all we need to do is: + // + // - confirm that both analyses have disjoint tids + // - work out which tids in b to identify with tids in a + // - renumber b + // + // - copy a.first -> concatenated.first + // - insert all elements of b.first not already in concatenated.first + // - copy b.second -> concatenated.second + // - insert all elements of a.second not already in concatenated.second + + ensure_tids_disjoint(a_tmp, b_tmp); + + index_entries(a_tmp.second, a_second_files, a_second_dirs); + index_entries(b_tmp.first, b_first_files, b_first_dirs); + 
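+  // as a sketch of the identification step (paths and tids invented for
+  // illustration): if a.second lists "foo/bar" as tid 7 and b.first lists
+  // "foo/bar" as tid 21, the renumbering built below will contain 21 -> 7,
+  // so that the concatenated analysis tracks the file under a single tid.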
+ { + std::set + a_second_file_set, a_second_dir_set, + b_first_file_set, b_first_dir_set; + + extract_first(a_second_files, a_second_file_set); + extract_first(a_second_dirs, a_second_dir_set); + extract_first(b_first_files, b_first_file_set); + extract_first(b_first_dirs, b_first_dir_set); + + // check that there are no entry-type mismatches + check_sets_disjoint(a_second_file_set, b_first_dir_set); + check_sets_disjoint(a_second_dir_set, b_first_file_set); + + // check that there's no use of killed entries + check_sets_disjoint(a_killed, b_first_dir_set); + check_sets_disjoint(a_killed, b_first_file_set); + } + + extend_renumbering_from_path_identities(a_second_files, b_first_files, renumbering); + extend_renumbering_from_path_identities(a_second_dirs, b_first_dirs, renumbering); + + // dump_analysis("state A", a_tmp); + // dump_analysis("state B", b_tmp); + // dump_renumbering("concatenation", renumbering); + apply_state_renumbering(renumbering, b_tmp); + + concatenated.first = a_tmp.first; + concatenated.second = b_tmp.second; + + extend_state(b_tmp.first, concatenated.first); + extend_state(a_tmp.second, concatenated.second); + + sanity_check_path_analysis(concatenated); +} + +void +concatenate_rearrangements(change_set::path_rearrangement const & a, + change_set::path_rearrangement const & b, + change_set::path_rearrangement & concatenated) +{ + a.check_sane(); + b.check_sane(); + concatenated = change_set::path_rearrangement(); + + tid_source ts; + path_analysis a_analysis, b_analysis, concatenated_analysis; + + analyze_rearrangement(a, a_analysis, ts); + analyze_rearrangement(b, b_analysis, ts); + + std::set a_killed; + extract_killed(a_analysis, a_killed); + + concatenate_disjoint_analyses(a_analysis, + b_analysis, + a_killed, + concatenated_analysis); + + compose_rearrangement(concatenated_analysis, + concatenated); + + concatenated.check_sane(); +} + +void +concatenate_change_sets(change_set const & a, + change_set const & b, + change_set & concatenated) +{ + a.check_sane(); + b.check_sane(); + + L(F("concatenating change sets\n")); + + tid_source ts; + path_analysis a_analysis, b_analysis, concatenated_analysis; + + analyze_rearrangement(a.rearrangement, a_analysis, ts); + analyze_rearrangement(b.rearrangement, b_analysis, ts); + + std::set a_killed; + extract_killed(a_analysis, a_killed); + + concatenate_disjoint_analyses(a_analysis, + b_analysis, + a_killed, + concatenated_analysis); + + compose_rearrangement(concatenated_analysis, + concatenated.rearrangement); + + // now process the deltas + + concatenated.deltas.clear(); + directory_map a_dst_map, b_src_map; + L(F("concatenating %d and %d deltas\n") + % a.deltas.size() % b.deltas.size()); + build_directory_map(a_analysis.second, a_dst_map); + build_directory_map(b_analysis.first, b_src_map); + + // first rename a's deltas under the rearrangement of b + for (change_set::delta_map::const_iterator del = a.deltas.begin(); + del != a.deltas.end(); ++del) + { + file_path new_pth; + L(F("processing delta on %s\n") % delta_entry_path(del)); + + // work out the name of entry in b.first + reconstruct_path(delta_entry_path(del), b_src_map, b_analysis.second, new_pth); + L(F("delta on %s in first changeset renamed to %s\n") + % delta_entry_path(del) % new_pth); + + if (b.rearrangement.has_deleted_file(delta_entry_path(del))) + // the delta should be removed if the file is going to be deleted + L(F("discarding delta [%s]->[%s] for deleted file '%s'\n") + % delta_entry_src(del) % delta_entry_dst(del) % delta_entry_path(del)); + else + 
concatenated.deltas.insert(std::make_pair(new_pth, + std::make_pair(delta_entry_src(del), + delta_entry_dst(del)))); + } + + // next fuse any deltas id1->id2 and id2->id3 to id1->id3 + for (change_set::delta_map::const_iterator del = b.deltas.begin(); + del != b.deltas.end(); ++del) + { + + file_path del_pth = delta_entry_path(del); + change_set::delta_map::const_iterator existing = + concatenated.deltas.find(del_pth); + if (existing != concatenated.deltas.end()) + { + L(F("fusing deltas on %s : %s -> %s and %s -> %s\n") + % del_pth + % delta_entry_src(existing) + % delta_entry_dst(existing) + % delta_entry_src(del) + % delta_entry_dst(del)); + I(delta_entry_dst(existing) == delta_entry_src(del)); + std::pair fused = std::make_pair(delta_entry_src(existing), + delta_entry_dst(del)); + concatenated.deltas.erase(del_pth); + concatenated.deltas.insert(std::make_pair((del_pth), fused)); + } + else + { + L(F("delta on %s in second changeset copied forward\n") % del_pth); + // in general don't want deltas on deleted files. however if a + // file has been deleted then re-added, then a delta is valid + // (it applies to the newly-added file) + if (!b.rearrangement.has_deleted_file(del_pth) + || b.rearrangement.has_added_file(del_pth) + || b.rearrangement.has_renamed_file_dst(del_pth)) + concatenated.deltas.insert(*del); + } + } + + normalize_change_set(concatenated); + concatenated.check_sane(); + + L(F("finished concatenation\n")); +} + +// end stuff related to concatenation + + +// begin stuff related to merging + + +static void +extend_renumbering_via_added_files(path_analysis const & a, + path_analysis const & b, + state_renumbering & existing_renumbering, + state_renumbering & renumbering) +{ + directory_map a_second_map; + build_directory_map(a.second, a_second_map); + + for (path_state::const_iterator i = b.first.begin(); + i != b.first.end(); ++i) + { + path_item item = path_state_item(i); + if (path_item_type(item) == ptype_file && null_name(path_item_name(item))) + { + path_state::const_iterator j = b.second.find(path_state_tid(i)); + I(j != b.second.end()); + path_component leaf_name = path_item_name(path_state_item(j)); + + I(path_item_type(path_state_item(j)) == ptype_file); + if (! 
null_name(leaf_name)) + { + tid added_parent_tid = path_item_parent(path_state_item(j)); + state_renumbering::const_iterator ren = existing_renumbering.find(added_parent_tid); + if (ren != existing_renumbering.end()) + added_parent_tid = ren->second; + directory_map::const_iterator dirent = a_second_map.find(added_parent_tid); + if (dirent != a_second_map.end()) + { + boost::shared_ptr node = dirent->second; + directory_node::const_iterator entry = node->find(leaf_name); + if (entry != node->end() && directory_entry_type(entry) == ptype_file) + { + I(renumbering.find(path_state_tid(i)) == renumbering.end()); + renumbering.insert(std::make_pair(path_state_tid(i), + directory_entry_tid(entry))); + } + } + } + } + } +} + +static bool +find_item(tid t, path_state const & ps, + path_item & item) +{ + path_state::const_iterator i = ps.find(t); + if (i == ps.end()) + return false; + item = path_state_item(i); + return true; +} + +static bool +find_items(tid t, path_analysis const & pa, + path_item & first, path_item & second) +{ + if (find_item(t, pa.first, first)) + { + I(find_item(t, pa.second, second)); + I(path_item_type(first) == path_item_type(second)); + return true; + } + else + { + I(!find_item(t, pa.second, second)); + return false; + } +} + +static void +resolve_conflict(tid t, ptype ty, + path_analysis const & a_tmp, + path_analysis const & b_tmp, + path_item & resolved, + path_state & resolved_conflicts, + app_state & app) +{ + path_state::const_iterator i = resolved_conflicts.find(t); + + path_item a_item, b_item; + find_item(t, a_tmp.second, a_item); + find_item(t, b_tmp.second, b_item); + + file_path anc, a, b, res; + get_full_path(a_tmp.first, t, anc); + get_full_path(a_tmp.second, t, a); + get_full_path(b_tmp.second, t, b); + + if (i != resolved_conflicts.end()) + { + resolved = path_state_item(i); + } + else if (null_name(path_item_name(a_item)) && + ! null_name(path_item_name(b_item))) + { + L(F("delete of %s dominates rename to %s\n") % anc % b); + resolved = a_item; + resolved_conflicts.insert(std::make_pair(t, resolved)); + } + else if (null_name(path_item_name(b_item)) && + ! null_name(path_item_name(a_item))) + { + L(F("delete of %s dominates rename to %s\n") % anc % a); + resolved = b_item; + resolved_conflicts.insert(std::make_pair(t, resolved)); + } + else + { + switch (ty) + { + case ptype_file: + N(app.lua.hook_resolve_file_conflict(anc, a, b, res), + F("unable to resolve file conflict '%s' -> '%s' vs. '%s'") % anc % a % b); + break; + case ptype_directory: + N(app.lua.hook_resolve_dir_conflict(anc, a, b, res), + F("unable to resolve dir conflict '%s' -> '%s' vs. '%s'") % anc % a % b); + break; + } + + N((res == a || res == b), + F("illegal conflict resolution '%s', wanted '%s' or '%s'\n") % res % a % b); + + if (res == a) + I(find_item(t, a_tmp.second, resolved)); + else + I(find_item(t, b_tmp.second, resolved)); + + resolved_conflicts.insert(std::make_pair(t, resolved)); + } +} + +static void +ensure_no_rename_clobbers(path_analysis const & a, + path_analysis const & b) +{ + // there is a special non-mergable pair of changes which we need + // to identify here: + // + // tid i : x -> y in change A + // tid j : z -> x in change B + // + // on the surface it looks like it ought to be mergable, since there is + // no conflict in the tids. except for one problem: B effectively + // clobbered i with j. 
there is nothing you can append to change B to + // revive the identity of i; in fact you risk having i and j identified + // if you form the naive merge concatenation BA. indeed, since A and B + // both supposedly start in the same state (in which i occupies name x), + // it really ought not to be possible to form B; you should have to + // accompany it with some sort of statement about the fate of i. + // + // as it stands, we're going to fault when we see this. if it turns out + // that there's a legal way of constructing such changes, one option is + // to synthesize a delete of i in B; essentially read "z->x" as an + // implicit "delete x first if it exists in post-state". + // + // however, for the time being this is a fault because we believe they + // should be globally illegal clobbers. + + directory_map b_first_map, b_second_map; + build_directory_map (b.first, b_first_map); + build_directory_map (b.second, b_second_map); + tid a_tid, b_tid; + + for (path_state::const_iterator i = a.first.begin(); + i != a.first.end(); ++i) + { + file_path anc_path, a_second_path; + a_tid = path_state_tid(i); + get_full_path(a.first, a_tid, anc_path); + + if (! lookup_path(anc_path, b_first_map, b_tid)) + { + file_path b_second_path; + reconstruct_path(anc_path, b_first_map, b.second, b_second_path); + + N(! lookup_path(b_second_path, b_second_map, b_tid), + (F("tid %d (%s) clobbered tid %d (%s)\n") + % b_tid % b_second_path + % a_tid % anc_path)); + } + } + +} + +static void +project_missing_changes(path_analysis const & a_tmp, + path_analysis const & b_tmp, + path_analysis & b_merged, + path_state & resolved_conflicts, + app_state & app) +{ + + // for each tid t adjusted in a: + // - if t exists in b: + // - if the change to t in b == change in a, skip + // - else resolve conflict + // - if conflict resolved in favour of a, append to merged + // - if resolved in favour of b, skip + // - else (no t in b) insert a's change to t in merged + + for (path_state::const_iterator i = a_tmp.first.begin(); + i != a_tmp.first.end(); ++i) + { + tid t = path_state_tid(i); + path_item a_first_item, a_second_item; + path_item b_first_item, b_second_item; + I(find_items(t, a_tmp, a_first_item, a_second_item)); + if (find_items(t, b_tmp, b_first_item, b_second_item)) + { + I(a_first_item == b_first_item); + if (a_second_item == b_second_item) + { + L(F("skipping common change on %s (tid %d)\n") + % path_item_name(a_first_item) % t); + } + else if (a_first_item == a_second_item) + { + L(F("skipping neutral change of %s -> %s (tid %d)\n") + % path_item_name(a_first_item) + % path_item_name(a_second_item) + % t); + } + else if (b_first_item == b_second_item) + { + L(F("propagating change on %s -> %s (tid %d)\n") + % path_item_name(b_first_item) + % path_item_name(b_second_item) + % t); + b_merged.first.insert(std::make_pair(t, b_second_item)); + b_merged.second.insert(std::make_pair(t, a_second_item)); + } + else + { + // conflict + path_item resolved; + resolve_conflict(t, path_item_type(a_first_item), a_tmp, b_tmp, + resolved, resolved_conflicts, app); + + if (resolved == a_second_item) + { + L(F("conflict detected, resolved in A's favour\n")); + b_merged.first.insert(std::make_pair(t, b_second_item)); + b_merged.second.insert(std::make_pair(t, a_second_item)); + } + else + { + L(F("conflict detected, resolved in B's favour\n")); + } + } + } + else + { + // there was no entry in b at all for this tid, copy it + b_merged.first.insert(std::make_pair(t, a_first_item)); + b_merged.second.insert(std::make_pair(t, 
a_second_item));
+        }
+    }
+
+  // now drive through b.second's view of the directory structure, in case
+  // some intermediate b-only directories showed up in the preimages of
+  // A-favoured conflicts.
+  extend_state(b_tmp.second, b_merged.first);
+  extend_state(b_merged.first, b_merged.second);
+}
+
+static void
+rebuild_analysis(path_analysis const & src,
+                 path_analysis & dst,
+                 tid_source & ts)
+{
+  state_renumbering renumbering;
+
+  for (path_state::const_iterator i = src.first.begin();
+       i != src.first.end(); ++i)
+    renumbering.insert(std::make_pair(path_state_tid(i), ts.next()));
+
+  dst = src;
+  apply_state_renumbering(renumbering, dst);
+}
+
+static void
+merge_disjoint_analyses(path_analysis const & a,
+                        path_analysis const & b,
+                        path_analysis & a_renumbered,
+                        path_analysis & b_renumbered,
+                        path_analysis & a_merged,
+                        path_analysis & b_merged,
+                        tid_source & ts,
+                        app_state & app)
+{
+  // we have anc->a and anc->b and we want to construct a->merged and
+  // b->merged, leading to the eventual identity concatenate(a,a_merged) ==
+  // concatenate(b,b_merged).
+
+  path_analysis a_tmp(a), b_tmp(b);
+  state_renumbering renumbering;
+
+  ensure_tids_disjoint(a_tmp, b_tmp);
+
+  // fault on a particular class of mal-formed changesets
+  ensure_no_rename_clobbers(a_tmp, b_tmp);
+  ensure_no_rename_clobbers(b_tmp, a_tmp);
+
+  // a.first and b.first refer to the same state-of-the-world.
+  //
+  // we begin by driving all the entries in a.first into b.first and vice
+  // versa.
+
+  {
+    directory_map a_first_map, b_first_map;
+    build_directory_map(a_tmp.first, a_first_map);
+    build_directory_map(b_tmp.first, b_first_map);
+    ensure_entries_exist(a_tmp.first, b_first_map, b_tmp.first, ts);
+    ensure_entries_exist(b_tmp.first, a_first_map, a_tmp.first, ts);
+  }
+
+  // we then drive any of the new arrivals in a.first to a.second, and
+  // likewise on b
+
+  {
+    directory_map a_second_map, b_second_map;
+    build_directory_map(a_tmp.second, a_second_map);
+    build_directory_map(b_tmp.second, b_second_map);
+    ensure_entries_exist(a_tmp.first, a_second_map, a_tmp.second, ts);
+    ensure_entries_exist(b_tmp.first, b_second_map, b_tmp.second, ts);
+  }
+
+  // we then index, identify, and renumber all the immediately apparent
+  // entries in each side.
+
+  {
+    std::map<file_path, tid> a_first_files, a_first_dirs;
+    std::map<file_path, tid> b_first_files, b_first_dirs;
+    index_entries(a_tmp.first, a_first_files, a_first_dirs);
+    index_entries(b_tmp.first, b_first_files, b_first_dirs);
+    extend_renumbering_from_path_identities(a_first_files, b_first_files, renumbering);
+    extend_renumbering_from_path_identities(a_first_dirs, b_first_dirs, renumbering);
+  }
+
+  // once renamed, b_tmp will have moved a fair bit closer to a_tmp, in
+  // terms of tids. there is still one set of files we haven't accounted
+  // for, however: files added in a and b.
+
+  {
+    state_renumbering aux_renumbering;
+    extend_renumbering_via_added_files(a_tmp, b_tmp, renumbering, aux_renumbering);
+    for (state_renumbering::const_iterator i = aux_renumbering.begin();
+         i != aux_renumbering.end(); ++i)
+      {
+        I(renumbering.find(i->first) == renumbering.end());
+        renumbering.insert(*i);
+      }
+  }
+
+  // renumbering now contains a *complete* renumbering of b->a,
+  // so we reset a_tmp and b_tmp, and renumber b_tmp under this
+  // scheme.
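+  //
+  // for instance (tids invented for illustration): if the common file
+  // "foo" entered a_tmp as tid 3 and b_tmp as tid 14, the renumbering
+  // holds 14 -> 3, and after apply_state_renumbering both analyses refer
+  // to that file by tid 3.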
+ + a_tmp = a; + b_tmp = b; + apply_state_renumbering(renumbering, b_tmp); + + a_renumbered = a_tmp; + b_renumbered = b_tmp; + + // now we're ready to merge (and resolve conflicts) + path_state resolved_conflicts; + project_missing_changes(a_tmp, b_tmp, b_merged, resolved_conflicts, app); + project_missing_changes(b_tmp, a_tmp, a_merged, resolved_conflicts, app); + + { + // now check: the merge analyses, when concatenated with their + // predecessors, should lead to the same composite rearrangement + + tid_source ts_tmp; + path_analysis anc_a_check, a_merge_check, a_check; + path_analysis anc_b_check, b_merge_check, b_check; + change_set::path_rearrangement a_re, b_re; + + rebuild_analysis(a, anc_a_check, ts_tmp); + rebuild_analysis(b, anc_b_check, ts_tmp); + rebuild_analysis(a_merged, a_merge_check, ts_tmp); + rebuild_analysis(b_merged, b_merge_check, ts_tmp); + + std::set anc_a_killed, anc_b_killed; + extract_killed(anc_a_check, anc_a_killed); + extract_killed(anc_b_check, anc_b_killed); + + concatenate_disjoint_analyses(anc_a_check, a_merge_check, anc_a_killed, a_check); + concatenate_disjoint_analyses(anc_b_check, b_merge_check, anc_b_killed, b_check); + compose_rearrangement(a_check, a_re); + compose_rearrangement(b_check, b_re); + I(a_re == b_re); + } + +} + +static void +merge_deltas(file_path const & anc_path, + file_path const & left_path, + file_path const & right_path, + file_path const & path_in_merged, + std::map & merge_finalists, + file_id const & anc, + file_id const & left, + file_id const & right, + file_id & finalist, + merge_provider & merger) +{ + std::map::const_iterator i = merge_finalists.find(path_in_merged); + if (i != merge_finalists.end()) + { + L(F("reusing merge resolution '%s' : '%s' -> '%s'\n") + % path_in_merged % anc % i->second); + finalist = i->second; + } + else + { + if (null_id(anc)) + { + N(merger.try_to_merge_files(left_path, right_path, path_in_merged, left, right, finalist), + F("merge of '%s' : '%s' vs. '%s' (no common ancestor) failed") + % path_in_merged % left % right); + } + else + { + N(merger.try_to_merge_files(anc_path, left_path, right_path, path_in_merged, + anc, left, right, finalist), + F("merge of '%s' : '%s' -> '%s' vs '%s' failed") + % path_in_merged % anc % left % right); + } + + L(F("merge of '%s' : '%s' -> '%s' vs '%s' resolved to '%s'\n") + % path_in_merged % anc % left % right % finalist); + + merge_finalists.insert(std::make_pair(path_in_merged, finalist)); + } +} + +static void +project_missing_deltas(change_set const & a, + change_set const & b, + path_analysis const & a_analysis, + path_analysis const & b_analysis, + path_analysis const & a_merged_analysis, + change_set & b_merged, + merge_provider & merger, + std::map & merge_finalists) +{ + directory_map a_second_map, b_first_map, a_merged_first_map; + build_directory_map(a_analysis.second, a_second_map); + build_directory_map(b_analysis.first, b_first_map); + build_directory_map(a_merged_analysis.first, a_merged_first_map); + + for (change_set::delta_map::const_iterator i = a.deltas.begin(); + i != a.deltas.end(); ++i) + { + file_path path_in_merged, path_in_anc, path_in_b_second; + + // we have a fork like this: + // + // + // +--> [a2] + // [a1==b1] + // +--> [b2] + // + // and we have a delta applied to a file in a2. we want to + // figure out what to call this delta's path in b2. this means + // reconstructing it in a1==b1, then reconstructing it *again* + // in b2. 
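+      //
+      // concretely (names invented for illustration): if a renamed
+      // "src/foo" to "lib/foo" and carries a delta on "lib/foo", we map
+      // that path back to "src/foo" in the shared ancestor, forward
+      // through b's renames to its name in b2, and forward through
+      // a_merged to its name in the merged tree.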
+
+      // first work out what the path in a.first == b.first is
+      reconstruct_path(delta_entry_path(i), a_second_map,
+                       a_analysis.first, path_in_anc);
+
+      // next work out what the path in b.second is
+      reconstruct_path(path_in_anc, b_first_map,
+                       b_analysis.second, path_in_b_second);
+
+      // then work out what the path in merged is
+      reconstruct_path(delta_entry_path(i), a_merged_first_map,
+                       a_merged_analysis.second, path_in_merged);
+
+      // now check to see if there was a delta on the b.second name in b
+      change_set::delta_map::const_iterator j = b.deltas.find(path_in_b_second);
+
+      if (j == b.deltas.end())
+        {
+          // if not, copy ours over using the merged name
+          L(F("merge is copying delta '%s' : '%s' -> '%s'\n")
+            % path_in_merged % delta_entry_src(i) % delta_entry_dst(i));
+          I(b.deltas.find(path_in_merged) == b.deltas.end());
+          if (b.rearrangement.has_deleted_file(path_in_merged))
+            // if the file was deleted on the other fork of the merge, then
+            // we don't want to keep this delta.
+            L(F("skipping delta '%s'->'%s' on deleted file '%s'\n")
+              % delta_entry_src(i) % delta_entry_dst(i) % path_in_merged);
+          else
+            b_merged.apply_delta(path_in_merged, delta_entry_src(i), delta_entry_dst(i));
+        }
+      else
+        {
+          // if so, either...
+
+          if (!(delta_entry_src(i) == delta_entry_src(j)))
+            {
+              // This is a bit of a corner case where a file was added then deleted on one
+              // of the forks. The src for the addition fork will be null_id, but the src
+              // for the other fork will be the ancestor file's id.
+
+              // if neither of the forks involved a file addition delta (null_id to something)
+              // then something bad happened.
+              I(null_id(delta_entry_src(i)) || null_id(delta_entry_src(j)));
+
+              if (null_id(delta_entry_src(i)))
+                {
+                  // ... use the delta from 'a'
+                  // 'a' change_set included a delta []->[...], i.e. file added. We want to
+                  // follow this fork so it gets added to the b_merged changeset
+                  L(F("propagating new file addition delta on '%s' : '%s' -> '%s'\n")
+                    % path_in_merged
+                    % delta_entry_src(j)
+                    % delta_entry_dst(i));
+                  b_merged.apply_delta(path_in_merged, delta_entry_src(i), delta_entry_dst(i));
+                }
+              else if (null_id(delta_entry_src(j)))
+                {
+                  // ... ignore the delta
+                  // 'b' change_set included a delta []->[...], i.e. file added. We don't need
+                  // to add it to the b_merged changeset, since any delta in 'a' will be
+                  // ignored (as 'b' includes deletions).
+                  L(F("skipping new file addition delta on '%s' : '' -> '%s'\n")
+                    % path_in_merged
+                    % delta_entry_dst(j));
+                }
+            }
+          else if (delta_entry_dst(i) == delta_entry_dst(j))
+            {
+              // ... absorb identical deltas
+              L(F("skipping common delta '%s' : '%s' -> '%s'\n")
+                % path_in_merged % delta_entry_src(i) % delta_entry_dst(i));
+            }
+
+          else if (delta_entry_src(i) == delta_entry_dst(i))
+            {
+              L(F("skipping neutral delta on '%s' : %s -> %s\n")
+                % delta_entry_path(i)
+                % delta_entry_src(i)
+                % delta_entry_dst(i));
+            }
+
+          else if (delta_entry_src(j) == delta_entry_dst(j))
+            {
+              L(F("propagating unperturbed delta on '%s' : '%s' -> '%s'\n")
+                % delta_entry_path(i)
+                % delta_entry_src(i)
+                % delta_entry_dst(i));
+              b_merged.apply_delta(path_in_merged, delta_entry_dst(j), delta_entry_dst(i));
+            }
+
+          else
+            {
+              // ... or resolve conflict
+              L(F("merging delta '%s' : '%s' -> '%s' vs.
'%s'\n") + % path_in_merged % delta_entry_src(i) % delta_entry_dst(i) % delta_entry_dst(j)); + file_id finalist; + + merge_deltas(path_in_anc, + delta_entry_path(i), // left_path + delta_entry_path(j), // right_path + path_in_merged, + merge_finalists, + delta_entry_src(i), // anc + delta_entry_dst(i), // left + delta_entry_dst(j), // right + finalist, merger); + L(F("resolved merge to '%s' : '%s' -> '%s'\n") + % path_in_merged % delta_entry_src(i) % finalist); + + // if the conflict resolved to something other than the + // existing post-state of b, add a new entry to the deltas of + // b finishing the job. + if (! (finalist == delta_entry_dst(j))) + b_merged.apply_delta(path_in_merged, delta_entry_dst(j), finalist); + } + } + } +} + + +void +merge_change_sets(change_set const & a, + change_set const & b, + change_set & a_merged, + change_set & b_merged, + merge_provider & merger, + app_state & app) +{ + a.check_sane(); + b.check_sane(); + + L(F("merging change sets\n")); + + tid_source ts; + path_analysis + a_analysis, b_analysis, + a_renumbered, b_renumbered, + a_merged_analysis, b_merged_analysis; + + analyze_rearrangement(a.rearrangement, a_analysis, ts); + analyze_rearrangement(b.rearrangement, b_analysis, ts); + + merge_disjoint_analyses(a_analysis, b_analysis, + a_renumbered, b_renumbered, + a_merged_analysis, b_merged_analysis, + ts, app); + + compose_rearrangement(a_merged_analysis, + a_merged.rearrangement); + + compose_rearrangement(b_merged_analysis, + b_merged.rearrangement); + + std::map merge_finalists; + + project_missing_deltas(a, b, + a_renumbered, b_renumbered, + a_merged_analysis, + b_merged, + merger, merge_finalists); + + project_missing_deltas(b, a, + b_renumbered, a_renumbered, + b_merged_analysis, + a_merged, + merger, merge_finalists); + + { + // confirmation step + change_set a_check, b_check; + // dump_change_set("a", a); + // dump_change_set("a_merged", a_merged); + // dump_change_set("b", b); + // dump_change_set("b_merged", b_merged); + concatenate_change_sets(a, a_merged, a_check); + concatenate_change_sets(b, b_merged, b_check); + // dump_change_set("a_check", a_check); + // dump_change_set("b_check", b_check); + I(a_check == b_check); + } + + normalize_change_set(a_merged); + normalize_change_set(b_merged); + + a_merged.check_sane(); + b_merged.check_sane(); + + L(F("finished merge\n")); +} + +// end stuff related to merging + +void +invert_change_set(change_set const & a2b, + manifest_map const & a_map, + change_set & b2a) +{ + a2b.check_sane(); + tid_source ts; + path_analysis a2b_analysis, b2a_analysis; + + analyze_rearrangement(a2b.rearrangement, a2b_analysis, ts); + + L(F("inverting change set\n")); + b2a_analysis.first = a2b_analysis.second; + b2a_analysis.second = a2b_analysis.first; + compose_rearrangement(b2a_analysis, b2a.rearrangement); + + b2a.deltas.clear(); + + // existing deltas are in "b space" + for (path_state::const_iterator b = b2a_analysis.first.begin(); + b != b2a_analysis.first.end(); ++b) + { + path_state::const_iterator a = b2a_analysis.second.find(path_state_tid(b)); + I(a != b2a_analysis.second.end()); + if (path_item_type(path_state_item(b)) == ptype_file) + { + file_path b_pth, a_pth; + get_full_path(b2a_analysis.first, path_state_tid(b), b_pth); + + if (null_name(path_item_name(path_state_item(b))) && + ! 
null_name(path_item_name(path_state_item(a))))
+        {
+          // b->a represents an add in "a space"
+          get_full_path(b2a_analysis.second, path_state_tid(a), a_pth);
+          manifest_map::const_iterator i = a_map.find(a_pth);
+          I(i != a_map.end());
+          b2a.deltas.insert(std::make_pair(a_pth,
+                                           std::make_pair(file_id(),
+                                                          manifest_entry_id(i))));
+          L(F("converted 'delete %s' to 'add as %s' in inverse\n")
+            % a_pth
+            % manifest_entry_id(i));
+        }
+      else if (! null_name(path_item_name(path_state_item(b))) &&
+               null_name(path_item_name(path_state_item(a))))
+        {
+          // b->a represents a del from "b space"
+          get_full_path(b2a_analysis.first, path_state_tid(b), b_pth);
+          L(F("converted add %s to delete in inverse\n") % b_pth );
+        }
+      else
+        {
+          get_full_path(b2a_analysis.first, path_state_tid(b), b_pth);
+          get_full_path(b2a_analysis.second, path_state_tid(a), a_pth);
+          change_set::delta_map::const_iterator del = a2b.deltas.find(b_pth);
+          if (del == a2b.deltas.end())
+            continue;
+          file_id src_id(delta_entry_src(del)), dst_id(delta_entry_dst(del));
+          L(F("converting delta %s -> %s on %s\n")
+            % src_id % dst_id % b_pth);
+          L(F("inverse is delta %s -> %s on %s\n")
+            % dst_id % src_id % a_pth);
+          b2a.deltas.insert(std::make_pair(a_pth, std::make_pair(dst_id, src_id)));
+        }
+    }
+  }
+
+  // some deltas might not have been renamed, however; for these we just
+  // invert the direction
+  for (change_set::delta_map::const_iterator del = a2b.deltas.begin();
+       del != a2b.deltas.end(); ++del)
+    {
+      // check to make sure this isn't the image of an add (now a delete)
+      if (null_id(delta_entry_src(del)))
+        continue;
+      // check to make sure this isn't one of the already-moved deltas
+      if (b2a.deltas.find(delta_entry_path(del)) != b2a.deltas.end())
+        continue;
+      b2a.deltas.insert(std::make_pair(delta_entry_path(del),
+                                       std::make_pair(delta_entry_dst(del),
+                                                      delta_entry_src(del))));
+    }
+  normalize_change_set(b2a);
+  b2a.check_sane();
+}
+
+void
+move_files_to_tmp_bottom_up(tid t,
+                            local_path const & temporary_root,
+                            path_state const & state,
+                            directory_map const & dmap)
+{
+  directory_map::const_iterator dirent = dmap.find(t);
+  if (dirent != dmap.end())
+    {
+      boost::shared_ptr<directory_node> node = dirent->second;
+      for (directory_node::const_iterator entry = node->begin();
+           entry != node->end(); ++entry)
+        {
+          tid child = directory_entry_tid(entry);
+          file_path path;
+          path_item item;
+
+          find_item(child, state, item);
+
+          if (null_name(path_item_name(item)))
+            continue;
+
+          // recursively move all sub-entries
+          if (path_item_type(item) == ptype_directory)
+            move_files_to_tmp_bottom_up(child, temporary_root, state, dmap);
+
+          get_full_path(state, child, path);
+
+          local_path src(path());
+          local_path dst((mkpath(temporary_root())
+                          / mkpath(boost::lexical_cast<std::string>(child))).string());
+
+          P(F("moving %s -> %s\n") % src % dst);
+          switch (path_item_type(item))
+            {
+            case ptype_file:
+              if (file_exists(src))
+                move_file(src, dst);
+              break;
+            case ptype_directory:
+              if (directory_exists(src))
+                move_dir(src, dst);
+              break;
+            }
+        }
+    }
+}
+
+void
+move_files_from_tmp_top_down(tid t,
+                             local_path const & temporary_root,
+                             path_state const & state,
+                             directory_map const & dmap)
+{
+  directory_map::const_iterator dirent = dmap.find(t);
+  if (dirent != dmap.end())
+    {
+      boost::shared_ptr<directory_node> node = dirent->second;
+      for (directory_node::const_iterator entry = node->begin();
+           entry != node->end(); ++entry)
+        {
+          tid child = directory_entry_tid(entry);
+          file_path path;
+          path_item item;
+
+          find_item(child, state, item);
+
+          if (null_name(path_item_name(item)))
+            continue;
+
+          get_full_path(state, child, path);
+
+          local_path src((mkpath(temporary_root())
+                          / mkpath(boost::lexical_cast<std::string>(child))).string());
+          local_path dst(path());
+
+          switch (path_item_type(item))
+            {
+            case ptype_file:
+              if (file_exists(src))
+                {
+                  P(F("moving file %s -> %s\n") % src % dst);
+                  make_dir_for(path);
+                  move_file(src, dst);
+                }
+              break;
+            case ptype_directory:
+              if (directory_exists(src))
+                {
+                  P(F("moving dir %s -> %s\n") % src % dst);
+                  make_dir_for(path);
+                  move_dir(src, dst);
+                }
+              break;
+            }
+
+          // recursively move all sub-entries
+          if (path_item_type(item) == ptype_directory)
+            move_files_from_tmp_top_down(child, temporary_root, state, dmap);
+        }
+    }
+}
+
+
+void
+apply_rearrangement_to_filesystem(change_set::path_rearrangement const & re,
+                                  local_path const & temporary_root)
+{
+  re.check_sane();
+  tid_source ts;
+  path_analysis analysis;
+  directory_map first_dmap, second_dmap;
+
+  analyze_rearrangement(re, analysis, ts);
+  build_directory_map(analysis.first, first_dmap);
+  build_directory_map(analysis.second, second_dmap);
+
+  if (analysis.first.empty())
+    return;
+
+  move_files_to_tmp_bottom_up(root_tid, temporary_root,
+                              analysis.first, first_dmap);
+
+  move_files_from_tmp_top_down(root_tid, temporary_root,
+                               analysis.second, second_dmap);
+}
+
+// application stuff
+
+void
+apply_path_rearrangement(path_set const & old_ps,
+                         change_set::path_rearrangement const & pr,
+                         path_set & new_ps)
+{
+  pr.check_sane();
+  change_set::path_rearrangement a, b, c;
+  a.added_files = old_ps;
+  concatenate_rearrangements(a, pr, c);
+  new_ps = c.added_files;
+}
+
+void
+build_pure_addition_change_set(manifest_map const & man,
+                               change_set & cs)
+{
+  for (manifest_map::const_iterator i = man.begin(); i != man.end(); ++i)
+    cs.add_file(manifest_entry_path(i), manifest_entry_id(i));
+  cs.check_sane();
+}
+
+// this function takes the rearrangement sitting in cs and "completes" the
+// changeset by filling in all the deltas
+
+void
+complete_change_set(manifest_map const & m_old,
+                    manifest_map const & m_new,
+                    change_set & cs)
+{
+  cs.rearrangement.check_sane();
+  tid_source ts;
+  path_analysis analysis;
+  directory_map first_dmap, second_dmap;
+
+  analyze_rearrangement(cs.rearrangement, analysis, ts);
+  build_directory_map(analysis.first, first_dmap);
+  build_directory_map(analysis.second, second_dmap);
+
+  std::set<file_path> paths;
+  extract_path_set(m_new, paths);
+
+  for (std::set<file_path>::const_iterator i = cs.rearrangement.added_files.begin();
+       i != cs.rearrangement.added_files.end(); ++i)
+    {
+      manifest_map::const_iterator j = m_new.find(*i);
+      I(j != m_new.end());
+      cs.deltas.insert(std::make_pair(*i,
+                                      std::make_pair(null_ident,
+                                                     manifest_entry_id(j))));
+      paths.erase(*i);
+    }
+
+  for (std::set<file_path>::const_iterator i = paths.begin();
+       i != paths.end(); ++i)
+    {
+      file_path old_path;
+      reconstruct_path(*i, second_dmap, analysis.first, old_path);
+      manifest_map::const_iterator j = m_old.find(old_path);
+      manifest_map::const_iterator k = m_new.find(*i);
+      I(j != m_old.end());
+      I(k != m_new.end());
+      if (!(manifest_entry_id(j) == manifest_entry_id(k)))
+        cs.deltas.insert(std::make_pair(*i, std::make_pair(manifest_entry_id(j),
+                                                           manifest_entry_id(k))));
+    }
+
+  cs.check_sane();
+}
+
+
+void
+apply_change_set(manifest_map const & old_man,
+                 change_set const & cs,
+                 manifest_map & new_man)
+{
+  cs.check_sane();
+  change_set a, b;
+  build_pure_addition_change_set(old_man, a);
+  concatenate_change_sets(a, cs, b);
+
+  // If the composed change_set still has renames or deletions in it, then
+  //
they referred to things that weren't in the original manifest, and this + // change_set should never have been applied to this manifest in the first + // place. + I(b.rearrangement.deleted_files.empty()); + I(b.rearrangement.renamed_files.empty()); + // Furthermore, all deltas should be add deltas + for (change_set::delta_map::const_iterator i = b.deltas.begin(); + i != b.deltas.end(); ++i) + { + I(null_id(delta_entry_src(i))); + I(b.rearrangement.added_files.find(delta_entry_path(i)) + != b.rearrangement.added_files.end()); + } + + new_man.clear(); + for (std::set::const_iterator i = b.rearrangement.added_files.begin(); + i != b.rearrangement.added_files.end(); ++i) + { + change_set::delta_map::const_iterator d = b.deltas.find(*i); + I(d != b.deltas.end()); + new_man.insert(std::make_pair(*i, delta_entry_dst(d))); + } +} + +// quick, optimistic and destructive version for log walker +file_path +apply_change_set_inverse(change_set const & cs, + file_path const & file_in_second) +{ + cs.check_sane(); + tid_source ts; + path_analysis analysis; + directory_map second_dmap; + file_path file_in_first; + + analyze_rearrangement(cs.rearrangement, analysis, ts); + build_directory_map(analysis.second, second_dmap); + reconstruct_path(file_in_second, second_dmap, analysis.first, file_in_first); + return file_in_first; +} + +// quick, optimistic and destructive version for rcs importer +void +apply_change_set(change_set const & cs, + manifest_map & man) +{ + cs.check_sane(); + if (cs.rearrangement.renamed_files.empty() + && cs.rearrangement.renamed_dirs.empty() + && cs.rearrangement.deleted_dirs.empty()) + { + // fast path for simple drop/add/delta file operations + for (std::set::const_iterator i = cs.rearrangement.deleted_files.begin(); + i != cs.rearrangement.deleted_files.end(); ++i) + { + man.erase(*i); + } + for (change_set::delta_map::const_iterator i = cs.deltas.begin(); + i != cs.deltas.end(); ++i) + { + if (!null_id(delta_entry_dst(i))) + man[delta_entry_path(i)] = delta_entry_dst(i); + } + } + else + { + // fall back to the slow way + manifest_map tmp; + apply_change_set(man, cs, tmp); + man = tmp; + } +} + + +// i/o stuff + +namespace +{ + namespace syms + { + std::string const patch("patch"); + std::string const from("from"); + std::string const to("to"); + std::string const add_file("add_file"); + std::string const delete_file("delete_file"); + std::string const delete_dir("delete_dir"); + std::string const rename_file("rename_file"); + std::string const rename_dir("rename_dir"); + } +} + +static void +parse_path_rearrangement(basic_io::parser & parser, + change_set & cs) +{ + while (parser.symp()) + { + std::string t1, t2; + if (parser.symp(syms::add_file)) + { + parser.sym(); + parser.str(t1); + cs.add_file(file_path(t1)); + } + else if (parser.symp(syms::delete_file)) + { + parser.sym(); + parser.str(t1); + cs.delete_file(file_path(t1)); + } + else if (parser.symp(syms::delete_dir)) + { + parser.sym(); + parser.str(t1); + cs.delete_dir(file_path(t1)); + } + else if (parser.symp(syms::rename_file)) + { + parser.sym(); + parser.str(t1); + parser.esym(syms::to); + parser.str(t2); + cs.rename_file(file_path(t1), + file_path(t2)); + } + else if (parser.symp(syms::rename_dir)) + { + parser.sym(); + parser.str(t1); + parser.esym(syms::to); + parser.str(t2); + cs.rename_dir(file_path(t1), + file_path(t2)); + } + else + break; + } + cs.rearrangement.check_sane(); +} + + +void +print_path_rearrangement(basic_io::printer & printer, + change_set::path_rearrangement const & pr) +{ + + 
pr.check_sane();
+  for (std::set<file_path>::const_iterator i = pr.deleted_files.begin();
+       i != pr.deleted_files.end(); ++i)
+    {
+      basic_io::stanza st;
+      st.push_str_pair(syms::delete_file, (*i)());
+      printer.print_stanza(st);
+    }
+
+  for (std::set<file_path>::const_iterator i = pr.deleted_dirs.begin();
+       i != pr.deleted_dirs.end(); ++i)
+    {
+      basic_io::stanza st;
+      st.push_str_pair(syms::delete_dir, (*i)());
+      printer.print_stanza(st);
+    }
+
+  for (std::map<file_path, file_path>::const_iterator i = pr.renamed_files.begin();
+       i != pr.renamed_files.end(); ++i)
+    {
+      basic_io::stanza st;
+      st.push_str_pair(syms::rename_file, i->first());
+      st.push_str_pair(syms::to, i->second());
+      printer.print_stanza(st);
+    }
+
+  for (std::map<file_path, file_path>::const_iterator i = pr.renamed_dirs.begin();
+       i != pr.renamed_dirs.end(); ++i)
+    {
+      basic_io::stanza st;
+      st.push_str_pair(syms::rename_dir, i->first());
+      st.push_str_pair(syms::to, i->second());
+      printer.print_stanza(st);
+    }
+
+  for (std::set<file_path>::const_iterator i = pr.added_files.begin();
+       i != pr.added_files.end(); ++i)
+    {
+      basic_io::stanza st;
+      st.push_str_pair(syms::add_file, (*i)());
+      printer.print_stanza(st);
+    }
+}
+
+void
+parse_change_set(basic_io::parser & parser,
+                 change_set & cs)
+{
+  clear_change_set(cs);
+
+  parse_path_rearrangement(parser, cs);
+
+  while (parser.symp(syms::patch))
+    {
+      std::string path, src, dst;
+      parser.sym();
+      parser.str(path);
+      parser.esym(syms::from);
+      parser.hex(src);
+      parser.esym(syms::to);
+      parser.hex(dst);
+      cs.deltas.insert(std::make_pair(file_path(path),
+                                      std::make_pair(file_id(src),
+                                                     file_id(dst))));
+    }
+  cs.check_sane();
+}
+
+void
+print_change_set(basic_io::printer & printer,
+                 change_set const & cs)
+{
+  cs.check_sane();
+  print_path_rearrangement(printer, cs.rearrangement);
+
+  for (change_set::delta_map::const_iterator i = cs.deltas.begin();
+       i != cs.deltas.end(); ++i)
+    {
+      basic_io::stanza st;
+      st.push_str_pair(syms::patch, i->first());
+      st.push_hex_pair(syms::from, i->second.first.inner()());
+      st.push_hex_pair(syms::to, i->second.second.inner()());
+      printer.print_stanza(st);
+    }
+}
+
+void
+read_path_rearrangement(data const & dat,
+                        change_set::path_rearrangement & re)
+{
+  std::istringstream iss(dat());
+  basic_io::input_source src(iss, "path_rearrangement");
+  basic_io::tokenizer tok(src);
+  basic_io::parser pars(tok);
+  change_set cs;
+  parse_path_rearrangement(pars, cs);
+  re = cs.rearrangement;
+  I(src.lookahead == EOF);
+  re.check_sane();
+}
+
+void
+read_change_set(data const & dat,
+                change_set & cs)
+{
+  std::istringstream iss(dat());
+  basic_io::input_source src(iss, "change_set");
+  basic_io::tokenizer tok(src);
+  basic_io::parser pars(tok);
+  parse_change_set(pars, cs);
+  I(src.lookahead == EOF);
+  cs.check_sane();
+}
+
+void
+write_change_set(change_set const & cs,
+                 data & dat)
+{
+  cs.check_sane();
+  std::ostringstream oss;
+  basic_io::printer pr(oss);
+  print_change_set(pr, cs);
+  dat = data(oss.str());
+}
+
+void
+write_path_rearrangement(change_set::path_rearrangement const & re,
+                         data & dat)
+{
+  re.check_sane();
+  std::ostringstream oss;
+  basic_io::printer pr(oss);
+  print_path_rearrangement(pr, re);
+  dat = data(oss.str());
+}
+
+#ifdef BUILD_UNIT_TESTS
+#include "unit_tests.hh"
+#include "sanity.hh"
+
+static void dump_change_set(std::string const & ctx,
+                            change_set const & cs)
+{
+  data tmp;
+  write_change_set(cs, tmp);
+  L(F("[begin changeset %s]\n") % ctx);
+  L(F("%s") % tmp);
+  L(F("[end changeset %s]\n") % ctx);
+}
+
+static void
+spin_change_set(change_set const & cs)
+{
+  data tmp1;
+  change_set cs1;
+ write_change_set(cs, tmp1); + dump_change_set("normalized", cs); + read_change_set(tmp1, cs1); + for (int i = 0; i < 5; ++i) + { + data tmp2; + change_set cs2; + write_change_set(cs1, tmp2); + BOOST_CHECK(tmp1 == tmp2); + read_change_set(tmp2, cs2); + BOOST_CHECK(cs1.rearrangement == cs2.rearrangement); + BOOST_CHECK(cs1.deltas == cs2.deltas); + cs1 = cs2; + } +} + +static void +disjoint_merge_test(std::string const & ab_str, + std::string const & ac_str) +{ + change_set ab, ac, bm, cm; + + app_state app; + + L(F("beginning disjoint_merge_test\n")); + + read_change_set(data(ab_str), ab); + read_change_set(data(ac_str), ac); + + manifest_map dummy; + + merge_provider merger(app, dummy, dummy, dummy); + merge_change_sets(ab, ac, bm, cm, merger, app); + + dump_change_set("ab", ab); + dump_change_set("ac", ac); + dump_change_set("bm", bm); + dump_change_set("cm", cm); + + BOOST_CHECK(bm.rearrangement == ac.rearrangement); + BOOST_CHECK(cm.rearrangement == ab.rearrangement); + + L(F("finished disjoint_merge_test\n")); +} + +static void +disjoint_merge_tests() +{ + disjoint_merge_test + ("rename_file \"foo\"\n" + " to \"bar\"\n", + + "rename_file \"apple\"\n" + " to \"orange\"\n"); + + disjoint_merge_test + ("rename_file \"foo/a.txt\"\n" + " to \"bar/b.txt\"\n", + + "rename_file \"bar/c.txt\"\n" + " to \"baz/d.txt\"\n"); + + disjoint_merge_test + ("patch \"foo/file.txt\"\n" + " from [c6a4a6196bb4a744207e1a6e90273369b8c2e925]\n" + " to [fe18ec0c55cbc72e4e51c58dc13af515a2f3a892]\n", + + "rename_file \"foo/file.txt\"\n" + " to \"foo/apple.txt\"\n"); + + disjoint_merge_test + ( + "rename_file \"apple.txt\"\n" + " to \"pear.txt\"\n" + "\n" + "patch \"foo.txt\"\n" + " from [c6a4a6196bb4a744207e1a6e90273369b8c2e925]\n" + " to [fe18ec0c55cbc72e4e51c58dc13af515a2f3a892]\n", + + "rename_file \"foo.txt\"\n" + " to \"bar.txt\"\n" + "\n" + "patch \"apple.txt\"\n" + " from [fe18ec0c55cbc72e4e51c58dc13af515a2f3a892]\n" + " to [435e816c30263c9184f94e7c4d5aec78ea7c028a]\n"); +} + +static void +basic_change_set_test() +{ + try + { + + change_set cs; + cs.delete_file(file_path("usr/lib/zombie")); + cs.add_file(file_path("usr/bin/cat"), + file_id(hexenc("adc83b19e793491b1c6ea0fd8b46cd9f32e592fc"))); + cs.add_file(file_path("usr/local/bin/dog"), + file_id(hexenc("adc83b19e793491b1c6ea0fd8b46cd9f32e592fc"))); + cs.rename_file(file_path("usr/local/bin/dog"), file_path("usr/bin/dog")); + cs.rename_file(file_path("usr/bin/cat"), file_path("usr/local/bin/chicken")); + cs.add_file(file_path("usr/lib/libc.so"), + file_id(hexenc("435e816c30263c9184f94e7c4d5aec78ea7c028a"))); + cs.rename_dir(file_path("usr/lib"), file_path("usr/local/lib")); + cs.apply_delta(file_path("usr/local/bin/chicken"), + file_id(hexenc("c6a4a6196bb4a744207e1a6e90273369b8c2e925")), + file_id(hexenc("fe18ec0c55cbc72e4e51c58dc13af515a2f3a892"))); + spin_change_set(cs); + } + catch (informative_failure & exn) + { + L(F("informative failure: %s\n") % exn.what); + } + catch (std::runtime_error & exn) + { + L(F("runtime error: %s\n") % exn.what()); + } +} + +static void +neutralize_change_test() +{ + try + { + + change_set cs1, cs2, csa; + cs1.add_file(file_path("usr/lib/zombie"), + file_id(hexenc("adc83b19e793491b1c6ea0fd8b46cd9f32e592fc"))); + cs1.rename_file(file_path("usr/lib/apple"), + file_path("usr/lib/orange")); + cs1.rename_dir(file_path("usr/lib/moose"), + file_path("usr/lib/squirrel")); + + dump_change_set("neutralize target", cs1); + + cs2.delete_file(file_path("usr/lib/zombie")); + cs2.rename_file(file_path("usr/lib/orange"), + 
file_path("usr/lib/apple")); + cs2.rename_dir(file_path("usr/lib/squirrel"), + file_path("usr/lib/moose")); + + dump_change_set("neutralizer", cs2); + + concatenate_change_sets(cs1, cs2, csa); + + dump_change_set("neutralized", csa); + + tid_source ts; + path_analysis analysis; + analyze_rearrangement(csa.rearrangement, analysis, ts); + + BOOST_CHECK(analysis.first.empty()); + BOOST_CHECK(analysis.second.empty()); + } + catch (informative_failure & exn) + { + L(F("informative failure: %s\n") % exn.what); + } + catch (std::runtime_error & exn) + { + L(F("runtime error: %s\n") % exn.what()); + } +} + +static void +non_interfering_change_test() +{ + try + { + + change_set cs1, cs2, csa; + cs1.delete_file(file_path("usr/lib/zombie")); + cs1.rename_file(file_path("usr/lib/orange"), + file_path("usr/lib/apple")); + cs1.rename_dir(file_path("usr/lib/squirrel"), + file_path("usr/lib/moose")); + + dump_change_set("non-interference A", cs1); + + cs2.add_file(file_path("usr/lib/zombie"), + file_id(hexenc("adc83b19e793491b1c6ea0fd8b46cd9f32e592fc"))); + cs2.rename_file(file_path("usr/lib/pear"), + file_path("usr/lib/orange")); + cs2.rename_dir(file_path("usr/lib/spy"), + file_path("usr/lib/squirrel")); + + dump_change_set("non-interference B", cs2); + + concatenate_change_sets(cs1, cs2, csa); + + dump_change_set("non-interference combined", csa); + + tid_source ts; + path_analysis analysis; + analyze_rearrangement(csa.rearrangement, analysis, ts); + + BOOST_CHECK(analysis.first.size() == 8); + BOOST_CHECK(analysis.second.size() == 8); + } + catch (informative_failure & exn) + { + L(F("informative failure: %s\n") % exn.what); + } + catch (std::runtime_error & exn) + { + L(F("runtime error: %s\n") % exn.what()); + } +} + +static const file_id fid_null; +static const file_id fid1 = file_id(hexenc("aaaa3831e5eb74e6cd50b94f9e99e6a14d98d702")); +static const file_id fid2 = file_id(hexenc("bbbb3831e5eb74e6cd50b94f9e99e6a14d98d702")); +static const file_id fid3 = file_id(hexenc("cccc3831e5eb74e6cd50b94f9e99e6a14d98d702")); + +typedef enum { in_a, in_b } which_t; +struct bad_concatenate_change_test +{ + change_set a; + change_set b; + change_set combined; + change_set concat; + bool do_combine; + std::string ident; + bad_concatenate_change_test(char const *file, int line) : + do_combine(false), + ident((F("%s:%d") % file % line).str()) + { + L(F("BEGINNING concatenation test %s\n") % ident); + } + + ~bad_concatenate_change_test() + { + L(F("FINISHING concatenation test %s\n") % ident); + } + + change_set & getit(which_t which) + { + if (which == in_a) + return a; + return b; + } + // Call combine() if you want to make sure that the things that are bad when + // concatenated are also bad when all stuck together into a single + // changeset. 
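// (Editorial aside: a typical use of this harness, mirroring the cases below,
// is
//
//   bad_concatenate_change_test t(__FILE__, __LINE__);
//   t.combine();
//   t.rename_file(in_a, "foo", "target");
//   t.rename_file(in_b, "bar", "target");
//   t.run();
//
// which checks that the two renames are rejected both when a and b are
// concatenated and, thanks to combine(), when they are folded into a single
// changeset.)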
+ void combine() { do_combine = true; } + void add_file(which_t which, std::string const & path, file_id fid = fid1) + { + getit(which).add_file(file_path(path), fid); + if (do_combine) + combined.add_file(file_path(path), fid); + } + void apply_delta(which_t which, std::string const & path, + file_id from_fid, + file_id to_fid) + { + getit(which).apply_delta(file_path(path), from_fid, to_fid); + if (do_combine) + combined.apply_delta(file_path(path), from_fid, to_fid); + } + void delete_file(which_t which, std::string const & path) + { + getit(which).delete_file(file_path(path)); + if (do_combine) + combined.delete_file(file_path(path)); + } + void delete_dir(which_t which, std::string const & path) + { + getit(which).delete_dir(file_path(path)); + if (do_combine) + combined.delete_dir(file_path(path)); + } + void rename_file(which_t which, + std::string const & path1, std::string const & path2) + { + getit(which).rename_file(file_path(path1), file_path(path2)); + if (do_combine) + combined.rename_file(file_path(path1), file_path(path2)); + } + void rename_dir(which_t which, + std::string const & path1, std::string const & path2) + { + getit(which).rename_dir(file_path(path1), file_path(path2)); + if (do_combine) + combined.rename_dir(file_path(path1), file_path(path2)); + } + void run() + { + L(F("RUNNING bad_concatenate_change_test %s\n") % ident); + try + { + dump_change_set("a", a); + dump_change_set("b", b); + } + catch (std::logic_error e) + { + L(F("skipping change_set printing, one or both are not sane\n")); + } + BOOST_CHECK_THROW(concatenate_change_sets(a, b, concat), + std::logic_error); + try { dump_change_set("concat", concat); } + catch (std::logic_error e) { L(F("concat change_set is insane\n")); } + if (do_combine) + { + L(F("Checking combined change set\n")); + change_set empty_cs, combined_concat; + BOOST_CHECK_THROW(concatenate_change_sets(combined, + empty_cs, + combined_concat), + std::logic_error); + try { dump_change_set("combined_concat", combined_concat); } + catch (std::logic_error e) { L(F("combined_concat is insane\n")); } + } + } + void run_both() + { + run(); + L(F("RUNNING bad_concatenate_change_test %s again backwards\n") % ident); + BOOST_CHECK_THROW(concatenate_change_sets(a, b, concat), + std::logic_error); + } +}; + +// We also do a number of just "bad change set" tests here, leaving one of +// them empty; this is because our main line of defense against bad +// change_sets, check_sane_history, does its checking by doing +// concatenations, so it's doing concatenations that we want to be sure does +// sanity checking. 
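As a self-contained illustration of the first failure class exercised by the harness below (two edits dropping files onto the same target), here is a minimal editorial sketch in plain C++. It is not monotone code and every name in it is invented; it only demonstrates the clobber-detection idea that the concatenation checks rely on.

#include <cassert>
#include <map>
#include <set>
#include <stdexcept>
#include <string>

// apply a batch of renames to a set of paths, refusing to clobber a path
static void apply_renames(std::set<std::string> & tree,
                          std::map<std::string, std::string> const & renames)
{
  for (std::map<std::string, std::string>::const_iterator i = renames.begin();
       i != renames.end(); ++i)
    {
      if (tree.count(i->first) == 0)
        throw std::logic_error("rename source missing: " + i->first);
      if (tree.count(i->second) != 0)
        throw std::logic_error("rename clobbers: " + i->second);
      tree.erase(i->first);
      tree.insert(i->second);
    }
}

int main()
{
  std::set<std::string> tree;
  tree.insert("foo");
  tree.insert("bar");

  std::map<std::string, std::string> a, b;
  a["foo"] = "target";          // edit a: rename foo -> target
  b["bar"] = "target";          // edit b: rename bar -> target

  apply_renames(tree, a);       // fine on its own
  bool threw = false;
  try { apply_renames(tree, b); }
  catch (std::logic_error &) { threw = true; }
  assert(threw);                // the concatenation a;b must be rejected
  return 0;
}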
+static void +bad_concatenate_change_tests() +{ + // Files/directories can't be dropped on top of each other: + BOOST_CHECKPOINT("on top"); + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.add_file(in_a, "target"); + t.add_file(in_b, "target"); + t.run(); + } + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.combine(); + t.rename_file(in_a, "foo", "target"); + t.rename_file(in_b, "bar", "target"); + t.run(); + } + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.combine(); + t.rename_dir(in_a, "foo", "target"); + t.rename_dir(in_b, "bar", "target"); + t.run(); + } + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.combine(); + t.rename_file(in_a, "foo", "target"); + t.rename_dir(in_b, "bar", "target"); + t.run_both(); + } + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.combine(); + t.add_file(in_a, "target"); + t.rename_file(in_b, "foo", "target"); + t.run_both(); + } + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.combine(); + t.add_file(in_a, "target"); + t.rename_dir(in_b, "foo", "target"); + t.run_both(); + } + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.combine(); + t.add_file(in_a, "target/subfile"); + t.add_file(in_b, "target"); + t.run_both(); + } + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.combine(); + t.add_file(in_a, "target/subfile"); + t.rename_file(in_b, "foo", "target"); + t.run_both(); + } + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.add_file(in_a, "target/subfile"); + t.rename_dir(in_b, "foo", "target"); + t.run_both(); + } + // You can only delete something once + BOOST_CHECKPOINT("delete once"); + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.delete_file(in_a, "target"); + t.delete_file(in_b, "target"); + t.run(); + } + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.combine(); + t.delete_file(in_a, "target"); + t.delete_dir(in_b, "target"); + t.run_both(); + } + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.delete_dir(in_a, "target"); + t.delete_dir(in_b, "target"); + t.run(); + } + // You can't delete something that's not there anymore + BOOST_CHECKPOINT("delete after rename"); + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.combine(); + t.delete_file(in_a, "target"); + t.rename_file(in_b, "target", "foo"); + t.run_both(); + } + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.combine(); + t.delete_dir(in_a, "target"); + t.rename_file(in_b, "target", "foo"); + t.run_both(); + } + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.combine(); + t.delete_dir(in_a, "target"); + t.rename_dir(in_b, "target", "foo"); + t.run_both(); + } + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.combine(); + t.delete_file(in_a, "target"); + t.rename_dir(in_b, "target", "foo"); + t.run_both(); + } + // Files/directories can't be split in two + BOOST_CHECKPOINT("splitting files/dirs"); + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.rename_file(in_a, "target", "foo"); + t.rename_file(in_b, "target", "bar"); + t.run(); + } + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.rename_dir(in_a, "target", "foo"); + t.rename_dir(in_b, "target", "bar"); + t.run(); + } + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.combine(); + t.rename_dir(in_a, "target", "foo"); + t.rename_file(in_b, "target", "bar"); + t.run_both(); + } + // Files and directories are different + BOOST_CHECKPOINT("files != dirs"); + { + bad_concatenate_change_test t(__FILE__, __LINE__); + 
t.add_file(in_a, "target"); + t.delete_dir(in_b, "target"); + t.run(); + } + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.add_file(in_a, "target/subfile"); + t.delete_file(in_b, "target"); + t.run(); + } + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.add_file(in_a, "target/subfile"); + t.rename_file(in_b, "target", "foo"); + t.run(); + } + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.rename_file(in_a, "foo", "target"); + t.delete_dir(in_b, "target"); + t.run(); + } + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.combine(); + t.apply_delta(in_a, "target", fid1, fid2); + t.delete_dir(in_b, "target"); + t.run_both(); + } + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.rename_dir(in_a, "foo", "target"); + t.delete_file(in_b, "target"); + t.run(); + } + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.combine(); + t.rename_dir(in_a, "foo", "target"); + t.apply_delta(in_b, "target", fid1, fid2); + t.run_both(); + } + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.combine(); + t.apply_delta(in_a, "target", fid1, fid2); + t.rename_dir(in_b, "target", "bar"); + t.run_both(); + } + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.rename_file(in_a, "foo", "target"); + t.rename_dir(in_b, "target", "bar"); + t.run(); + } + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.rename_dir(in_a, "foo", "target"); + t.rename_file(in_b, "target", "bar"); + t.run(); + } + // Directories can't be patched, and patches can't be directoried... + BOOST_CHECKPOINT("can't patch dirs or vice versa"); + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.combine(); + t.add_file(in_a, "target/subfile"); + t.apply_delta(in_b, "target", fid_null, fid1); + t.run_both(); + } + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.combine(); + t.add_file(in_a, "target/subfile"); + t.apply_delta(in_b, "target", fid1, fid2); + t.run_both(); + } + // Deltas must be consistent + BOOST_CHECKPOINT("consistent deltas"); + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.apply_delta(in_a, "target", fid1, fid2); + t.apply_delta(in_b, "target", fid3, fid1); + t.run(); + } + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.add_file(in_a, "target", fid1); + t.apply_delta(in_b, "target", fid2, fid3); + t.run(); + } + // Can't have a null source id if it's not an add + BOOST_CHECKPOINT("null id on non-add"); + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.apply_delta(in_a, "target", fid_null, fid1); + t.run(); + } + // Can't have drop + delta with no add + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.combine(); + t.delete_file(in_a, "target"); + t.apply_delta(in_b, "target", fid1, fid2); + t.run(); + } + // Can't have a null destination id, ever, with or without a delete_file + BOOST_CHECKPOINT("no null destinations"); + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.delete_file(in_a, "target"); + t.apply_delta(in_a, "target", fid1, fid_null); + t.run(); + } + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.apply_delta(in_a, "target", fid1, fid_null); + t.run(); + } + // Can't have a patch with src == dst + { + bad_concatenate_change_test t(__FILE__, __LINE__); + t.apply_delta(in_a, "target", fid1, fid1); + t.run(); + } +} + +// FIXME: Things that should be added, but can't be trivially because they +// assert too early: +// anything repeated -- multiple adds, multiple deletes, multiple deltas +// including in one changeset, for both files and dirs +// 
(probably should put these in strings, and do BOOST_CHECK_THROWS in the +// parser?) + +// FIXME: also need tests for the invariants in apply_manifest (and any +// invariants that should be there but aren't, of course) + +void +add_change_set_tests(test_suite * suite) +{ + I(suite); + suite->add(BOOST_TEST_CASE(&basic_change_set_test)); + suite->add(BOOST_TEST_CASE(&neutralize_change_test)); + suite->add(BOOST_TEST_CASE(&non_interfering_change_test)); + suite->add(BOOST_TEST_CASE(&disjoint_merge_tests)); + suite->add(BOOST_TEST_CASE(&bad_concatenate_change_tests)); +} + + +#endif // BUILD_UNIT_TESTS ============================================================ --- tests/(minor)_test_a_merge_4/__driver__.lua 7a01317489a6c5c99bbdbcf11531d1110f9ffdd3 +++ tests/(minor)_test_a_merge_4/__driver__.lua 7a01317489a6c5c99bbdbcf11531d1110f9ffdd3 @@ -0,0 +1,33 @@ + +mtn_setup() + +-- This test is a bug report. + +-- This is a real merge error -- it should be a clean merge, but it +-- produces a conflict. + +getfile("ancestor") +getfile("left") +getfile("right") + +anc = "a2c50da63f01b242d8aaeb34d65e48edf0fef21b" +left = "8d5a2273e0e3da4aa55ff731e7152a673b63f08a" +right = "6745b398ffecec36bc4fc45598e678b3391d91b2" + +check(anc == sha1("ancestor")) +check(left == sha1("left")) +check(right == sha1("right")) + +copyfile("ancestor", "stdin") +check(mtn("fload"), 0, false, false, true) +copyfile("left", "stdin") +check(mtn("fload"), 0, false, false, true) +copyfile("right", "stdin") +check(mtn("fload"), 0, false, false, true) + +getfile("merge.diff3") + +xfail_if(true, mtn("fmerge", anc, left, right), 0, true, false) +rename("stdout", "merge.monotone") + +check(samefile("merge.diff3", "merge.monotone")) ============================================================ --- tests/(minor)_test_a_merge_4/ancestor a2c50da63f01b242d8aaeb34d65e48edf0fef21b +++ tests/(minor)_test_a_merge_4/ancestor a2c50da63f01b242d8aaeb34d65e48edf0fef21b @@ -0,0 +1,1353 @@ +// copyright (C) 2004 graydon hoare +// all rights reserved. 
+// licensed to the public under the terms of the GNU GPL (>= 2) +// see the file COPYING for details + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "basic_io.hh" +#include "change_set.hh" +#include "constants.hh" +#include "numeric_vocab.hh" +#include "revision.hh" +#include "sanity.hh" +#include "transforms.hh" +#include "ui.hh" +#include "vocab.hh" + +void revision_set::check_sane() const +{ + I(!null_id(new_manifest)); + + manifest_map fragment; + for (edge_map::const_iterator i = edges.begin(); i != edges.end(); ++i) + { + change_set const & cs = edge_changes(i); + cs.check_sane(); + if (!global_sanity.relaxed) + { + // null old revisions come with null old manifests + I(!null_id(edge_old_revision(i)) || null_id(edge_old_manifest(i))); + } + for (change_set::delta_map::const_iterator j = cs.deltas.begin(); j != cs.deltas.end(); ++j) + { + manifest_map::const_iterator k = fragment.find(delta_entry_path(j)); + if (k == fragment.end()) + fragment.insert(std::make_pair(delta_entry_path(j), + delta_entry_dst(j))); + else + { + if (!global_sanity.relaxed) + { + I(delta_entry_dst(j) == manifest_entry_id(k)); + } + } + } + } +} + +revision_set::revision_set(revision_set const & other) +{ + other.check_sane(); + new_manifest = other.new_manifest; + edges = other.edges; +} + +revision_set const & +revision_set::operator=(revision_set const & other) +{ + other.check_sane(); + new_manifest = other.new_manifest; + edges = other.edges; + return *this; +} + + +// Traces history back 'depth' levels from 'child_id', ensuring that +// historical information is consistent within this subgraph. +// The child must be in the database already. +// +// "Consistent" currently means that we compose manifests along every path (of +// any length) that terminates at the child, and for each one check that paths +// that should be the same in fact are the same, and that the calculated +// change sets can be applied to the old manifests to create the new +// manifest. +// +// NB: While this function has some invariants in it itself, a lot of its +// purpose is just to exercise all the invariants inside change_set.cc. So +// don't remove those invariants. (As if you needed another reason...) 
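Before the function itself (listed next), an editorial aside: the consistency property described above can be restated in miniature. The following standalone sketch is not monotone code; manifests are shrunk to integers and change_sets to added offsets, so "composing edges" is just summation, and the check is that every path joining the same ancestor to the same child composes to the same total.

#include <cassert>
#include <map>
#include <string>
#include <utility>

struct edge { std::string parent; int delta; };

// walk every parent-path from 'node' toward the roots, accumulating the
// composite change along the way and recording one total per ancestor reached
static void accumulate(std::string const & node, int so_far,
                       std::multimap<std::string, edge> const & up,
                       std::multimap<std::string, int> & composite)
{
  typedef std::multimap<std::string, edge>::const_iterator it;
  std::pair<it, it> range = up.equal_range(node);
  for (it i = range.first; i != range.second; ++i)
    {
      composite.insert(std::make_pair(i->second.parent,
                                      so_far + i->second.delta));
      accumulate(i->second.parent, so_far + i->second.delta, up, composite);
    }
}

int main()
{
  // a diamond: root -> l -> child and root -> r -> child
  std::multimap<std::string, edge> up;   // node -> (parent, change on edge)
  edge rl = { "root", 2 }, rr = { "root", 3 };
  edge lc = { "l", 3 },    rc = { "r", 2 };
  up.insert(std::make_pair(std::string("l"), rl));
  up.insert(std::make_pair(std::string("r"), rr));
  up.insert(std::make_pair(std::string("child"), lc));
  up.insert(std::make_pair(std::string("child"), rc));

  std::multimap<std::string, int> composite;
  accumulate("child", 0, up, composite);

  // both root->child paths must compose to the same total change (2+3 == 3+2)
  typedef std::multimap<std::string, int>::const_iterator ci;
  std::pair<ci, ci> r = composite.equal_range("root");
  for (ci i = r.first; i != r.second; ++i)
    assert(i->second == 5);
  return 0;
}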
+void +check_sane_history(revision_id const & child_id, + int depth, + database & db) +{ + L(F("Verifying revision %s has sane history (to depth %i)\n") + % child_id % depth); + + typedef boost::shared_ptr shared_cs; + // (ancestor, change_set from ancestor to child) + std::map changesets; + + manifest_id m_child_id; + db.get_revision_manifest(child_id, m_child_id); + manifest_map m_child; + db.get_manifest(m_child_id, m_child); + + std::set frontier; + frontier.insert(child_id); + + while (depth-- > 0) + { + std::set next_frontier; + + for (std::set::const_iterator i = frontier.begin(); + i != frontier.end(); + ++i) + { + revision_id current_id = *i; + revision_set current; + db.get_revision(current_id, current); + // and the parents's manifests to the manifests + // and the change_set's to the parents to the changesets + for (edge_map::const_iterator j = current.edges.begin(); + j != current.edges.end(); + ++j) + { + revision_id old_id = edge_old_revision(j); + manifest_id m_old_id = edge_old_manifest(j); + change_set old_to_current_changes = edge_changes(j); + if (!null_id(old_id)) + next_frontier.insert(old_id); + + L(F("Examining %s -> %s\n") % old_id % child_id); + + // build the change_set + // if + shared_cs old_to_child_changes_p = shared_cs(new change_set); + if (current_id == child_id) + *old_to_child_changes_p = old_to_current_changes; + else + { + shared_cs current_to_child_changes_p; + I(changesets.find(current_id) != changesets.end()); + current_to_child_changes_p = changesets.find(current_id)->second; + concatenate_change_sets(old_to_current_changes, + *current_to_child_changes_p, + *old_to_child_changes_p); + } + + // we have the change_set; now, is it one we've seen before? + if (changesets.find(old_id) != changesets.end()) + { + // If it is, then make sure the paths agree on the + // changeset. + I(*changesets.find(old_id)->second == *old_to_child_changes_p); + } + else + { + // If not, this is the first time we've seen this. + // So store it in the map for later reference: + changesets.insert(std::make_pair(old_id, old_to_child_changes_p)); + // and check that it works: + + manifest_map m_old; + if (!null_id(old_id)) + db.get_manifest(m_old_id, m_old); + // The null revision has empty manifest, which is the + // default. + manifest_map purported_m_child; + apply_change_set(m_old, *old_to_child_changes_p, + purported_m_child); + I(purported_m_child == m_child); + } + } + } + frontier = next_frontier; + } +} + + +// calculating least common ancestors is a delicate thing. +// +// it turns out that we cannot choose the simple "least common ancestor" +// for purposes of a merge, because it is possible that there are two +// equally reachable common ancestors, and this produces ambiguity in the +// merge. the result -- in a pathological case -- is silently accepting one +// set of edits while discarding another; not exactly what you want a +// version control tool to do. +// +// a conservative approximation is what we'll call a "subgraph recurring" +// LCA algorithm. this is somewhat like locating the least common dominator +// node, but not quite. it is actually just a vanilla LCA search, except +// that any time there's a fork (a historical merge looks like a fork from +// our perspective, working backwards from children to parents) it reduces +// the fork to a common parent via a sequence of pairwise recursive calls +// to itself before proceeding. this will always resolve to a common parent +// with no ambiguity, unless it falls off the root of the graph. 
+// +// unfortunately the subgraph recurring algorithm sometimes goes too far +// back in history -- for example if there is an unambiguous propagate from +// one branch to another, the entire subgraph preceeding the propagate on +// the recipient branch is elided, since it is a merge. +// +// our current hypothesis is that the *exact* condition we're looking for, +// when doing a merge, is the least node which dominates one side of the +// merge and is an ancestor of the other. + +typedef unsigned long ctx; +typedef boost::dynamic_bitset<> bitmap; +typedef boost::shared_ptr shared_bitmap; + +static void +ensure_parents_loaded(ctx child, + std::map & parents, + interner & intern, + app_state & app) +{ + if (parents.find(child) != parents.end()) + return; + + L(F("loading parents for node %d\n") % child); + + std::set imm_parents; + app.db.get_revision_parents(revision_id(intern.lookup(child)), imm_parents); + + // The null revision is not a parent for purposes of finding common + // ancestors. + for (std::set::iterator p = imm_parents.begin(); + p != imm_parents.end(); ++p) + { + if (null_id(*p)) + imm_parents.erase(p); + } + + shared_bitmap bits = shared_bitmap(new bitmap(parents.size())); + + for (std::set::const_iterator p = imm_parents.begin(); + p != imm_parents.end(); ++p) + { + ctx pn = intern.intern(p->inner()()); + L(F("parent %s -> node %d\n") % *p % pn); + if (pn >= bits->size()) + bits->resize(pn+1); + bits->set(pn); + } + + parents.insert(std::make_pair(child, bits)); +} + +static bool +expand_dominators(std::map & parents, + std::map & dominators, + interner & intern, + app_state & app) +{ + bool something_changed = false; + std::vector nodes; + + nodes.reserve(dominators.size()); + + // pass 1, pull out all the node numbers we're going to scan this time around + for (std::map::const_iterator e = dominators.begin(); + e != dominators.end(); ++e) + nodes.push_back(e->first); + + // pass 2, update any of the dominator entries we can + for (std::vector::const_iterator n = nodes.begin(); + n != nodes.end(); ++n) + { + shared_bitmap bits = dominators[*n]; + bitmap saved(*bits); + if (bits->size() <= *n) + bits->resize(*n + 1); + bits->set(*n); + + ensure_parents_loaded(*n, parents, intern, app); + shared_bitmap n_parents = parents[*n]; + + bitmap intersection(bits->size()); + + bool first = true; + for (unsigned long parent = 0; + parent != n_parents->size(); ++parent) + { + if (! 
n_parents->test(parent)) + continue; + + if (dominators.find(parent) == dominators.end()) + dominators.insert(std::make_pair(parent, + shared_bitmap(new bitmap()))); + shared_bitmap pbits = dominators[parent]; + + if (bits->size() > pbits->size()) + pbits->resize(bits->size()); + + if (pbits->size() > bits->size()) + bits->resize(pbits->size()); + + if (first) + { + intersection = (*pbits); + first = false; + } + else + intersection &= (*pbits); + } + + (*bits) |= intersection; + if (*bits != saved) + something_changed = true; + } + return something_changed; +} + + +static bool +expand_ancestors(std::map & parents, + std::map & ancestors, + interner & intern, + app_state & app) +{ + bool something_changed = false; + std::vector nodes; + + nodes.reserve(ancestors.size()); + + // pass 1, pull out all the node numbers we're going to scan this time around + for (std::map::const_iterator e = ancestors.begin(); + e != ancestors.end(); ++e) + nodes.push_back(e->first); + + // pass 2, update any of the ancestor entries we can + for (std::vector::const_iterator n = nodes.begin(); n != nodes.end(); ++n) + { + shared_bitmap bits = ancestors[*n]; + bitmap saved(*bits); + if (bits->size() <= *n) + bits->resize(*n + 1); + bits->set(*n); + + ensure_parents_loaded(*n, parents, intern, app); + shared_bitmap n_parents = parents[*n]; + for (ctx parent = 0; parent != n_parents->size(); ++parent) + { + if (! n_parents->test(parent)) + continue; + + if (bits->size() <= parent) + bits->resize(parent + 1); + bits->set(parent); + + if (ancestors.find(parent) == ancestors.end()) + ancestors.insert(make_pair(parent, + shared_bitmap(new bitmap()))); + shared_bitmap pbits = ancestors[parent]; + + if (bits->size() > pbits->size()) + pbits->resize(bits->size()); + + if (pbits->size() > bits->size()) + bits->resize(pbits->size()); + + (*bits) |= (*pbits); + } + if (*bits != saved) + something_changed = true; + } + return something_changed; +} + +static bool +find_intersecting_node(bitmap & fst, + bitmap & snd, + interner const & intern, + revision_id & anc) +{ + + if (fst.size() > snd.size()) + snd.resize(fst.size()); + else if (snd.size() > fst.size()) + fst.resize(snd.size()); + + bitmap intersection = fst & snd; + if (intersection.any()) + { + L(F("found %d intersecting nodes\n") % intersection.count()); + for (ctx i = 0; i < intersection.size(); ++i) + { + if (intersection.test(i)) + { + anc = revision_id(intern.lookup(i)); + return true; + } + } + } + return false; +} + +// static void +// dump_bitset_map(std::string const & hdr, +// std::map< ctx, shared_bitmap > const & mm) +// { +// L(F("dumping [%s] (%d entries)\n") % hdr % mm.size()); +// for (std::map< ctx, shared_bitmap >::const_iterator i = mm.begin(); +// i != mm.end(); ++i) +// { +// L(F("dump [%s]: %d -> %s\n") % hdr % i->first % (*(i->second))); +// } +// } + +bool +find_common_ancestor_for_merge(revision_id const & left, + revision_id const & right, + revision_id & anc, + app_state & app) +{ + interner intern; + std::map< ctx, shared_bitmap > + parents, ancestors, dominators; + + ctx ln = intern.intern(left.inner()()); + ctx rn = intern.intern(right.inner()()); + + shared_bitmap lanc = shared_bitmap(new bitmap()); + shared_bitmap ranc = shared_bitmap(new bitmap()); + shared_bitmap ldom = shared_bitmap(new bitmap()); + shared_bitmap rdom = shared_bitmap(new bitmap()); + + ancestors.insert(make_pair(ln, lanc)); + ancestors.insert(make_pair(rn, ranc)); + dominators.insert(make_pair(ln, ldom)); + dominators.insert(make_pair(rn, rdom)); + + L(F("searching 
for common ancestor, left=%s right=%s\n") % left % right); + + while (expand_ancestors(parents, ancestors, intern, app) || + expand_dominators(parents, dominators, intern, app)) + { + L(F("common ancestor scan [par=%d,anc=%d,dom=%d]\n") % + parents.size() % ancestors.size() % dominators.size()); + + if (find_intersecting_node(*lanc, *rdom, intern, anc)) + { + L(F("found node %d, ancestor of left %s and dominating right %s\n") + % anc % left % right); + return true; + } + + else if (find_intersecting_node(*ranc, *ldom, intern, anc)) + { + L(F("found node %d, ancestor of right %s and dominating left %s\n") + % anc % right % left); + return true; + } + } +// dump_bitset_map("ancestors", ancestors); +// dump_bitset_map("dominators", dominators); +// dump_bitset_map("parents", parents); + return false; +} + + +bool +find_least_common_ancestor(revision_id const & left, + revision_id const & right, + revision_id & anc, + app_state & app) +{ + interner intern; + std::map< ctx, shared_bitmap > + parents, ancestors; + + ctx ln = intern.intern(left.inner()()); + ctx rn = intern.intern(right.inner()()); + + shared_bitmap lanc = shared_bitmap(new bitmap()); + shared_bitmap ranc = shared_bitmap(new bitmap()); + + ancestors.insert(make_pair(ln, lanc)); + ancestors.insert(make_pair(rn, ranc)); + + L(F("searching for least common ancestor, left=%s right=%s\n") % left % right); + + while (expand_ancestors(parents, ancestors, intern, app)) + { + L(F("least common ancestor scan [par=%d,anc=%d]\n") % + parents.size() % ancestors.size()); + + if (find_intersecting_node(*lanc, *ranc, intern, anc)) + { + L(F("found node %d, ancestor of left %s and right %s\n") + % anc % left % right); + return true; + } + } +// dump_bitset_map("ancestors", ancestors); +// dump_bitset_map("parents", parents); + return false; +} + + +// FIXME: this algorithm is incredibly inefficient; it's O(n) where n is the +// size of the entire revision graph. + +static bool +is_ancestor(revision_id const & ancestor_id, + revision_id const & descendent_id, + std::multimap const & graph) +{ + + std::set visited; + std::queue queue; + + queue.push(ancestor_id); + + while (!queue.empty()) + { + revision_id current_id = queue.front(); + queue.pop(); + + if (current_id == descendent_id) + return true; + else + { + typedef std::multimap::const_iterator gi; + std::pair children = graph.equal_range(current_id); + for (gi i = children.first; i != children.second; ++i) + { + if (visited.find(i->second) == visited.end()) + { + queue.push(i->second); + visited.insert(i->second); + } + } + } + } + return false; +} + +bool +is_ancestor(revision_id const & ancestor_id, + revision_id const & descendent_id, + app_state & app) +{ + L(F("checking whether %s is an ancestor of %s\n") % ancestor_id % descendent_id); + + std::multimap graph; + app.db.get_revision_ancestry(graph); + return is_ancestor(ancestor_id, descendent_id, graph); +} + + +static void +add_bitset_to_union(shared_bitmap src, + shared_bitmap dst) +{ + if (dst->size() > src->size()) + src->resize(dst->size()); + if (src->size() > dst->size()) + dst->resize(src->size()); + *dst |= *src; +} + + +static void +calculate_ancestors_from_graph(interner & intern, + revision_id const & init, + std::set const & legal, + std::multimap const & graph, + std::map< ctx, shared_bitmap > & ancestors, + shared_bitmap & total_union) +{ + typedef std::multimap::const_iterator gi; + std::stack stk; + + stk.push(intern.intern(init.inner()())); + + while (! 
stk.empty()) + { + ctx us = stk.top(); + revision_id rev(hexenc(intern.lookup(us))); + + std::pair parents = graph.equal_range(rev); + bool pushed = false; + + // first make sure all parents are done + for (gi i = parents.first; i != parents.second; ++i) + { + ctx parent = intern.intern(i->second.inner()()); + if (ancestors.find(parent) == ancestors.end()) + { + stk.push(parent); + pushed = true; + break; + } + } + + // if we pushed anything we stop now. we'll come back later when all + // the parents are done. + if (pushed) + continue; + + shared_bitmap b = shared_bitmap(new bitmap()); + + for (gi i = parents.first; i != parents.second; ++i) + { + ctx parent = intern.intern(i->second.inner()()); + + // set any parent which is a member of the underlying legal set + if (legal.find(i->second) != legal.end()) + { + if (b->size() <= parent) + b->resize(parent + 1); + b->set(parent); + } + + // ensure all parents are loaded into the ancestor map + I(ancestors.find(parent) != ancestors.end()); + + // union them into our map + std::map< ctx, shared_bitmap >::const_iterator j = ancestors.find(parent); + I(j != ancestors.end()); + add_bitset_to_union(j->second, b); + } + + add_bitset_to_union(b, total_union); + ancestors.insert(std::make_pair(us, b)); + stk.pop(); + } +} + +// This function looks at a set of revisions, and for every pair A, B in that +// set such that A is an ancestor of B, it erases A. + +void +erase_ancestors(std::set & revisions, app_state & app) +{ + typedef std::multimap::const_iterator gi; + std::multimap graph; + std::multimap inverse_graph; + + app.db.get_revision_ancestry(graph); + for (gi i = graph.begin(); i != graph.end(); ++i) + inverse_graph.insert(std::make_pair(i->second, i->first)); + + interner intern; + std::map< ctx, shared_bitmap > ancestors; + + shared_bitmap u = shared_bitmap(new bitmap()); + + for (std::set::const_iterator i = revisions.begin(); + i != revisions.end(); ++i) + { + calculate_ancestors_from_graph(intern, *i, revisions, + inverse_graph, ancestors, u); + } + + std::set tmp; + for (std::set::const_iterator i = revisions.begin(); + i != revisions.end(); ++i) + { + ctx id = intern.intern(i->inner()()); + bool has_ancestor_in_set = id < u->size() && u->test(id); + if (!has_ancestor_in_set) + tmp.insert(*i); + } + + revisions = tmp; +} + +// +// The idea with this algorithm is to walk from child up to ancestor, +// recursively, accumulating all the change_sets associated with +// intermediate nodes into *one big change_set*. +// +// clever readers will realize this is an overlapping-subproblem type +// situation and thus needs to keep a dynamic programming map to keep +// itself in linear complexity. +// +// in fact, we keep two: one which maps to computed results (partial_csets) +// and one which just keeps a set of all nodes we traversed +// (visited_nodes). in theory it could be one map with an extra bool stuck +// on each entry, but I think that would make it even less readable. it's +// already quite ugly. 
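An editorial aside on the two-map shape described above, as a standalone sketch (invented graph and names, not monotone code): one map caches nodes whose answer is already computed, while a separate visited set marks nodes that were explored but turned out fruitless, so no node is ever expanded twice.

#include <map>
#include <set>
#include <string>
#include <utility>

// walk parent links from 'node' looking for 'target'; 'partial' caches nodes
// already known to reach the target, 'visited' records every node explored,
// so a visited node with no cache entry is a known dead end
static bool walk(std::string const & target, std::string const & node,
                 std::multimap<std::string, std::string> const & parents,
                 std::map<std::string, bool> & partial,
                 std::set<std::string> & visited)
{
  if (node == target)
    return true;
  std::map<std::string, bool>::const_iterator c = partial.find(node);
  if (c != partial.end())
    return c->second;                  // reuse a cached answer
  if (visited.count(node))
    return false;                      // explored before, known fruitless
  visited.insert(node);

  typedef std::multimap<std::string, std::string>::const_iterator it;
  std::pair<it, it> r = parents.equal_range(node);
  for (it i = r.first; i != r.second; ++i)
    if (walk(target, i->second, parents, partial, visited))
      {
        partial.insert(std::make_pair(node, true));   // cache the success
        return true;
      }
  return false;
}

int main()
{
  std::multimap<std::string, std::string> parents;   // child -> parent
  parents.insert(std::make_pair(std::string("child"), std::string("mid")));
  parents.insert(std::make_pair(std::string("mid"), std::string("anc")));
  std::map<std::string, bool> partial;
  std::set<std::string> visited;
  return walk("anc", "child", parents, partial, visited) ? 0 : 1;
}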
+// + +static bool +calculate_change_sets_recursive(revision_id const & ancestor, + revision_id const & child, + app_state & app, + change_set & cumulative_cset, + std::map > & partial_csets, + std::set & visited_nodes, + std::set const & subgraph) +{ + + if (ancestor == child) + return true; + + if (subgraph.find(child) == subgraph.end()) + return false; + + visited_nodes.insert(child); + + bool relevant_child = false; + + revision_set rev; + app.db.get_revision(child, rev); + + L(F("exploring changesets from parents of %s, seeking towards %s\n") + % child % ancestor); + + for(edge_map::const_iterator i = rev.edges.begin(); i != rev.edges.end(); ++i) + { + bool relevant_parent = false; + revision_id curr_parent = edge_old_revision(i); + + if (curr_parent.inner()().empty()) + continue; + + change_set cset_to_curr_parent; + + L(F("considering parent %s of %s\n") % curr_parent % child); + + std::map >::const_iterator j = + partial_csets.find(curr_parent); + if (j != partial_csets.end()) + { + // a recursive call has traversed this parent before and built an + // existing cset. just reuse that rather than re-traversing + cset_to_curr_parent = *(j->second); + relevant_parent = true; + } + else if (visited_nodes.find(curr_parent) != visited_nodes.end()) + { + // a recursive call has traversed this parent, but there was no + // path from it to the root, so the parent is irrelevant. skip. + relevant_parent = false; + } + else + relevant_parent = calculate_change_sets_recursive(ancestor, curr_parent, app, + cset_to_curr_parent, + partial_csets, + visited_nodes, + subgraph); + + if (relevant_parent) + { + L(F("revision %s is relevant, composing with edge to %s\n") + % curr_parent % child); + concatenate_change_sets(cset_to_curr_parent, edge_changes(i), cumulative_cset); + relevant_child = true; + break; + } + else + L(F("parent %s of %s is not relevant\n") % curr_parent % child); + } + + // store the partial edge from ancestor -> child, so that if anyone + // re-traverses this edge they'll just fetch from the partial_edges + // cache. + if (relevant_child) + partial_csets.insert(std::make_pair(child, + boost::shared_ptr + (new change_set(cumulative_cset)))); + + return relevant_child; +} + +// this finds (by breadth-first search) the set of nodes you'll have to +// walk over in calculate_change_sets_recursive, to build the composite +// changeset. this is to prevent the recursive algorithm from going way +// back in history on an unlucky guess of parent. 
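+//
+// the two helpers below are normally driven as a pair; the shape of a
+// call (this is just a sketch of what calculate_composite_change_set,
+// further down, actually does) is:
+//
+//   std::set<revision_id> subgraph, visited;
+//   std::map<revision_id, boost::shared_ptr<change_set> > partial;
+//   find_subgraph_for_composite_search(ancestor, child, app, subgraph);
+//   calculate_change_sets_recursive(ancestor, child, app, composed,
+//                                   partial, visited, subgraph);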
+ +static void +find_subgraph_for_composite_search(revision_id const & ancestor, + revision_id const & child, + app_state & app, + std::set & subgraph) +{ + std::set frontier; + frontier.insert(child); + subgraph.insert(child); + while (!frontier.empty()) + { + std::set next_frontier; + for (std::set::const_iterator i = frontier.begin(); + i != frontier.end(); ++i) + { + revision_set rev; + app.db.get_revision(*i, rev); + L(F("adding parents of %s to subgraph\n") % *i); + + for(edge_map::const_iterator j = rev.edges.begin(); j != rev.edges.end(); ++j) + { + revision_id curr_parent = edge_old_revision(j); + if (null_id(curr_parent)) + continue; + subgraph.insert(curr_parent); + if (curr_parent == ancestor) + { + L(F("found parent %s of %s\n") % curr_parent % *i); + return; + } + else + L(F("adding parent %s to next frontier\n") % curr_parent); + next_frontier.insert(curr_parent); + } + } + frontier = next_frontier; + } +} + +void +calculate_composite_change_set(revision_id const & ancestor, + revision_id const & child, + app_state & app, + change_set & composed) +{ + L(F("calculating composite changeset between %s and %s\n") + % ancestor % child); + std::set visited; + std::set subgraph; + std::map > partial; + find_subgraph_for_composite_search(ancestor, child, app, subgraph); + calculate_change_sets_recursive(ancestor, child, app, composed, partial, + visited, subgraph); +} + + +// Stuff related to rebuilding the revision graph. Unfortunately this is a +// real enough error case that we need support code for it. + + +static void +analyze_manifest_changes(app_state & app, + manifest_id const & parent, + manifest_id const & child, + change_set & cs) +{ + manifest_map m_parent, m_child; + + if (!null_id(parent)) + app.db.get_manifest(parent, m_parent); + + I(!null_id(child)); + app.db.get_manifest(child, m_child); + + L(F("analyzing manifest changes from '%s' -> '%s'\n") % parent % child); + + for (manifest_map::const_iterator i = m_parent.begin(); + i != m_parent.end(); ++i) + { + manifest_map::const_iterator j = m_child.find(manifest_entry_path(i)); + if (j == m_child.end()) + cs.delete_file(manifest_entry_path(i)); + else if (! 
(manifest_entry_id(i) == manifest_entry_id(j))) + { + cs.apply_delta(manifest_entry_path(i), + manifest_entry_id(i), + manifest_entry_id(j)); + } + } + for (manifest_map::const_iterator i = m_child.begin(); + i != m_child.end(); ++i) + { + manifest_map::const_iterator j = m_parent.find(manifest_entry_path(i)); + if (j == m_parent.end()) + cs.add_file(manifest_entry_path(i), + manifest_entry_id(i)); + } +} + + +struct anc_graph +{ + anc_graph(bool existing, app_state & a) : + existing_graph(existing), + app(a), + max_node(0), + n_nodes("nodes", "n", 1), + n_certs_in("certs in", "c", 1), + n_revs_out("revs out", "r", 1), + n_certs_out("certs out", "C", 1) + {} + + bool existing_graph; + app_state & app; + u64 max_node; + + ticker n_nodes; + ticker n_certs_in; + ticker n_revs_out; + ticker n_certs_out; + + std::map node_to_old_man; + std::map old_man_to_node; + + std::map node_to_old_rev; + std::map old_rev_to_node; + + std::map node_to_new_rev; + std::multimap > certs; + std::multimap ancestry; + + void add_node_ancestry(u64 child, u64 parent); + void write_certs(); + void rebuild_ancestry(); + void get_node_manifest(u64 node, manifest_id & man); + u64 add_node_for_old_manifest(manifest_id const & man); + u64 add_node_for_old_revision(revision_id const & rev); + revision_id construct_revision_from_ancestry(u64 child); +}; + + +void anc_graph::add_node_ancestry(u64 child, u64 parent) +{ + L(F("noting ancestry from child %d -> parent %d\n") % child % parent); + ancestry.insert(std::make_pair(child, parent)); +} + +void anc_graph::get_node_manifest(u64 node, manifest_id & man) +{ + std::map::const_iterator i = node_to_old_man.find(node); + I(i != node_to_old_man.end()); + man = i->second; +} + +void anc_graph::write_certs() +{ + std::set cnames; + cnames.insert(cert_name(branch_cert_name)); + cnames.insert(cert_name(date_cert_name)); + cnames.insert(cert_name(author_cert_name)); + cnames.insert(cert_name(tag_cert_name)); + cnames.insert(cert_name(changelog_cert_name)); + cnames.insert(cert_name(comment_cert_name)); + cnames.insert(cert_name(testresult_cert_name)); + + typedef std::multimap >::const_iterator ci; + + for (std::map::const_iterator i = node_to_new_rev.begin(); + i != node_to_new_rev.end(); ++i) + { + revision_id rev(i->second); + + std::pair range = certs.equal_range(i->first); + + for (ci j = range.first; j != range.second; ++j) + { + cert_name name(j->second.first); + cert_value val(j->second.second); + + if (cnames.find(name) == cnames.end()) + continue; + + cert new_cert; + make_simple_cert(rev.inner(), name, val, app, new_cert); + revision rcert(new_cert); + if (! app.db.revision_cert_exists(rcert)) + { + ++n_certs_out; + app.db.put_revision_cert(rcert); + } + } + } +} + +void +anc_graph::rebuild_ancestry() +{ + P(F("rebuilding %d nodes\n") % max_node); + { + transaction_guard guard(app.db); + if (existing_graph) + app.db.delete_existing_revs_and_certs(); + + std::set parents, children, heads; + for (std::multimap::const_iterator i = ancestry.begin(); + i != ancestry.end(); ++i) + { + children.insert(i->first); + parents.insert(i->second); + } + set_difference(children.begin(), children.end(), + parents.begin(), parents.end(), + std::inserter(heads, heads.begin())); + + // FIXME: should do a depth-first traversal here, or something like, + // instead of being recursive. 
+ for (std::set::const_iterator i = heads.begin(); + i != heads.end(); ++i) + { + construct_revision_from_ancestry(*i); + } + write_certs(); + guard.commit(); + } +} + +u64 anc_graph::add_node_for_old_manifest(manifest_id const & man) +{ + I(!existing_graph); + u64 node = 0; + if (old_man_to_node.find(man) == old_man_to_node.end()) + { + node = max_node++; + ++n_nodes; + L(F("node %d = manifest %s\n") % node % man); + old_man_to_node.insert(std::make_pair(man, node)); + node_to_old_man.insert(std::make_pair(node, man)); + + // load certs + std::vector< manifest > mcerts; + app.db.get_manifest_certs(man, mcerts); + erase_bogus_certs(mcerts, app); + for(std::vector< manifest >::const_iterator i = mcerts.begin(); + i != mcerts.end(); ++i) + { + L(F("loaded '%s' manifest cert for node %s\n") % i->inner().name % node); + cert_value tv; + decode_base64(i->inner().value, tv); + ++n_certs_in; + certs.insert(std::make_pair(node, + std::make_pair(i->inner().name, tv))); + } + } + else + { + node = old_man_to_node[man]; + } + return node; +} + +u64 anc_graph::add_node_for_old_revision(revision_id const & rev) +{ + I(existing_graph); + I(!null_id(rev)); + u64 node = 0; + if (old_rev_to_node.find(rev) == old_rev_to_node.end()) + { + node = max_node++; + ++n_nodes; + + manifest_id man; + app.db.get_revision_manifest(rev, man); + + L(F("node %d = revision %s = manifest %s\n") % node % rev % man); + old_rev_to_node.insert(std::make_pair(rev, node)); + node_to_old_rev.insert(std::make_pair(node, rev)); + node_to_old_man.insert(std::make_pair(node, man)); + + // load certs + std::vector< revision > rcerts; + app.db.get_revision_certs(rev, rcerts); + erase_bogus_certs(rcerts, app); + for(std::vector< revision >::const_iterator i = rcerts.begin(); + i != rcerts.end(); ++i) + { + L(F("loaded '%s' revision cert for node %s\n") % i->inner().name % node); + cert_value tv; + decode_base64(i->inner().value, tv); + ++n_certs_in; + certs.insert(std::make_pair(node, + std::make_pair(i->inner().name, tv))); + } + } + else + { + node = old_rev_to_node[rev]; + } + + return node; +} + +// FIXME: this is recursive -- stack depth grows as ancestry depth -- and will +// overflow the stack on large histories. 
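+// one possible non-recursive shape (a sketch only, not what the code below
+// does yet): an explicit work stack that defers a node until all of its
+// parents have been built, mirroring the trick already used by
+// calculate_ancestors_from_graph above:
+//
+//   std::stack<u64> work;
+//   work.push(head);
+//   while (!work.empty())
+//     {
+//       u64 n = work.top();
+//       bool deferred = false;
+//       typedef std::multimap<u64, u64>::const_iterator ci;
+//       std::pair<ci, ci> range = ancestry.equal_range(n);
+//       for (ci i = range.first; i != range.second; ++i)
+//         if (node_to_new_rev.find(i->second) == node_to_new_rev.end())
+//           {
+//             work.push(i->second);
+//             deferred = true;
+//           }
+//       if (!deferred)
+//         {
+//           // all parents exist in node_to_new_rev: build n's revision here
+//           work.pop();
+//         }
+//     }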
+revision_id +anc_graph::construct_revision_from_ancestry(u64 child) +{ + L(F("processing node %d\n") % child); + + if (node_to_new_rev.find(child) != node_to_new_rev.end()) + { + L(F("node %d already processed, skipping\n") % child); + return node_to_new_rev.find(child)->second; + } + + manifest_id child_man; + get_node_manifest(child, child_man); + + revision_set rev; + rev.new_manifest = child_man; + + typedef std::multimap::const_iterator ci; + std::pair range = ancestry.equal_range(child); + if (range.first == range.second) + { + L(F("node %d is a root node\n") % child); + revision_id null_rid; + manifest_id null_mid; + change_set cs; + analyze_manifest_changes(app, null_mid, child_man, cs); + rev.edges.insert(std::make_pair(null_rid, + std::make_pair(null_mid, cs))); + } + else + { + for (ci i = range.first; i != range.second; ++i) + { + I(child == i->first); + u64 parent(i->second); + L(F("processing edge from child %d -> parent %d\n") % child % parent); + + revision_id parent_rid; + std::map::const_iterator + j = node_to_new_rev.find(parent); + + if (j != node_to_new_rev.end()) + parent_rid = j->second; + else + { + parent_rid = construct_revision_from_ancestry(parent); + node_to_new_rev.insert(std::make_pair(parent, parent_rid)); + } + + L(F("parent node %d = revision %s\n") % parent % parent_rid); + manifest_id parent_man; + get_node_manifest(parent, parent_man); + change_set cs; + analyze_manifest_changes(app, parent_man, child_man, cs); + rev.edges.insert(std::make_pair(parent_rid, + std::make_pair(parent_man, cs))); + } + } + + revision_id rid; + calculate_ident(rev, rid); + node_to_new_rev.insert(std::make_pair(child, rid)); + + if (!app.db.revision_exists (rid)) + { + L(F("mapped node %d to revision %s\n") % child % rid); + app.db.put_revision(rid, rev); + ++n_revs_out; + } + else + { + L(F("skipping already existing revision %s\n") % rid); + } + + return rid; +} + +void +build_changesets_from_existing_revs(app_state & app) +{ + global_sanity.set_relaxed(true); + anc_graph graph(true, app); + + P(F("rebuilding revision graph from existing graph\n")); + std::multimap existing_graph; + + app.db.get_revision_ancestry(existing_graph); + for (std::multimap::const_iterator i = existing_graph.begin(); + i != existing_graph.end(); ++i) + { + if (!null_id(i->first)) + { + u64 parent_node = graph.add_node_for_old_revision(i->first); + u64 child_node = graph.add_node_for_old_revision(i->second); + graph.add_node_ancestry(child_node, parent_node); + } + } + + global_sanity.set_relaxed(false); + graph.rebuild_ancestry(); +} + + +void +build_changesets_from_manifest_ancestry(app_state & app) +{ + anc_graph graph(false, app); + + P(F("rebuilding revision graph from manifest certs\n")); + std::vector< manifest > tmp; + app.db.get_manifest_certs(cert_name("ancestor"), tmp); + erase_bogus_certs(tmp, app); + + for (std::vector< manifest >::const_iterator i = tmp.begin(); + i != tmp.end(); ++i) + { + cert_value tv; + decode_base64(i->inner().value, tv); + manifest_id child, parent; + child = i->inner().ident; + parent = hexenc(tv()); + + u64 parent_node = graph.add_node_for_old_manifest(parent); + u64 child_node = graph.add_node_for_old_manifest(child); + graph.add_node_ancestry(child_node, parent_node); + } + + graph.rebuild_ancestry(); +} + + +// i/o stuff + +std::string revision_file_name("revision"); + +namespace +{ + namespace syms + { + std::string const old_revision("old_revision"); + std::string const new_manifest("new_manifest"); + std::string const old_manifest("old_manifest"); + } +} 
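+// for reference, the printers below emit stanzas in monotone's basic_io
+// syntax; a revision with a single edge comes out roughly like this (hex
+// ids shortened and invented purely for illustration):
+//
+//   new_manifest [a1b2...]
+//
+//   old_revision [c3d4...]
+//   old_manifest [e5f6...]
+//
+// followed by whatever change_set stanzas print_change_set produces.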
+ + +void +print_edge(basic_io::printer & printer, + edge_entry const & e) +{ + basic_io::stanza st; + st.push_hex_pair(syms::old_revision, edge_old_revision(e).inner()()); + st.push_hex_pair(syms::old_manifest, edge_old_manifest(e).inner()()); + printer.print_stanza(st); + print_change_set(printer, edge_changes(e)); +} + + +void +print_revision(basic_io::printer & printer, + revision_set const & rev) +{ + rev.check_sane(); + basic_io::stanza st; + st.push_hex_pair(syms::new_manifest, rev.new_manifest.inner()()); + printer.print_stanza(st); + for (edge_map::const_iterator edge = rev.edges.begin(); + edge != rev.edges.end(); ++edge) + print_edge(printer, *edge); +} + + +void +parse_edge(basic_io::parser & parser, + edge_map & es) +{ + change_set cs; + manifest_id old_man; + revision_id old_rev; + std::string tmp; + + parser.esym(syms::old_revision); + parser.hex(tmp); + old_rev = revision_id(tmp); + + parser.esym(syms::old_manifest); + parser.hex(tmp); + old_man = manifest_id(tmp); + + parse_change_set(parser, cs); + + es.insert(std::make_pair(old_rev, std::make_pair(old_man, cs))); +} + + +void +parse_revision(basic_io::parser & parser, + revision_set & rev) +{ + rev.edges.clear(); + std::string tmp; + parser.esym(syms::new_manifest); + parser.hex(tmp); + rev.new_manifest = manifest_id(tmp); + while (parser.symp(syms::old_revision)) + parse_edge(parser, rev.edges); + rev.check_sane(); +} + +void +read_revision_set(data const & dat, + revision_set & rev) +{ + std::istringstream iss(dat()); + basic_io::input_source src(iss, "revision"); + basic_io::tokenizer tok(src); + basic_io::parser pars(tok); + parse_revision(pars, rev); + I(src.lookahead == EOF); + rev.check_sane(); +} + +void +read_revision_set(revision_data const & dat, + revision_set & rev) +{ + data unpacked; + unpack(dat.inner(), unpacked); + read_revision_set(unpacked, rev); + rev.check_sane(); +} + +void +write_revision_set(revision_set const & rev, + data & dat) +{ + rev.check_sane(); + std::ostringstream oss; + basic_io::printer pr(oss); + print_revision(pr, rev); + dat = data(oss.str()); +} + +void +write_revision_set(revision_set const & rev, + revision_data & dat) +{ + rev.check_sane(); + data d; + write_revision_set(rev, d); + base64< gzip > packed; + pack(d, packed); + dat = revision_data(packed); +} + +#ifdef BUILD_UNIT_TESTS +#include "unit_tests.hh" +#include "sanity.hh" + +static void +revision_test() +{ +} + +void +add_revision_tests(test_suite * suite) +{ + I(suite); + suite->add(BOOST_TEST_CASE(&revision_test)); +} + + +#endif // BUILD_UNIT_TESTS ============================================================ --- tests/(minor)_test_a_merge_4/left 8d5a2273e0e3da4aa55ff731e7152a673b63f08a +++ tests/(minor)_test_a_merge_4/left 8d5a2273e0e3da4aa55ff731e7152a673b63f08a @@ -0,0 +1,1351 @@ +// copyright (C) 2004 graydon hoare +// all rights reserved. 
+// licensed to the public under the terms of the GNU GPL (>= 2) +// see the file COPYING for details + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "basic_io.hh" +#include "change_set.hh" +#include "constants.hh" +#include "numeric_vocab.hh" +#include "revision.hh" +#include "sanity.hh" +#include "transforms.hh" +#include "ui.hh" +#include "vocab.hh" + +void revision_set::check_sane() const +{ + I(!null_id(new_manifest)); + + manifest_map fragment; + for (edge_map::const_iterator i = edges.begin(); i != edges.end(); ++i) + { + change_set const & cs = edge_changes(i); + cs.check_sane(); + if (!global_sanity.relaxed) + { + // null old revisions come with null old manifests + I(!null_id(edge_old_revision(i)) || null_id(edge_old_manifest(i))); + } + for (change_set::delta_map::const_iterator j = cs.deltas.begin(); j != cs.deltas.end(); ++j) + { + manifest_map::const_iterator k = fragment.find(delta_entry_path(j)); + if (k == fragment.end()) + fragment.insert(std::make_pair(delta_entry_path(j), + delta_entry_dst(j))); + else + { + if (!global_sanity.relaxed) + { + I(delta_entry_dst(j) == manifest_entry_id(k)); + } + } + } + } +} + +revision_set::revision_set(revision_set const & other) +{ + other.check_sane(); + new_manifest = other.new_manifest; + edges = other.edges; +} + +revision_set const & +revision_set::operator=(revision_set const & other) +{ + other.check_sane(); + new_manifest = other.new_manifest; + edges = other.edges; + return *this; +} + + +// Traces history back 'depth' levels from 'child_id', ensuring that +// historical information is consistent within this subgraph. +// The child must be in the database already. +// +// "Consistent" currently means that we compose manifests along every path (of +// any length) that terminates at the child, and for each one check that paths +// that should be the same in fact are the same, and that the calculated +// change sets can be applied to the old manifests to create the new +// manifest. +// +// NB: While this function has some invariants in it itself, a lot of its +// purpose is just to exercise all the invariants inside change_set.cc. So +// don't remove those invariants. (As if you needed another reason...) 
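+//
+// a typical invocation, e.g. after pulling new revisions, might be (the
+// depth of 4 here is an arbitrary illustrative choice):
+//
+//   check_sane_history(new_head, 4, app.db);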
+void +check_sane_history(revision_id const & child_id, + int depth, + database & db) +{ + L(F("Verifying revision %s has sane history (to depth %i)\n") + % child_id % depth); + + typedef boost::shared_ptr shared_cs; + // (ancestor, change_set from ancestor to child) + std::map changesets; + + manifest_id m_child_id; + db.get_revision_manifest(child_id, m_child_id); + manifest_map m_child; + db.get_manifest(m_child_id, m_child); + + std::set frontier; + frontier.insert(child_id); + + while (depth-- > 0) + { + std::set next_frontier; + + for (std::set::const_iterator i = frontier.begin(); + i != frontier.end(); + ++i) + { + revision_id current_id = *i; + revision_set current; + db.get_revision(current_id, current); + // and the parents's manifests to the manifests + // and the change_set's to the parents to the changesets + for (edge_map::const_iterator j = current.edges.begin(); + j != current.edges.end(); + ++j) + { + revision_id old_id = edge_old_revision(j); + manifest_id m_old_id = edge_old_manifest(j); + change_set old_to_current_changes = edge_changes(j); + if (!null_id(old_id)) + next_frontier.insert(old_id); + + L(F("Examining %s -> %s\n") % old_id % child_id); + + // build the change_set + // if + shared_cs old_to_child_changes_p = shared_cs(new change_set); + if (current_id == child_id) + *old_to_child_changes_p = old_to_current_changes; + else + { + shared_cs current_to_child_changes_p; + I(changesets.find(current_id) != changesets.end()); + current_to_child_changes_p = changesets.find(current_id)->second; + concatenate_change_sets(old_to_current_changes, + *current_to_child_changes_p, + *old_to_child_changes_p); + } + + // we have the change_set; now, is it one we've seen before? + if (changesets.find(old_id) != changesets.end()) + { + // If it is, then make sure the paths agree on the + // changeset. + I(*changesets.find(old_id)->second == *old_to_child_changes_p); + } + else + { + // If not, this is the first time we've seen this. + // So store it in the map for later reference: + changesets.insert(std::make_pair(old_id, old_to_child_changes_p)); + // and check that it works: + + manifest_map purported_m_child; + // The null revision has empty manifest, which is the + // default. + if (!null_id(old_id)) + db.get_manifest(m_old_id, purported_m_child); + apply_change_set(*old_to_child_changes_p, purported_m_child); + I(purported_m_child == m_child); + } + } + } + frontier = next_frontier; + } +} + + +// calculating least common ancestors is a delicate thing. +// +// it turns out that we cannot choose the simple "least common ancestor" +// for purposes of a merge, because it is possible that there are two +// equally reachable common ancestors, and this produces ambiguity in the +// merge. the result -- in a pathological case -- is silently accepting one +// set of edits while discarding another; not exactly what you want a +// version control tool to do. +// +// a conservative approximation is what we'll call a "subgraph recurring" +// LCA algorithm. this is somewhat like locating the least common dominator +// node, but not quite. it is actually just a vanilla LCA search, except +// that any time there's a fork (a historical merge looks like a fork from +// our perspective, working backwards from children to parents) it reduces +// the fork to a common parent via a sequence of pairwise recursive calls +// to itself before proceeding. this will always resolve to a common parent +// with no ambiguity, unless it falls off the root of the graph. 
+// +// unfortunately the subgraph recurring algorithm sometimes goes too far +// back in history -- for example if there is an unambiguous propagate from +// one branch to another, the entire subgraph preceeding the propagate on +// the recipient branch is elided, since it is a merge. +// +// our current hypothesis is that the *exact* condition we're looking for, +// when doing a merge, is the least node which dominates one side of the +// merge and is an ancestor of the other. + +typedef unsigned long ctx; +typedef boost::dynamic_bitset<> bitmap; +typedef boost::shared_ptr shared_bitmap; + +static void +ensure_parents_loaded(ctx child, + std::map & parents, + interner & intern, + app_state & app) +{ + if (parents.find(child) != parents.end()) + return; + + L(F("loading parents for node %d\n") % child); + + std::set imm_parents; + app.db.get_revision_parents(revision_id(intern.lookup(child)), imm_parents); + + // The null revision is not a parent for purposes of finding common + // ancestors. + for (std::set::iterator p = imm_parents.begin(); + p != imm_parents.end(); ++p) + { + if (null_id(*p)) + imm_parents.erase(p); + } + + shared_bitmap bits = shared_bitmap(new bitmap(parents.size())); + + for (std::set::const_iterator p = imm_parents.begin(); + p != imm_parents.end(); ++p) + { + ctx pn = intern.intern(p->inner()()); + L(F("parent %s -> node %d\n") % *p % pn); + if (pn >= bits->size()) + bits->resize(pn+1); + bits->set(pn); + } + + parents.insert(std::make_pair(child, bits)); +} + +static bool +expand_dominators(std::map & parents, + std::map & dominators, + interner & intern, + app_state & app) +{ + bool something_changed = false; + std::vector nodes; + + nodes.reserve(dominators.size()); + + // pass 1, pull out all the node numbers we're going to scan this time around + for (std::map::const_iterator e = dominators.begin(); + e != dominators.end(); ++e) + nodes.push_back(e->first); + + // pass 2, update any of the dominator entries we can + for (std::vector::const_iterator n = nodes.begin(); + n != nodes.end(); ++n) + { + shared_bitmap bits = dominators[*n]; + bitmap saved(*bits); + if (bits->size() <= *n) + bits->resize(*n + 1); + bits->set(*n); + + ensure_parents_loaded(*n, parents, intern, app); + shared_bitmap n_parents = parents[*n]; + + bitmap intersection(bits->size()); + + bool first = true; + for (unsigned long parent = 0; + parent != n_parents->size(); ++parent) + { + if (! 
n_parents->test(parent)) + continue; + + if (dominators.find(parent) == dominators.end()) + dominators.insert(std::make_pair(parent, + shared_bitmap(new bitmap()))); + shared_bitmap pbits = dominators[parent]; + + if (bits->size() > pbits->size()) + pbits->resize(bits->size()); + + if (pbits->size() > bits->size()) + bits->resize(pbits->size()); + + if (first) + { + intersection = (*pbits); + first = false; + } + else + intersection &= (*pbits); + } + + (*bits) |= intersection; + if (*bits != saved) + something_changed = true; + } + return something_changed; +} + + +static bool +expand_ancestors(std::map & parents, + std::map & ancestors, + interner & intern, + app_state & app) +{ + bool something_changed = false; + std::vector nodes; + + nodes.reserve(ancestors.size()); + + // pass 1, pull out all the node numbers we're going to scan this time around + for (std::map::const_iterator e = ancestors.begin(); + e != ancestors.end(); ++e) + nodes.push_back(e->first); + + // pass 2, update any of the ancestor entries we can + for (std::vector::const_iterator n = nodes.begin(); n != nodes.end(); ++n) + { + shared_bitmap bits = ancestors[*n]; + bitmap saved(*bits); + if (bits->size() <= *n) + bits->resize(*n + 1); + bits->set(*n); + + ensure_parents_loaded(*n, parents, intern, app); + shared_bitmap n_parents = parents[*n]; + for (ctx parent = 0; parent != n_parents->size(); ++parent) + { + if (! n_parents->test(parent)) + continue; + + if (bits->size() <= parent) + bits->resize(parent + 1); + bits->set(parent); + + if (ancestors.find(parent) == ancestors.end()) + ancestors.insert(make_pair(parent, + shared_bitmap(new bitmap()))); + shared_bitmap pbits = ancestors[parent]; + + if (bits->size() > pbits->size()) + pbits->resize(bits->size()); + + if (pbits->size() > bits->size()) + bits->resize(pbits->size()); + + (*bits) |= (*pbits); + } + if (*bits != saved) + something_changed = true; + } + return something_changed; +} + +static bool +find_intersecting_node(bitmap & fst, + bitmap & snd, + interner const & intern, + revision_id & anc) +{ + + if (fst.size() > snd.size()) + snd.resize(fst.size()); + else if (snd.size() > fst.size()) + fst.resize(snd.size()); + + bitmap intersection = fst & snd; + if (intersection.any()) + { + L(F("found %d intersecting nodes\n") % intersection.count()); + for (ctx i = 0; i < intersection.size(); ++i) + { + if (intersection.test(i)) + { + anc = revision_id(intern.lookup(i)); + return true; + } + } + } + return false; +} + +// static void +// dump_bitset_map(std::string const & hdr, +// std::map< ctx, shared_bitmap > const & mm) +// { +// L(F("dumping [%s] (%d entries)\n") % hdr % mm.size()); +// for (std::map< ctx, shared_bitmap >::const_iterator i = mm.begin(); +// i != mm.end(); ++i) +// { +// L(F("dump [%s]: %d -> %s\n") % hdr % i->first % (*(i->second))); +// } +// } + +bool +find_common_ancestor_for_merge(revision_id const & left, + revision_id const & right, + revision_id & anc, + app_state & app) +{ + interner intern; + std::map< ctx, shared_bitmap > + parents, ancestors, dominators; + + ctx ln = intern.intern(left.inner()()); + ctx rn = intern.intern(right.inner()()); + + shared_bitmap lanc = shared_bitmap(new bitmap()); + shared_bitmap ranc = shared_bitmap(new bitmap()); + shared_bitmap ldom = shared_bitmap(new bitmap()); + shared_bitmap rdom = shared_bitmap(new bitmap()); + + ancestors.insert(make_pair(ln, lanc)); + ancestors.insert(make_pair(rn, ranc)); + dominators.insert(make_pair(ln, ldom)); + dominators.insert(make_pair(rn, rdom)); + + L(F("searching 
for common ancestor, left=%s right=%s\n") % left % right); + + while (expand_ancestors(parents, ancestors, intern, app) || + expand_dominators(parents, dominators, intern, app)) + { + L(F("common ancestor scan [par=%d,anc=%d,dom=%d]\n") % + parents.size() % ancestors.size() % dominators.size()); + + if (find_intersecting_node(*lanc, *rdom, intern, anc)) + { + L(F("found node %d, ancestor of left %s and dominating right %s\n") + % anc % left % right); + return true; + } + + else if (find_intersecting_node(*ranc, *ldom, intern, anc)) + { + L(F("found node %d, ancestor of right %s and dominating left %s\n") + % anc % right % left); + return true; + } + } +// dump_bitset_map("ancestors", ancestors); +// dump_bitset_map("dominators", dominators); +// dump_bitset_map("parents", parents); + return false; +} + + +bool +find_least_common_ancestor(revision_id const & left, + revision_id const & right, + revision_id & anc, + app_state & app) +{ + interner intern; + std::map< ctx, shared_bitmap > + parents, ancestors; + + ctx ln = intern.intern(left.inner()()); + ctx rn = intern.intern(right.inner()()); + + shared_bitmap lanc = shared_bitmap(new bitmap()); + shared_bitmap ranc = shared_bitmap(new bitmap()); + + ancestors.insert(make_pair(ln, lanc)); + ancestors.insert(make_pair(rn, ranc)); + + L(F("searching for least common ancestor, left=%s right=%s\n") % left % right); + + while (expand_ancestors(parents, ancestors, intern, app)) + { + L(F("least common ancestor scan [par=%d,anc=%d]\n") % + parents.size() % ancestors.size()); + + if (find_intersecting_node(*lanc, *ranc, intern, anc)) + { + L(F("found node %d, ancestor of left %s and right %s\n") + % anc % left % right); + return true; + } + } +// dump_bitset_map("ancestors", ancestors); +// dump_bitset_map("parents", parents); + return false; +} + + +// FIXME: this algorithm is incredibly inefficient; it's O(n) where n is the +// size of the entire revision graph. + +static bool +is_ancestor(revision_id const & ancestor_id, + revision_id const & descendent_id, + std::multimap const & graph) +{ + + std::set visited; + std::queue queue; + + queue.push(ancestor_id); + + while (!queue.empty()) + { + revision_id current_id = queue.front(); + queue.pop(); + + if (current_id == descendent_id) + return true; + else + { + typedef std::multimap::const_iterator gi; + std::pair children = graph.equal_range(current_id); + for (gi i = children.first; i != children.second; ++i) + { + if (visited.find(i->second) == visited.end()) + { + queue.push(i->second); + visited.insert(i->second); + } + } + } + } + return false; +} + +bool +is_ancestor(revision_id const & ancestor_id, + revision_id const & descendent_id, + app_state & app) +{ + L(F("checking whether %s is an ancestor of %s\n") % ancestor_id % descendent_id); + + std::multimap graph; + app.db.get_revision_ancestry(graph); + return is_ancestor(ancestor_id, descendent_id, graph); +} + + +static void +add_bitset_to_union(shared_bitmap src, + shared_bitmap dst) +{ + if (dst->size() > src->size()) + src->resize(dst->size()); + if (src->size() > dst->size()) + dst->resize(src->size()); + *dst |= *src; +} + + +static void +calculate_ancestors_from_graph(interner & intern, + revision_id const & init, + std::set const & legal, + std::multimap const & graph, + std::map< ctx, shared_bitmap > & ancestors, + shared_bitmap & total_union) +{ + typedef std::multimap::const_iterator gi; + std::stack stk; + + stk.push(intern.intern(init.inner()())); + + while (! 
stk.empty()) + { + ctx us = stk.top(); + revision_id rev(hexenc(intern.lookup(us))); + + std::pair parents = graph.equal_range(rev); + bool pushed = false; + + // first make sure all parents are done + for (gi i = parents.first; i != parents.second; ++i) + { + ctx parent = intern.intern(i->second.inner()()); + if (ancestors.find(parent) == ancestors.end()) + { + stk.push(parent); + pushed = true; + break; + } + } + + // if we pushed anything we stop now. we'll come back later when all + // the parents are done. + if (pushed) + continue; + + shared_bitmap b = shared_bitmap(new bitmap()); + + for (gi i = parents.first; i != parents.second; ++i) + { + ctx parent = intern.intern(i->second.inner()()); + + // set any parent which is a member of the underlying legal set + if (legal.find(i->second) != legal.end()) + { + if (b->size() <= parent) + b->resize(parent + 1); + b->set(parent); + } + + // ensure all parents are loaded into the ancestor map + I(ancestors.find(parent) != ancestors.end()); + + // union them into our map + std::map< ctx, shared_bitmap >::const_iterator j = ancestors.find(parent); + I(j != ancestors.end()); + add_bitset_to_union(j->second, b); + } + + add_bitset_to_union(b, total_union); + ancestors.insert(std::make_pair(us, b)); + stk.pop(); + } +} + +// This function looks at a set of revisions, and for every pair A, B in that +// set such that A is an ancestor of B, it erases A. + +void +erase_ancestors(std::set & revisions, app_state & app) +{ + typedef std::multimap::const_iterator gi; + std::multimap graph; + std::multimap inverse_graph; + + app.db.get_revision_ancestry(graph); + for (gi i = graph.begin(); i != graph.end(); ++i) + inverse_graph.insert(std::make_pair(i->second, i->first)); + + interner intern; + std::map< ctx, shared_bitmap > ancestors; + + shared_bitmap u = shared_bitmap(new bitmap()); + + for (std::set::const_iterator i = revisions.begin(); + i != revisions.end(); ++i) + { + calculate_ancestors_from_graph(intern, *i, revisions, + inverse_graph, ancestors, u); + } + + std::set tmp; + for (std::set::const_iterator i = revisions.begin(); + i != revisions.end(); ++i) + { + ctx id = intern.intern(i->inner()()); + bool has_ancestor_in_set = id < u->size() && u->test(id); + if (!has_ancestor_in_set) + tmp.insert(*i); + } + + revisions = tmp; +} + +// +// The idea with this algorithm is to walk from child up to ancestor, +// recursively, accumulating all the change_sets associated with +// intermediate nodes into *one big change_set*. +// +// clever readers will realize this is an overlapping-subproblem type +// situation and thus needs to keep a dynamic programming map to keep +// itself in linear complexity. +// +// in fact, we keep two: one which maps to computed results (partial_csets) +// and one which just keeps a set of all nodes we traversed +// (visited_nodes). in theory it could be one map with an extra bool stuck +// on each entry, but I think that would make it even less readable. it's +// already quite ugly. 
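+// (as a small worked example: if two nodes under scrutiny share a parent
+// P that turns out not to reach the ancestor, the first traversal of P
+// records it in visited_nodes, and the second encounter skips it at once
+// instead of re-walking P's whole subgraph; relevant parents get the same
+// treatment through partial_csets, except that their already-composed
+// change_set is reused as well.)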
+// + +static bool +calculate_change_sets_recursive(revision_id const & ancestor, + revision_id const & child, + app_state & app, + change_set & cumulative_cset, + std::map > & partial_csets, + std::set & visited_nodes, + std::set const & subgraph) +{ + + if (ancestor == child) + return true; + + if (subgraph.find(child) == subgraph.end()) + return false; + + visited_nodes.insert(child); + + bool relevant_child = false; + + revision_set rev; + app.db.get_revision(child, rev); + + L(F("exploring changesets from parents of %s, seeking towards %s\n") + % child % ancestor); + + for(edge_map::const_iterator i = rev.edges.begin(); i != rev.edges.end(); ++i) + { + bool relevant_parent = false; + revision_id curr_parent = edge_old_revision(i); + + if (curr_parent.inner()().empty()) + continue; + + change_set cset_to_curr_parent; + + L(F("considering parent %s of %s\n") % curr_parent % child); + + std::map >::const_iterator j = + partial_csets.find(curr_parent); + if (j != partial_csets.end()) + { + // a recursive call has traversed this parent before and built an + // existing cset. just reuse that rather than re-traversing + cset_to_curr_parent = *(j->second); + relevant_parent = true; + } + else if (visited_nodes.find(curr_parent) != visited_nodes.end()) + { + // a recursive call has traversed this parent, but there was no + // path from it to the root, so the parent is irrelevant. skip. + relevant_parent = false; + } + else + relevant_parent = calculate_change_sets_recursive(ancestor, curr_parent, app, + cset_to_curr_parent, + partial_csets, + visited_nodes, + subgraph); + + if (relevant_parent) + { + L(F("revision %s is relevant, composing with edge to %s\n") + % curr_parent % child); + concatenate_change_sets(cset_to_curr_parent, edge_changes(i), cumulative_cset); + relevant_child = true; + break; + } + else + L(F("parent %s of %s is not relevant\n") % curr_parent % child); + } + + // store the partial edge from ancestor -> child, so that if anyone + // re-traverses this edge they'll just fetch from the partial_edges + // cache. + if (relevant_child) + partial_csets.insert(std::make_pair(child, + boost::shared_ptr + (new change_set(cumulative_cset)))); + + return relevant_child; +} + +// this finds (by breadth-first search) the set of nodes you'll have to +// walk over in calculate_change_sets_recursive, to build the composite +// changeset. this is to prevent the recursive algorithm from going way +// back in history on an unlucky guess of parent. 
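+//
+// (the frontier-swap loop below is equivalent to a plain FIFO traversal;
+// an alternative shape, purely illustrative:
+//
+//   std::deque<revision_id> q;
+//   q.push_back(child);
+//   while (!q.empty())
+//     {
+//       revision_id cur = q.front();
+//       q.pop_front();
+//       // enqueue unseen parents of cur; return once ancestor is seen
+//     }
+//
+// the level-by-level set version arguably reads closer to the frontier
+// scans used elsewhere in this file, e.g. in check_sane_history.)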
+ +static void +find_subgraph_for_composite_search(revision_id const & ancestor, + revision_id const & child, + app_state & app, + std::set & subgraph) +{ + std::set frontier; + frontier.insert(child); + subgraph.insert(child); + while (!frontier.empty()) + { + std::set next_frontier; + for (std::set::const_iterator i = frontier.begin(); + i != frontier.end(); ++i) + { + revision_set rev; + app.db.get_revision(*i, rev); + L(F("adding parents of %s to subgraph\n") % *i); + + for(edge_map::const_iterator j = rev.edges.begin(); j != rev.edges.end(); ++j) + { + revision_id curr_parent = edge_old_revision(j); + if (null_id(curr_parent)) + continue; + subgraph.insert(curr_parent); + if (curr_parent == ancestor) + { + L(F("found parent %s of %s\n") % curr_parent % *i); + return; + } + else + L(F("adding parent %s to next frontier\n") % curr_parent); + next_frontier.insert(curr_parent); + } + } + frontier = next_frontier; + } +} + +void +calculate_composite_change_set(revision_id const & ancestor, + revision_id const & child, + app_state & app, + change_set & composed) +{ + L(F("calculating composite changeset between %s and %s\n") + % ancestor % child); + std::set visited; + std::set subgraph; + std::map > partial; + find_subgraph_for_composite_search(ancestor, child, app, subgraph); + calculate_change_sets_recursive(ancestor, child, app, composed, partial, + visited, subgraph); +} + + +// Stuff related to rebuilding the revision graph. Unfortunately this is a +// real enough error case that we need support code for it. + + +static void +analyze_manifest_changes(app_state & app, + manifest_id const & parent, + manifest_id const & child, + change_set & cs) +{ + manifest_map m_parent, m_child; + + if (!null_id(parent)) + app.db.get_manifest(parent, m_parent); + + I(!null_id(child)); + app.db.get_manifest(child, m_child); + + L(F("analyzing manifest changes from '%s' -> '%s'\n") % parent % child); + + for (manifest_map::const_iterator i = m_parent.begin(); + i != m_parent.end(); ++i) + { + manifest_map::const_iterator j = m_child.find(manifest_entry_path(i)); + if (j == m_child.end()) + cs.delete_file(manifest_entry_path(i)); + else if (! 
(manifest_entry_id(i) == manifest_entry_id(j))) + { + cs.apply_delta(manifest_entry_path(i), + manifest_entry_id(i), + manifest_entry_id(j)); + } + } + for (manifest_map::const_iterator i = m_child.begin(); + i != m_child.end(); ++i) + { + manifest_map::const_iterator j = m_parent.find(manifest_entry_path(i)); + if (j == m_parent.end()) + cs.add_file(manifest_entry_path(i), + manifest_entry_id(i)); + } +} + + +struct anc_graph +{ + anc_graph(bool existing, app_state & a) : + existing_graph(existing), + app(a), + max_node(0), + n_nodes("nodes", "n", 1), + n_certs_in("certs in", "c", 1), + n_revs_out("revs out", "r", 1), + n_certs_out("certs out", "C", 1) + {} + + bool existing_graph; + app_state & app; + u64 max_node; + + ticker n_nodes; + ticker n_certs_in; + ticker n_revs_out; + ticker n_certs_out; + + std::map node_to_old_man; + std::map old_man_to_node; + + std::map node_to_old_rev; + std::map old_rev_to_node; + + std::map node_to_new_rev; + std::multimap > certs; + std::multimap ancestry; + + void add_node_ancestry(u64 child, u64 parent); + void write_certs(); + void rebuild_ancestry(); + void get_node_manifest(u64 node, manifest_id & man); + u64 add_node_for_old_manifest(manifest_id const & man); + u64 add_node_for_old_revision(revision_id const & rev); + revision_id construct_revision_from_ancestry(u64 child); +}; + + +void anc_graph::add_node_ancestry(u64 child, u64 parent) +{ + L(F("noting ancestry from child %d -> parent %d\n") % child % parent); + ancestry.insert(std::make_pair(child, parent)); +} + +void anc_graph::get_node_manifest(u64 node, manifest_id & man) +{ + std::map::const_iterator i = node_to_old_man.find(node); + I(i != node_to_old_man.end()); + man = i->second; +} + +void anc_graph::write_certs() +{ + std::set cnames; + cnames.insert(cert_name(branch_cert_name)); + cnames.insert(cert_name(date_cert_name)); + cnames.insert(cert_name(author_cert_name)); + cnames.insert(cert_name(tag_cert_name)); + cnames.insert(cert_name(changelog_cert_name)); + cnames.insert(cert_name(comment_cert_name)); + cnames.insert(cert_name(testresult_cert_name)); + + typedef std::multimap >::const_iterator ci; + + for (std::map::const_iterator i = node_to_new_rev.begin(); + i != node_to_new_rev.end(); ++i) + { + revision_id rev(i->second); + + std::pair range = certs.equal_range(i->first); + + for (ci j = range.first; j != range.second; ++j) + { + cert_name name(j->second.first); + cert_value val(j->second.second); + + if (cnames.find(name) == cnames.end()) + continue; + + cert new_cert; + make_simple_cert(rev.inner(), name, val, app, new_cert); + revision rcert(new_cert); + if (! app.db.revision_cert_exists(rcert)) + { + ++n_certs_out; + app.db.put_revision_cert(rcert); + } + } + } +} + +void +anc_graph::rebuild_ancestry() +{ + P(F("rebuilding %d nodes\n") % max_node); + { + transaction_guard guard(app.db); + if (existing_graph) + app.db.delete_existing_revs_and_certs(); + + std::set parents, children, heads; + for (std::multimap::const_iterator i = ancestry.begin(); + i != ancestry.end(); ++i) + { + children.insert(i->first); + parents.insert(i->second); + } + set_difference(children.begin(), children.end(), + parents.begin(), parents.end(), + std::inserter(heads, heads.begin())); + + // FIXME: should do a depth-first traversal here, or something like, + // instead of being recursive. 
+ for (std::set::const_iterator i = heads.begin(); + i != heads.end(); ++i) + { + construct_revision_from_ancestry(*i); + } + write_certs(); + guard.commit(); + } +} + +u64 anc_graph::add_node_for_old_manifest(manifest_id const & man) +{ + I(!existing_graph); + u64 node = 0; + if (old_man_to_node.find(man) == old_man_to_node.end()) + { + node = max_node++; + ++n_nodes; + L(F("node %d = manifest %s\n") % node % man); + old_man_to_node.insert(std::make_pair(man, node)); + node_to_old_man.insert(std::make_pair(node, man)); + + // load certs + std::vector< manifest > mcerts; + app.db.get_manifest_certs(man, mcerts); + erase_bogus_certs(mcerts, app); + for(std::vector< manifest >::const_iterator i = mcerts.begin(); + i != mcerts.end(); ++i) + { + L(F("loaded '%s' manifest cert for node %s\n") % i->inner().name % node); + cert_value tv; + decode_base64(i->inner().value, tv); + ++n_certs_in; + certs.insert(std::make_pair(node, + std::make_pair(i->inner().name, tv))); + } + } + else + { + node = old_man_to_node[man]; + } + return node; +} + +u64 anc_graph::add_node_for_old_revision(revision_id const & rev) +{ + I(existing_graph); + I(!null_id(rev)); + u64 node = 0; + if (old_rev_to_node.find(rev) == old_rev_to_node.end()) + { + node = max_node++; + ++n_nodes; + + manifest_id man; + app.db.get_revision_manifest(rev, man); + + L(F("node %d = revision %s = manifest %s\n") % node % rev % man); + old_rev_to_node.insert(std::make_pair(rev, node)); + node_to_old_rev.insert(std::make_pair(node, rev)); + node_to_old_man.insert(std::make_pair(node, man)); + + // load certs + std::vector< revision > rcerts; + app.db.get_revision_certs(rev, rcerts); + erase_bogus_certs(rcerts, app); + for(std::vector< revision >::const_iterator i = rcerts.begin(); + i != rcerts.end(); ++i) + { + L(F("loaded '%s' revision cert for node %s\n") % i->inner().name % node); + cert_value tv; + decode_base64(i->inner().value, tv); + ++n_certs_in; + certs.insert(std::make_pair(node, + std::make_pair(i->inner().name, tv))); + } + } + else + { + node = old_rev_to_node[rev]; + } + + return node; +} + +// FIXME: this is recursive -- stack depth grows as ancestry depth -- and will +// overflow the stack on large histories. 
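+// (rough numbers, as an illustration of why the FIXME above matters: with
+// a common 8 MiB default thread stack and stack frames on the order of a
+// few hundred bytes, the recursion gives out somewhere in the tens of
+// thousands of levels -- well within reach of a long imported history.)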
+revision_id +anc_graph::construct_revision_from_ancestry(u64 child) +{ + L(F("processing node %d\n") % child); + + if (node_to_new_rev.find(child) != node_to_new_rev.end()) + { + L(F("node %d already processed, skipping\n") % child); + return node_to_new_rev.find(child)->second; + } + + manifest_id child_man; + get_node_manifest(child, child_man); + + revision_set rev; + rev.new_manifest = child_man; + + typedef std::multimap::const_iterator ci; + std::pair range = ancestry.equal_range(child); + if (range.first == range.second) + { + L(F("node %d is a root node\n") % child); + revision_id null_rid; + manifest_id null_mid; + boost::shared_ptr cs(new change_set()); + analyze_manifest_changes(app, null_mid, child_man, *cs); + rev.edges.insert(std::make_pair(null_rid, + std::make_pair(null_mid, cs))); + } + else + { + for (ci i = range.first; i != range.second; ++i) + { + I(child == i->first); + u64 parent(i->second); + L(F("processing edge from child %d -> parent %d\n") % child % parent); + + revision_id parent_rid; + std::map::const_iterator + j = node_to_new_rev.find(parent); + + if (j != node_to_new_rev.end()) + parent_rid = j->second; + else + { + parent_rid = construct_revision_from_ancestry(parent); + node_to_new_rev.insert(std::make_pair(parent, parent_rid)); + } + + L(F("parent node %d = revision %s\n") % parent % parent_rid); + manifest_id parent_man; + get_node_manifest(parent, parent_man); + boost::shared_ptr cs(new change_set()); + analyze_manifest_changes(app, parent_man, child_man, *cs); + rev.edges.insert(std::make_pair(parent_rid, + std::make_pair(parent_man, cs))); + } + } + + revision_id rid; + calculate_ident(rev, rid); + node_to_new_rev.insert(std::make_pair(child, rid)); + + if (!app.db.revision_exists (rid)) + { + L(F("mapped node %d to revision %s\n") % child % rid); + app.db.put_revision(rid, rev); + ++n_revs_out; + } + else + { + L(F("skipping already existing revision %s\n") % rid); + } + + return rid; +} + +void +build_changesets_from_existing_revs(app_state & app) +{ + global_sanity.set_relaxed(true); + anc_graph graph(true, app); + + P(F("rebuilding revision graph from existing graph\n")); + std::multimap existing_graph; + + app.db.get_revision_ancestry(existing_graph); + for (std::multimap::const_iterator i = existing_graph.begin(); + i != existing_graph.end(); ++i) + { + if (!null_id(i->first)) + { + u64 parent_node = graph.add_node_for_old_revision(i->first); + u64 child_node = graph.add_node_for_old_revision(i->second); + graph.add_node_ancestry(child_node, parent_node); + } + } + + global_sanity.set_relaxed(false); + graph.rebuild_ancestry(); +} + + +void +build_changesets_from_manifest_ancestry(app_state & app) +{ + anc_graph graph(false, app); + + P(F("rebuilding revision graph from manifest certs\n")); + std::vector< manifest > tmp; + app.db.get_manifest_certs(cert_name("ancestor"), tmp); + erase_bogus_certs(tmp, app); + + for (std::vector< manifest >::const_iterator i = tmp.begin(); + i != tmp.end(); ++i) + { + cert_value tv; + decode_base64(i->inner().value, tv); + manifest_id child, parent; + child = i->inner().ident; + parent = hexenc(tv()); + + u64 parent_node = graph.add_node_for_old_manifest(parent); + u64 child_node = graph.add_node_for_old_manifest(child); + graph.add_node_ancestry(child_node, parent_node); + } + + graph.rebuild_ancestry(); +} + + +// i/o stuff + +std::string revision_file_name("revision"); + +namespace +{ + namespace syms + { + std::string const old_revision("old_revision"); + std::string const new_manifest("new_manifest"); + 
std::string const old_manifest("old_manifest"); + } +} + + +void +print_edge(basic_io::printer & printer, + edge_entry const & e) +{ + basic_io::stanza st; + st.push_hex_pair(syms::old_revision, edge_old_revision(e).inner()()); + st.push_hex_pair(syms::old_manifest, edge_old_manifest(e).inner()()); + printer.print_stanza(st); + print_change_set(printer, edge_changes(e)); +} + + +void +print_revision(basic_io::printer & printer, + revision_set const & rev) +{ + rev.check_sane(); + basic_io::stanza st; + st.push_hex_pair(syms::new_manifest, rev.new_manifest.inner()()); + printer.print_stanza(st); + for (edge_map::const_iterator edge = rev.edges.begin(); + edge != rev.edges.end(); ++edge) + print_edge(printer, *edge); +} + + +void +parse_edge(basic_io::parser & parser, + edge_map & es) +{ + boost::shared_ptr cs(new change_set()); + manifest_id old_man; + revision_id old_rev; + std::string tmp; + + parser.esym(syms::old_revision); + parser.hex(tmp); + old_rev = revision_id(tmp); + + parser.esym(syms::old_manifest); + parser.hex(tmp); + old_man = manifest_id(tmp); + + parse_change_set(parser, *cs); + + es.insert(std::make_pair(old_rev, std::make_pair(old_man, cs))); +} + + +void +parse_revision(basic_io::parser & parser, + revision_set & rev) +{ + rev.edges.clear(); + std::string tmp; + parser.esym(syms::new_manifest); + parser.hex(tmp); + rev.new_manifest = manifest_id(tmp); + while (parser.symp(syms::old_revision)) + parse_edge(parser, rev.edges); + rev.check_sane(); +} + +void +read_revision_set(data const & dat, + revision_set & rev) +{ + std::istringstream iss(dat()); + basic_io::input_source src(iss, "revision"); + basic_io::tokenizer tok(src); + basic_io::parser pars(tok); + parse_revision(pars, rev); + I(src.lookahead == EOF); + rev.check_sane(); +} + +void +read_revision_set(revision_data const & dat, + revision_set & rev) +{ + data unpacked; + unpack(dat.inner(), unpacked); + read_revision_set(unpacked, rev); + rev.check_sane(); +} + +void +write_revision_set(revision_set const & rev, + data & dat) +{ + rev.check_sane(); + std::ostringstream oss; + basic_io::printer pr(oss); + print_revision(pr, rev); + dat = data(oss.str()); +} + +void +write_revision_set(revision_set const & rev, + revision_data & dat) +{ + rev.check_sane(); + data d; + write_revision_set(rev, d); + base64< gzip > packed; + pack(d, packed); + dat = revision_data(packed); +} + +#ifdef BUILD_UNIT_TESTS +#include "unit_tests.hh" +#include "sanity.hh" + +static void +revision_test() +{ +} + +void +add_revision_tests(test_suite * suite) +{ + I(suite); + suite->add(BOOST_TEST_CASE(&revision_test)); +} + + +#endif // BUILD_UNIT_TESTS ============================================================ --- tests/(minor)_test_a_merge_4/merge.diff3 8d5a2273e0e3da4aa55ff731e7152a673b63f08a +++ tests/(minor)_test_a_merge_4/merge.diff3 8d5a2273e0e3da4aa55ff731e7152a673b63f08a @@ -0,0 +1,1351 @@ +// copyright (C) 2004 graydon hoare +// all rights reserved. 
+// licensed to the public under the terms of the GNU GPL (>= 2) +// see the file COPYING for details + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "basic_io.hh" +#include "change_set.hh" +#include "constants.hh" +#include "numeric_vocab.hh" +#include "revision.hh" +#include "sanity.hh" +#include "transforms.hh" +#include "ui.hh" +#include "vocab.hh" + +void revision_set::check_sane() const +{ + I(!null_id(new_manifest)); + + manifest_map fragment; + for (edge_map::const_iterator i = edges.begin(); i != edges.end(); ++i) + { + change_set const & cs = edge_changes(i); + cs.check_sane(); + if (!global_sanity.relaxed) + { + // null old revisions come with null old manifests + I(!null_id(edge_old_revision(i)) || null_id(edge_old_manifest(i))); + } + for (change_set::delta_map::const_iterator j = cs.deltas.begin(); j != cs.deltas.end(); ++j) + { + manifest_map::const_iterator k = fragment.find(delta_entry_path(j)); + if (k == fragment.end()) + fragment.insert(std::make_pair(delta_entry_path(j), + delta_entry_dst(j))); + else + { + if (!global_sanity.relaxed) + { + I(delta_entry_dst(j) == manifest_entry_id(k)); + } + } + } + } +} + +revision_set::revision_set(revision_set const & other) +{ + other.check_sane(); + new_manifest = other.new_manifest; + edges = other.edges; +} + +revision_set const & +revision_set::operator=(revision_set const & other) +{ + other.check_sane(); + new_manifest = other.new_manifest; + edges = other.edges; + return *this; +} + + +// Traces history back 'depth' levels from 'child_id', ensuring that +// historical information is consistent within this subgraph. +// The child must be in the database already. +// +// "Consistent" currently means that we compose manifests along every path (of +// any length) that terminates at the child, and for each one check that paths +// that should be the same in fact are the same, and that the calculated +// change sets can be applied to the old manifests to create the new +// manifest. +// +// NB: While this function has some invariants in it itself, a lot of its +// purpose is just to exercise all the invariants inside change_set.cc. So +// don't remove those invariants. (As if you needed another reason...) 
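+//
+// (note on the depth parameter: each iteration of the loop below consumes
+// one whole BFS level, so depth bounds how many edges we walk back from
+// child_id, not how many revisions we visit.)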
+void +check_sane_history(revision_id const & child_id, + int depth, + database & db) +{ + L(F("Verifying revision %s has sane history (to depth %i)\n") + % child_id % depth); + + typedef boost::shared_ptr shared_cs; + // (ancestor, change_set from ancestor to child) + std::map changesets; + + manifest_id m_child_id; + db.get_revision_manifest(child_id, m_child_id); + manifest_map m_child; + db.get_manifest(m_child_id, m_child); + + std::set frontier; + frontier.insert(child_id); + + while (depth-- > 0) + { + std::set next_frontier; + + for (std::set::const_iterator i = frontier.begin(); + i != frontier.end(); + ++i) + { + revision_id current_id = *i; + revision_set current; + db.get_revision(current_id, current); + // and the parents's manifests to the manifests + // and the change_set's to the parents to the changesets + for (edge_map::const_iterator j = current.edges.begin(); + j != current.edges.end(); + ++j) + { + revision_id old_id = edge_old_revision(j); + manifest_id m_old_id = edge_old_manifest(j); + change_set old_to_current_changes = edge_changes(j); + if (!null_id(old_id)) + next_frontier.insert(old_id); + + L(F("Examining %s -> %s\n") % old_id % child_id); + + // build the change_set + // if + shared_cs old_to_child_changes_p = shared_cs(new change_set); + if (current_id == child_id) + *old_to_child_changes_p = old_to_current_changes; + else + { + shared_cs current_to_child_changes_p; + I(changesets.find(current_id) != changesets.end()); + current_to_child_changes_p = changesets.find(current_id)->second; + concatenate_change_sets(old_to_current_changes, + *current_to_child_changes_p, + *old_to_child_changes_p); + } + + // we have the change_set; now, is it one we've seen before? + if (changesets.find(old_id) != changesets.end()) + { + // If it is, then make sure the paths agree on the + // changeset. + I(*changesets.find(old_id)->second == *old_to_child_changes_p); + } + else + { + // If not, this is the first time we've seen this. + // So store it in the map for later reference: + changesets.insert(std::make_pair(old_id, old_to_child_changes_p)); + // and check that it works: + + manifest_map purported_m_child; + // The null revision has empty manifest, which is the + // default. + if (!null_id(old_id)) + db.get_manifest(m_old_id, purported_m_child); + apply_change_set(*old_to_child_changes_p, purported_m_child); + I(purported_m_child == m_child); + } + } + } + frontier = next_frontier; + } +} + + +// calculating least common ancestors is a delicate thing. +// +// it turns out that we cannot choose the simple "least common ancestor" +// for purposes of a merge, because it is possible that there are two +// equally reachable common ancestors, and this produces ambiguity in the +// merge. the result -- in a pathological case -- is silently accepting one +// set of edits while discarding another; not exactly what you want a +// version control tool to do. +// +// a conservative approximation is what we'll call a "subgraph recurring" +// LCA algorithm. this is somewhat like locating the least common dominator +// node, but not quite. it is actually just a vanilla LCA search, except +// that any time there's a fork (a historical merge looks like a fork from +// our perspective, working backwards from children to parents) it reduces +// the fork to a common parent via a sequence of pairwise recursive calls +// to itself before proceeding. this will always resolve to a common parent +// with no ambiguity, unless it falls off the root of the graph. 
+// +// unfortunately the subgraph recurring algorithm sometimes goes too far +// back in history -- for example if there is an unambiguous propagate from +// one branch to another, the entire subgraph preceeding the propagate on +// the recipient branch is elided, since it is a merge. +// +// our current hypothesis is that the *exact* condition we're looking for, +// when doing a merge, is the least node which dominates one side of the +// merge and is an ancestor of the other. + +typedef unsigned long ctx; +typedef boost::dynamic_bitset<> bitmap; +typedef boost::shared_ptr shared_bitmap; + +static void +ensure_parents_loaded(ctx child, + std::map & parents, + interner & intern, + app_state & app) +{ + if (parents.find(child) != parents.end()) + return; + + L(F("loading parents for node %d\n") % child); + + std::set imm_parents; + app.db.get_revision_parents(revision_id(intern.lookup(child)), imm_parents); + + // The null revision is not a parent for purposes of finding common + // ancestors. + for (std::set::iterator p = imm_parents.begin(); + p != imm_parents.end(); ++p) + { + if (null_id(*p)) + imm_parents.erase(p); + } + + shared_bitmap bits = shared_bitmap(new bitmap(parents.size())); + + for (std::set::const_iterator p = imm_parents.begin(); + p != imm_parents.end(); ++p) + { + ctx pn = intern.intern(p->inner()()); + L(F("parent %s -> node %d\n") % *p % pn); + if (pn >= bits->size()) + bits->resize(pn+1); + bits->set(pn); + } + + parents.insert(std::make_pair(child, bits)); +} + +static bool +expand_dominators(std::map & parents, + std::map & dominators, + interner & intern, + app_state & app) +{ + bool something_changed = false; + std::vector nodes; + + nodes.reserve(dominators.size()); + + // pass 1, pull out all the node numbers we're going to scan this time around + for (std::map::const_iterator e = dominators.begin(); + e != dominators.end(); ++e) + nodes.push_back(e->first); + + // pass 2, update any of the dominator entries we can + for (std::vector::const_iterator n = nodes.begin(); + n != nodes.end(); ++n) + { + shared_bitmap bits = dominators[*n]; + bitmap saved(*bits); + if (bits->size() <= *n) + bits->resize(*n + 1); + bits->set(*n); + + ensure_parents_loaded(*n, parents, intern, app); + shared_bitmap n_parents = parents[*n]; + + bitmap intersection(bits->size()); + + bool first = true; + for (unsigned long parent = 0; + parent != n_parents->size(); ++parent) + { + if (! 
n_parents->test(parent)) + continue; + + if (dominators.find(parent) == dominators.end()) + dominators.insert(std::make_pair(parent, + shared_bitmap(new bitmap()))); + shared_bitmap pbits = dominators[parent]; + + if (bits->size() > pbits->size()) + pbits->resize(bits->size()); + + if (pbits->size() > bits->size()) + bits->resize(pbits->size()); + + if (first) + { + intersection = (*pbits); + first = false; + } + else + intersection &= (*pbits); + } + + (*bits) |= intersection; + if (*bits != saved) + something_changed = true; + } + return something_changed; +} + + +static bool +expand_ancestors(std::map & parents, + std::map & ancestors, + interner & intern, + app_state & app) +{ + bool something_changed = false; + std::vector nodes; + + nodes.reserve(ancestors.size()); + + // pass 1, pull out all the node numbers we're going to scan this time around + for (std::map::const_iterator e = ancestors.begin(); + e != ancestors.end(); ++e) + nodes.push_back(e->first); + + // pass 2, update any of the ancestor entries we can + for (std::vector::const_iterator n = nodes.begin(); n != nodes.end(); ++n) + { + shared_bitmap bits = ancestors[*n]; + bitmap saved(*bits); + if (bits->size() <= *n) + bits->resize(*n + 1); + bits->set(*n); + + ensure_parents_loaded(*n, parents, intern, app); + shared_bitmap n_parents = parents[*n]; + for (ctx parent = 0; parent != n_parents->size(); ++parent) + { + if (! n_parents->test(parent)) + continue; + + if (bits->size() <= parent) + bits->resize(parent + 1); + bits->set(parent); + + if (ancestors.find(parent) == ancestors.end()) + ancestors.insert(make_pair(parent, + shared_bitmap(new bitmap()))); + shared_bitmap pbits = ancestors[parent]; + + if (bits->size() > pbits->size()) + pbits->resize(bits->size()); + + if (pbits->size() > bits->size()) + bits->resize(pbits->size()); + + (*bits) |= (*pbits); + } + if (*bits != saved) + something_changed = true; + } + return something_changed; +} + +static bool +find_intersecting_node(bitmap & fst, + bitmap & snd, + interner const & intern, + revision_id & anc) +{ + + if (fst.size() > snd.size()) + snd.resize(fst.size()); + else if (snd.size() > fst.size()) + fst.resize(snd.size()); + + bitmap intersection = fst & snd; + if (intersection.any()) + { + L(F("found %d intersecting nodes\n") % intersection.count()); + for (ctx i = 0; i < intersection.size(); ++i) + { + if (intersection.test(i)) + { + anc = revision_id(intern.lookup(i)); + return true; + } + } + } + return false; +} + +// static void +// dump_bitset_map(std::string const & hdr, +// std::map< ctx, shared_bitmap > const & mm) +// { +// L(F("dumping [%s] (%d entries)\n") % hdr % mm.size()); +// for (std::map< ctx, shared_bitmap >::const_iterator i = mm.begin(); +// i != mm.end(); ++i) +// { +// L(F("dump [%s]: %d -> %s\n") % hdr % i->first % (*(i->second))); +// } +// } + +bool +find_common_ancestor_for_merge(revision_id const & left, + revision_id const & right, + revision_id & anc, + app_state & app) +{ + interner intern; + std::map< ctx, shared_bitmap > + parents, ancestors, dominators; + + ctx ln = intern.intern(left.inner()()); + ctx rn = intern.intern(right.inner()()); + + shared_bitmap lanc = shared_bitmap(new bitmap()); + shared_bitmap ranc = shared_bitmap(new bitmap()); + shared_bitmap ldom = shared_bitmap(new bitmap()); + shared_bitmap rdom = shared_bitmap(new bitmap()); + + ancestors.insert(make_pair(ln, lanc)); + ancestors.insert(make_pair(rn, ranc)); + dominators.insert(make_pair(ln, ldom)); + dominators.insert(make_pair(rn, rdom)); + + L(F("searching 
for common ancestor, left=%s right=%s\n") % left % right); + + while (expand_ancestors(parents, ancestors, intern, app) || + expand_dominators(parents, dominators, intern, app)) + { + L(F("common ancestor scan [par=%d,anc=%d,dom=%d]\n") % + parents.size() % ancestors.size() % dominators.size()); + + if (find_intersecting_node(*lanc, *rdom, intern, anc)) + { + L(F("found node %d, ancestor of left %s and dominating right %s\n") + % anc % left % right); + return true; + } + + else if (find_intersecting_node(*ranc, *ldom, intern, anc)) + { + L(F("found node %d, ancestor of right %s and dominating left %s\n") + % anc % right % left); + return true; + } + } +// dump_bitset_map("ancestors", ancestors); +// dump_bitset_map("dominators", dominators); +// dump_bitset_map("parents", parents); + return false; +} + + +bool +find_least_common_ancestor(revision_id const & left, + revision_id const & right, + revision_id & anc, + app_state & app) +{ + interner intern; + std::map< ctx, shared_bitmap > + parents, ancestors; + + ctx ln = intern.intern(left.inner()()); + ctx rn = intern.intern(right.inner()()); + + shared_bitmap lanc = shared_bitmap(new bitmap()); + shared_bitmap ranc = shared_bitmap(new bitmap()); + + ancestors.insert(make_pair(ln, lanc)); + ancestors.insert(make_pair(rn, ranc)); + + L(F("searching for least common ancestor, left=%s right=%s\n") % left % right); + + while (expand_ancestors(parents, ancestors, intern, app)) + { + L(F("least common ancestor scan [par=%d,anc=%d]\n") % + parents.size() % ancestors.size()); + + if (find_intersecting_node(*lanc, *ranc, intern, anc)) + { + L(F("found node %d, ancestor of left %s and right %s\n") + % anc % left % right); + return true; + } + } +// dump_bitset_map("ancestors", ancestors); +// dump_bitset_map("parents", parents); + return false; +} + + +// FIXME: this algorithm is incredibly inefficient; it's O(n) where n is the +// size of the entire revision graph. + +static bool +is_ancestor(revision_id const & ancestor_id, + revision_id const & descendent_id, + std::multimap const & graph) +{ + + std::set visited; + std::queue queue; + + queue.push(ancestor_id); + + while (!queue.empty()) + { + revision_id current_id = queue.front(); + queue.pop(); + + if (current_id == descendent_id) + return true; + else + { + typedef std::multimap::const_iterator gi; + std::pair children = graph.equal_range(current_id); + for (gi i = children.first; i != children.second; ++i) + { + if (visited.find(i->second) == visited.end()) + { + queue.push(i->second); + visited.insert(i->second); + } + } + } + } + return false; +} + +bool +is_ancestor(revision_id const & ancestor_id, + revision_id const & descendent_id, + app_state & app) +{ + L(F("checking whether %s is an ancestor of %s\n") % ancestor_id % descendent_id); + + std::multimap graph; + app.db.get_revision_ancestry(graph); + return is_ancestor(ancestor_id, descendent_id, graph); +} + + +static void +add_bitset_to_union(shared_bitmap src, + shared_bitmap dst) +{ + if (dst->size() > src->size()) + src->resize(dst->size()); + if (src->size() > dst->size()) + dst->resize(src->size()); + *dst |= *src; +} + + +static void +calculate_ancestors_from_graph(interner & intern, + revision_id const & init, + std::set const & legal, + std::multimap const & graph, + std::map< ctx, shared_bitmap > & ancestors, + shared_bitmap & total_union) +{ + typedef std::multimap::const_iterator gi; + std::stack stk; + + stk.push(intern.intern(init.inner()())); + + while (! 
stk.empty()) + { + ctx us = stk.top(); + revision_id rev(hexenc(intern.lookup(us))); + + std::pair parents = graph.equal_range(rev); + bool pushed = false; + + // first make sure all parents are done + for (gi i = parents.first; i != parents.second; ++i) + { + ctx parent = intern.intern(i->second.inner()()); + if (ancestors.find(parent) == ancestors.end()) + { + stk.push(parent); + pushed = true; + break; + } + } + + // if we pushed anything we stop now. we'll come back later when all + // the parents are done. + if (pushed) + continue; + + shared_bitmap b = shared_bitmap(new bitmap()); + + for (gi i = parents.first; i != parents.second; ++i) + { + ctx parent = intern.intern(i->second.inner()()); + + // set any parent which is a member of the underlying legal set + if (legal.find(i->second) != legal.end()) + { + if (b->size() <= parent) + b->resize(parent + 1); + b->set(parent); + } + + // ensure all parents are loaded into the ancestor map + I(ancestors.find(parent) != ancestors.end()); + + // union them into our map + std::map< ctx, shared_bitmap >::const_iterator j = ancestors.find(parent); + I(j != ancestors.end()); + add_bitset_to_union(j->second, b); + } + + add_bitset_to_union(b, total_union); + ancestors.insert(std::make_pair(us, b)); + stk.pop(); + } +} + +// This function looks at a set of revisions, and for every pair A, B in that +// set such that A is an ancestor of B, it erases A. + +void +erase_ancestors(std::set & revisions, app_state & app) +{ + typedef std::multimap::const_iterator gi; + std::multimap graph; + std::multimap inverse_graph; + + app.db.get_revision_ancestry(graph); + for (gi i = graph.begin(); i != graph.end(); ++i) + inverse_graph.insert(std::make_pair(i->second, i->first)); + + interner intern; + std::map< ctx, shared_bitmap > ancestors; + + shared_bitmap u = shared_bitmap(new bitmap()); + + for (std::set::const_iterator i = revisions.begin(); + i != revisions.end(); ++i) + { + calculate_ancestors_from_graph(intern, *i, revisions, + inverse_graph, ancestors, u); + } + + std::set tmp; + for (std::set::const_iterator i = revisions.begin(); + i != revisions.end(); ++i) + { + ctx id = intern.intern(i->inner()()); + bool has_ancestor_in_set = id < u->size() && u->test(id); + if (!has_ancestor_in_set) + tmp.insert(*i); + } + + revisions = tmp; +} + +// +// The idea with this algorithm is to walk from child up to ancestor, +// recursively, accumulating all the change_sets associated with +// intermediate nodes into *one big change_set*. +// +// clever readers will realize this is an overlapping-subproblem type +// situation and thus needs to keep a dynamic programming map to keep +// itself in linear complexity. +// +// in fact, we keep two: one which maps to computed results (partial_csets) +// and one which just keeps a set of all nodes we traversed +// (visited_nodes). in theory it could be one map with an extra bool stuck +// on each entry, but I think that would make it even less readable. it's +// already quite ugly. 
+// + +static bool +calculate_change_sets_recursive(revision_id const & ancestor, + revision_id const & child, + app_state & app, + change_set & cumulative_cset, + std::map > & partial_csets, + std::set & visited_nodes, + std::set const & subgraph) +{ + + if (ancestor == child) + return true; + + if (subgraph.find(child) == subgraph.end()) + return false; + + visited_nodes.insert(child); + + bool relevant_child = false; + + revision_set rev; + app.db.get_revision(child, rev); + + L(F("exploring changesets from parents of %s, seeking towards %s\n") + % child % ancestor); + + for(edge_map::const_iterator i = rev.edges.begin(); i != rev.edges.end(); ++i) + { + bool relevant_parent = false; + revision_id curr_parent = edge_old_revision(i); + + if (curr_parent.inner()().empty()) + continue; + + change_set cset_to_curr_parent; + + L(F("considering parent %s of %s\n") % curr_parent % child); + + std::map >::const_iterator j = + partial_csets.find(curr_parent); + if (j != partial_csets.end()) + { + // a recursive call has traversed this parent before and built an + // existing cset. just reuse that rather than re-traversing + cset_to_curr_parent = *(j->second); + relevant_parent = true; + } + else if (visited_nodes.find(curr_parent) != visited_nodes.end()) + { + // a recursive call has traversed this parent, but there was no + // path from it to the root, so the parent is irrelevant. skip. + relevant_parent = false; + } + else + relevant_parent = calculate_change_sets_recursive(ancestor, curr_parent, app, + cset_to_curr_parent, + partial_csets, + visited_nodes, + subgraph); + + if (relevant_parent) + { + L(F("revision %s is relevant, composing with edge to %s\n") + % curr_parent % child); + concatenate_change_sets(cset_to_curr_parent, edge_changes(i), cumulative_cset); + relevant_child = true; + break; + } + else + L(F("parent %s of %s is not relevant\n") % curr_parent % child); + } + + // store the partial edge from ancestor -> child, so that if anyone + // re-traverses this edge they'll just fetch from the partial_edges + // cache. + if (relevant_child) + partial_csets.insert(std::make_pair(child, + boost::shared_ptr + (new change_set(cumulative_cset)))); + + return relevant_child; +} + +// this finds (by breadth-first search) the set of nodes you'll have to +// walk over in calculate_change_sets_recursive, to build the composite +// changeset. this is to prevent the recursive algorithm from going way +// back in history on an unlucky guess of parent. 
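+// ---------------------------------------------------------------------
+// (editorial sketch, not part of the original file.) The two-map
+// memoization pattern of calculate_change_sets_recursive above, reduced
+// to a toy DAG of ints: 'memo' plays the role of partial_csets (finished
+// results, reusable) and 'seen' plays the role of visited_nodes (dead
+// ends, skippable), which together keep the walk linear. Here the
+// "composition" is just counting edges back to the root.
+static bool
+toy_compose_to_root(int node, int root,
+                    std::multimap<int, int> const & parents_of,
+                    std::map<int, int> & memo,
+                    std::set<int> & seen,
+                    int & out)
+{
+  if (node == root) { out = 0; return true; }
+  std::map<int, int>::const_iterator m = memo.find(node);
+  if (m != memo.end()) { out = m->second; return true; } // reuse result
+  if (seen.find(node) != seen.end()) return false;       // known dead end
+  seen.insert(node);
+  typedef std::multimap<int, int>::const_iterator it;
+  std::pair<it, it> range = parents_of.equal_range(node);
+  for (it i = range.first; i != range.second; ++i)
+    {
+      int partial = 0;
+      if (toy_compose_to_root(i->second, root, parents_of, memo, seen, partial))
+        {
+          out = partial + 1; // "concatenate" this edge onto the partial result
+          memo.insert(std::make_pair(node, out));
+          return true;
+        }
+    }
+  return false;
+}
+// ---------------------------------------------------------------------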
+ +static void +find_subgraph_for_composite_search(revision_id const & ancestor, + revision_id const & child, + app_state & app, + std::set & subgraph) +{ + std::set frontier; + frontier.insert(child); + subgraph.insert(child); + while (!frontier.empty()) + { + std::set next_frontier; + for (std::set::const_iterator i = frontier.begin(); + i != frontier.end(); ++i) + { + revision_set rev; + app.db.get_revision(*i, rev); + L(F("adding parents of %s to subgraph\n") % *i); + + for(edge_map::const_iterator j = rev.edges.begin(); j != rev.edges.end(); ++j) + { + revision_id curr_parent = edge_old_revision(j); + if (null_id(curr_parent)) + continue; + subgraph.insert(curr_parent); + if (curr_parent == ancestor) + { + L(F("found parent %s of %s\n") % curr_parent % *i); + return; + } + else + L(F("adding parent %s to next frontier\n") % curr_parent); + next_frontier.insert(curr_parent); + } + } + frontier = next_frontier; + } +} + +void +calculate_composite_change_set(revision_id const & ancestor, + revision_id const & child, + app_state & app, + change_set & composed) +{ + L(F("calculating composite changeset between %s and %s\n") + % ancestor % child); + std::set visited; + std::set subgraph; + std::map > partial; + find_subgraph_for_composite_search(ancestor, child, app, subgraph); + calculate_change_sets_recursive(ancestor, child, app, composed, partial, + visited, subgraph); +} + + +// Stuff related to rebuilding the revision graph. Unfortunately this is a +// real enough error case that we need support code for it. + + +static void +analyze_manifest_changes(app_state & app, + manifest_id const & parent, + manifest_id const & child, + change_set & cs) +{ + manifest_map m_parent, m_child; + + if (!null_id(parent)) + app.db.get_manifest(parent, m_parent); + + I(!null_id(child)); + app.db.get_manifest(child, m_child); + + L(F("analyzing manifest changes from '%s' -> '%s'\n") % parent % child); + + for (manifest_map::const_iterator i = m_parent.begin(); + i != m_parent.end(); ++i) + { + manifest_map::const_iterator j = m_child.find(manifest_entry_path(i)); + if (j == m_child.end()) + cs.delete_file(manifest_entry_path(i)); + else if (! 
(manifest_entry_id(i) == manifest_entry_id(j))) + { + cs.apply_delta(manifest_entry_path(i), + manifest_entry_id(i), + manifest_entry_id(j)); + } + } + for (manifest_map::const_iterator i = m_child.begin(); + i != m_child.end(); ++i) + { + manifest_map::const_iterator j = m_parent.find(manifest_entry_path(i)); + if (j == m_parent.end()) + cs.add_file(manifest_entry_path(i), + manifest_entry_id(i)); + } +} + + +struct anc_graph +{ + anc_graph(bool existing, app_state & a) : + existing_graph(existing), + app(a), + max_node(0), + n_nodes("nodes", "n", 1), + n_certs_in("certs in", "c", 1), + n_revs_out("revs out", "r", 1), + n_certs_out("certs out", "C", 1) + {} + + bool existing_graph; + app_state & app; + u64 max_node; + + ticker n_nodes; + ticker n_certs_in; + ticker n_revs_out; + ticker n_certs_out; + + std::map node_to_old_man; + std::map old_man_to_node; + + std::map node_to_old_rev; + std::map old_rev_to_node; + + std::map node_to_new_rev; + std::multimap > certs; + std::multimap ancestry; + + void add_node_ancestry(u64 child, u64 parent); + void write_certs(); + void rebuild_ancestry(); + void get_node_manifest(u64 node, manifest_id & man); + u64 add_node_for_old_manifest(manifest_id const & man); + u64 add_node_for_old_revision(revision_id const & rev); + revision_id construct_revision_from_ancestry(u64 child); +}; + + +void anc_graph::add_node_ancestry(u64 child, u64 parent) +{ + L(F("noting ancestry from child %d -> parent %d\n") % child % parent); + ancestry.insert(std::make_pair(child, parent)); +} + +void anc_graph::get_node_manifest(u64 node, manifest_id & man) +{ + std::map::const_iterator i = node_to_old_man.find(node); + I(i != node_to_old_man.end()); + man = i->second; +} + +void anc_graph::write_certs() +{ + std::set cnames; + cnames.insert(cert_name(branch_cert_name)); + cnames.insert(cert_name(date_cert_name)); + cnames.insert(cert_name(author_cert_name)); + cnames.insert(cert_name(tag_cert_name)); + cnames.insert(cert_name(changelog_cert_name)); + cnames.insert(cert_name(comment_cert_name)); + cnames.insert(cert_name(testresult_cert_name)); + + typedef std::multimap >::const_iterator ci; + + for (std::map::const_iterator i = node_to_new_rev.begin(); + i != node_to_new_rev.end(); ++i) + { + revision_id rev(i->second); + + std::pair range = certs.equal_range(i->first); + + for (ci j = range.first; j != range.second; ++j) + { + cert_name name(j->second.first); + cert_value val(j->second.second); + + if (cnames.find(name) == cnames.end()) + continue; + + cert new_cert; + make_simple_cert(rev.inner(), name, val, app, new_cert); + revision rcert(new_cert); + if (! app.db.revision_cert_exists(rcert)) + { + ++n_certs_out; + app.db.put_revision_cert(rcert); + } + } + } +} + +void +anc_graph::rebuild_ancestry() +{ + P(F("rebuilding %d nodes\n") % max_node); + { + transaction_guard guard(app.db); + if (existing_graph) + app.db.delete_existing_revs_and_certs(); + + std::set parents, children, heads; + for (std::multimap::const_iterator i = ancestry.begin(); + i != ancestry.end(); ++i) + { + children.insert(i->first); + parents.insert(i->second); + } + set_difference(children.begin(), children.end(), + parents.begin(), parents.end(), + std::inserter(heads, heads.begin())); + + // FIXME: should do a depth-first traversal here, or something like, + // instead of being recursive. 
+ for (std::set::const_iterator i = heads.begin(); + i != heads.end(); ++i) + { + construct_revision_from_ancestry(*i); + } + write_certs(); + guard.commit(); + } +} + +u64 anc_graph::add_node_for_old_manifest(manifest_id const & man) +{ + I(!existing_graph); + u64 node = 0; + if (old_man_to_node.find(man) == old_man_to_node.end()) + { + node = max_node++; + ++n_nodes; + L(F("node %d = manifest %s\n") % node % man); + old_man_to_node.insert(std::make_pair(man, node)); + node_to_old_man.insert(std::make_pair(node, man)); + + // load certs + std::vector< manifest > mcerts; + app.db.get_manifest_certs(man, mcerts); + erase_bogus_certs(mcerts, app); + for(std::vector< manifest >::const_iterator i = mcerts.begin(); + i != mcerts.end(); ++i) + { + L(F("loaded '%s' manifest cert for node %s\n") % i->inner().name % node); + cert_value tv; + decode_base64(i->inner().value, tv); + ++n_certs_in; + certs.insert(std::make_pair(node, + std::make_pair(i->inner().name, tv))); + } + } + else + { + node = old_man_to_node[man]; + } + return node; +} + +u64 anc_graph::add_node_for_old_revision(revision_id const & rev) +{ + I(existing_graph); + I(!null_id(rev)); + u64 node = 0; + if (old_rev_to_node.find(rev) == old_rev_to_node.end()) + { + node = max_node++; + ++n_nodes; + + manifest_id man; + app.db.get_revision_manifest(rev, man); + + L(F("node %d = revision %s = manifest %s\n") % node % rev % man); + old_rev_to_node.insert(std::make_pair(rev, node)); + node_to_old_rev.insert(std::make_pair(node, rev)); + node_to_old_man.insert(std::make_pair(node, man)); + + // load certs + std::vector< revision > rcerts; + app.db.get_revision_certs(rev, rcerts); + erase_bogus_certs(rcerts, app); + for(std::vector< revision >::const_iterator i = rcerts.begin(); + i != rcerts.end(); ++i) + { + L(F("loaded '%s' revision cert for node %s\n") % i->inner().name % node); + cert_value tv; + decode_base64(i->inner().value, tv); + ++n_certs_in; + certs.insert(std::make_pair(node, + std::make_pair(i->inner().name, tv))); + } + } + else + { + node = old_rev_to_node[rev]; + } + + return node; +} + +// FIXME: this is recursive -- stack depth grows as ancestry depth -- and will +// overflow the stack on large histories. 
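+// ---------------------------------------------------------------------
+// (editorial sketch, not part of the original file.) The iterative shape
+// the FIXME above asks for: an explicit work stack that defers a node
+// until all of its parents are finished -- the same post-order trick
+// calculate_ancestors_from_graph uses earlier in this file. The int node
+// ids and the 'result' map are toy stand-ins, not anc_graph members.
+static void
+toy_process_post_order(int start,
+                       std::multimap<int, int> const & parents_of,
+                       std::map<int, int> & result)
+{
+  std::stack<int> stk;
+  stk.push(start);
+  while (!stk.empty())
+    {
+      int n = stk.top();
+      typedef std::multimap<int, int>::const_iterator it;
+      std::pair<it, it> range = parents_of.equal_range(n);
+      bool pushed = false;
+      // make sure all parents are done before processing n
+      for (it i = range.first; i != range.second; ++i)
+        if (result.find(i->second) == result.end())
+          {
+            stk.push(i->second);
+            pushed = true;
+            break;
+          }
+      if (pushed)
+        continue; // revisit n once its parents have been processed
+      if (result.find(n) == result.end())
+        result.insert(std::make_pair(n, 1)); // "construct" n from its parents
+      stk.pop();
+    }
+}
+// ---------------------------------------------------------------------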
+revision_id +anc_graph::construct_revision_from_ancestry(u64 child) +{ + L(F("processing node %d\n") % child); + + if (node_to_new_rev.find(child) != node_to_new_rev.end()) + { + L(F("node %d already processed, skipping\n") % child); + return node_to_new_rev.find(child)->second; + } + + manifest_id child_man; + get_node_manifest(child, child_man); + + revision_set rev; + rev.new_manifest = child_man; + + typedef std::multimap::const_iterator ci; + std::pair range = ancestry.equal_range(child); + if (range.first == range.second) + { + L(F("node %d is a root node\n") % child); + revision_id null_rid; + manifest_id null_mid; + boost::shared_ptr cs(new change_set()); + analyze_manifest_changes(app, null_mid, child_man, *cs); + rev.edges.insert(std::make_pair(null_rid, + std::make_pair(null_mid, cs))); + } + else + { + for (ci i = range.first; i != range.second; ++i) + { + I(child == i->first); + u64 parent(i->second); + L(F("processing edge from child %d -> parent %d\n") % child % parent); + + revision_id parent_rid; + std::map::const_iterator + j = node_to_new_rev.find(parent); + + if (j != node_to_new_rev.end()) + parent_rid = j->second; + else + { + parent_rid = construct_revision_from_ancestry(parent); + node_to_new_rev.insert(std::make_pair(parent, parent_rid)); + } + + L(F("parent node %d = revision %s\n") % parent % parent_rid); + manifest_id parent_man; + get_node_manifest(parent, parent_man); + boost::shared_ptr cs(new change_set()); + analyze_manifest_changes(app, parent_man, child_man, *cs); + rev.edges.insert(std::make_pair(parent_rid, + std::make_pair(parent_man, cs))); + } + } + + revision_id rid; + calculate_ident(rev, rid); + node_to_new_rev.insert(std::make_pair(child, rid)); + + if (!app.db.revision_exists (rid)) + { + L(F("mapped node %d to revision %s\n") % child % rid); + app.db.put_revision(rid, rev); + ++n_revs_out; + } + else + { + L(F("skipping already existing revision %s\n") % rid); + } + + return rid; +} + +void +build_changesets_from_existing_revs(app_state & app) +{ + global_sanity.set_relaxed(true); + anc_graph graph(true, app); + + P(F("rebuilding revision graph from existing graph\n")); + std::multimap existing_graph; + + app.db.get_revision_ancestry(existing_graph); + for (std::multimap::const_iterator i = existing_graph.begin(); + i != existing_graph.end(); ++i) + { + if (!null_id(i->first)) + { + u64 parent_node = graph.add_node_for_old_revision(i->first); + u64 child_node = graph.add_node_for_old_revision(i->second); + graph.add_node_ancestry(child_node, parent_node); + } + } + + global_sanity.set_relaxed(false); + graph.rebuild_ancestry(); +} + + +void +build_changesets_from_manifest_ancestry(app_state & app) +{ + anc_graph graph(false, app); + + P(F("rebuilding revision graph from manifest certs\n")); + std::vector< manifest > tmp; + app.db.get_manifest_certs(cert_name("ancestor"), tmp); + erase_bogus_certs(tmp, app); + + for (std::vector< manifest >::const_iterator i = tmp.begin(); + i != tmp.end(); ++i) + { + cert_value tv; + decode_base64(i->inner().value, tv); + manifest_id child, parent; + child = i->inner().ident; + parent = hexenc(tv()); + + u64 parent_node = graph.add_node_for_old_manifest(parent); + u64 child_node = graph.add_node_for_old_manifest(child); + graph.add_node_ancestry(child_node, parent_node); + } + + graph.rebuild_ancestry(); +} + + +// i/o stuff + +std::string revision_file_name("revision"); + +namespace +{ + namespace syms + { + std::string const old_revision("old_revision"); + std::string const new_manifest("new_manifest"); + 
std::string const old_manifest("old_manifest"); + } +} + + +void +print_edge(basic_io::printer & printer, + edge_entry const & e) +{ + basic_io::stanza st; + st.push_hex_pair(syms::old_revision, edge_old_revision(e).inner()()); + st.push_hex_pair(syms::old_manifest, edge_old_manifest(e).inner()()); + printer.print_stanza(st); + print_change_set(printer, edge_changes(e)); +} + + +void +print_revision(basic_io::printer & printer, + revision_set const & rev) +{ + rev.check_sane(); + basic_io::stanza st; + st.push_hex_pair(syms::new_manifest, rev.new_manifest.inner()()); + printer.print_stanza(st); + for (edge_map::const_iterator edge = rev.edges.begin(); + edge != rev.edges.end(); ++edge) + print_edge(printer, *edge); +} + + +void +parse_edge(basic_io::parser & parser, + edge_map & es) +{ + boost::shared_ptr cs(new change_set()); + manifest_id old_man; + revision_id old_rev; + std::string tmp; + + parser.esym(syms::old_revision); + parser.hex(tmp); + old_rev = revision_id(tmp); + + parser.esym(syms::old_manifest); + parser.hex(tmp); + old_man = manifest_id(tmp); + + parse_change_set(parser, *cs); + + es.insert(std::make_pair(old_rev, std::make_pair(old_man, cs))); +} + + +void +parse_revision(basic_io::parser & parser, + revision_set & rev) +{ + rev.edges.clear(); + std::string tmp; + parser.esym(syms::new_manifest); + parser.hex(tmp); + rev.new_manifest = manifest_id(tmp); + while (parser.symp(syms::old_revision)) + parse_edge(parser, rev.edges); + rev.check_sane(); +} + +void +read_revision_set(data const & dat, + revision_set & rev) +{ + std::istringstream iss(dat()); + basic_io::input_source src(iss, "revision"); + basic_io::tokenizer tok(src); + basic_io::parser pars(tok); + parse_revision(pars, rev); + I(src.lookahead == EOF); + rev.check_sane(); +} + +void +read_revision_set(revision_data const & dat, + revision_set & rev) +{ + data unpacked; + unpack(dat.inner(), unpacked); + read_revision_set(unpacked, rev); + rev.check_sane(); +} + +void +write_revision_set(revision_set const & rev, + data & dat) +{ + rev.check_sane(); + std::ostringstream oss; + basic_io::printer pr(oss); + print_revision(pr, rev); + dat = data(oss.str()); +} + +void +write_revision_set(revision_set const & rev, + revision_data & dat) +{ + rev.check_sane(); + data d; + write_revision_set(rev, d); + base64< gzip > packed; + pack(d, packed); + dat = revision_data(packed); +} + +#ifdef BUILD_UNIT_TESTS +#include "unit_tests.hh" +#include "sanity.hh" + +static void +revision_test() +{ +} + +void +add_revision_tests(test_suite * suite) +{ + I(suite); + suite->add(BOOST_TEST_CASE(&revision_test)); +} + + +#endif // BUILD_UNIT_TESTS ============================================================ --- tests/(minor)_test_a_merge_4/right 6745b398ffecec36bc4fc45598e678b3391d91b2 +++ tests/(minor)_test_a_merge_4/right 6745b398ffecec36bc4fc45598e678b3391d91b2 @@ -0,0 +1,1353 @@ +// copyright (C) 2004 graydon hoare +// all rights reserved. 
+// licensed to the public under the terms of the GNU GPL (>= 2)
+// see the file COPYING for details
+
+#include <cctype>
+#include <cstdlib>
+#include <iostream>
+#include <map>
+#include <queue>
+#include <set>
+#include <sstream>
+#include <stack>
+#include <string>
+#include <vector>
+#include <iterator>
+
+#include <boost/shared_ptr.hpp>
+#include <boost/dynamic_bitset.hpp>
+#include <boost/lexical_cast.hpp>
+
+#include "basic_io.hh"
+#include "change_set.hh"
+#include "constants.hh"
+#include "numeric_vocab.hh"
+#include "revision.hh"
+#include "sanity.hh"
+#include "transforms.hh"
+#include "ui.hh"
+#include "vocab.hh"
+
+void revision_set::check_sane() const
+{
+  I(!null_id(new_manifest));
+
+  manifest_map fragment;
+  for (edge_map::const_iterator i = edges.begin(); i != edges.end(); ++i)
+    {
+      change_set const & cs = edge_changes(i);
+      cs.check_sane();
+      if (!global_sanity.relaxed)
+        {
+          // null old revisions come with null old manifests
+          I(!null_id(edge_old_revision(i)) || null_id(edge_old_manifest(i)));
+        }
+      for (change_set::delta_map::const_iterator j = cs.deltas.begin();
+           j != cs.deltas.end(); ++j)
+        {
+          manifest_map::const_iterator k = fragment.find(delta_entry_path(j));
+          if (k == fragment.end())
+            fragment.insert(std::make_pair(delta_entry_path(j),
+                                           delta_entry_dst(j)));
+          else
+            {
+              if (!global_sanity.relaxed)
+                {
+                  I(delta_entry_dst(j) == manifest_entry_id(k));
+                }
+            }
+        }
+    }
+}
+
+revision_set::revision_set(revision_set const & other)
+{
+  other.check_sane();
+  new_manifest = other.new_manifest;
+  edges = other.edges;
+}
+
+revision_set const &
+revision_set::operator=(revision_set const & other)
+{
+  other.check_sane();
+  new_manifest = other.new_manifest;
+  edges = other.edges;
+  return *this;
+}
+
+
+// Traces history back 'depth' levels from 'child_id', ensuring that
+// historical information is consistent within this subgraph.
+// The child must be in the database already.
+//
+// "Consistent" currently means that we compose manifests along every path (of
+// any length) that terminates at the child, and for each one check that paths
+// that should be the same in fact are the same, and that the calculated
+// change sets can be applied to the old manifests to create the new
+// manifest.
+//
+// NB: While this function has some invariants in itself, a lot of its
+// purpose is just to exercise all the invariants inside change_set.cc. So
+// don't remove those invariants. (As if you needed another reason...)
+void
+check_sane_history(revision_id const & child_id,
+                   int depth,
+                   database & db)
+{
+  L(F("Verifying revision %s has sane history (to depth %i)\n")
+    % child_id % depth);
+
+  typedef boost::shared_ptr<change_set> shared_cs;
+  // (ancestor, change_set from ancestor to child)
+  std::map<revision_id, shared_cs> changesets;
+
+  manifest_id m_child_id;
+  db.get_revision_manifest(child_id, m_child_id);
+  manifest_map m_child;
+  db.get_manifest(m_child_id, m_child);
+
+  std::set<revision_id> frontier;
+  frontier.insert(child_id);
+
+  while (depth-- > 0)
+    {
+      std::set<revision_id> next_frontier;
+
+      for (std::set<revision_id>::const_iterator i = frontier.begin();
+           i != frontier.end();
+           ++i)
+        {
+          revision_id current_id = *i;
+          revision_set current;
+          db.get_revision(current_id, current);
+          // add each parent's manifest to the manifests, and the
+          // change_set leading to each parent to the changesets
+          for (edge_map::const_iterator j = current.edges.begin();
+               j != current.edges.end();
+               ++j)
+            {
+              revision_id old_id = edge_old_revision(j);
+              manifest_id m_old_id = edge_old_manifest(j);
+              change_set old_to_current_changes = edge_changes(j);
+              if (!null_id(old_id))
+                next_frontier.insert(old_id);
+
+              L(F("Examining %s -> %s\n") % old_id % child_id);
+
+              // build the change_set from this edge back to the child
+              shared_cs old_to_child_changes_p = shared_cs(new change_set);
+              if (current_id == child_id)
+                *old_to_child_changes_p = old_to_current_changes;
+              else
+                {
+                  shared_cs current_to_child_changes_p;
+                  I(changesets.find(current_id) != changesets.end());
+                  current_to_child_changes_p = changesets.find(current_id)->second;
+                  concatenate_change_sets(old_to_current_changes,
+                                          *current_to_child_changes_p,
+                                          *old_to_child_changes_p);
+                }
+
+              // we have the change_set; now, is it one we've seen before?
+              if (changesets.find(old_id) != changesets.end())
+                {
+                  // If it is, then make sure the paths agree on the
+                  // changeset.
+                  I(*changesets.find(old_id)->second == *old_to_child_changes_p);
+                }
+              else
+                {
+                  // If not, this is the first time we've seen this.
+                  // So store it in the map for later reference:
+                  changesets.insert(std::make_pair(old_id, old_to_child_changes_p));
+                  // and check that it works:
+
+                  manifest_map m_old;
+                  if (!null_id(old_id))
+                    db.get_manifest(m_old_id, m_old);
+                  // The null revision has an empty manifest, which is the
+                  // default.
+                  manifest_map purported_m_child;
+                  apply_change_set(m_old, *old_to_child_changes_p,
+                                   purported_m_child);
+                  I(purported_m_child == m_child);
+                }
+            }
+        }
+      frontier = next_frontier;
+    }
+}
+
+
+// calculating least common ancestors is a delicate thing.
+//
+// it turns out that we cannot choose the simple "least common ancestor"
+// for purposes of a merge, because it is possible that there are two
+// equally reachable common ancestors, and this produces ambiguity in the
+// merge. the result -- in a pathological case -- is silently accepting one
+// set of edits while discarding another; not exactly what you want a
+// version control tool to do.
+//
+// a conservative approximation is what we'll call a "subgraph recurring"
+// LCA algorithm. this is somewhat like locating the least common dominator
+// node, but not quite. it is actually just a vanilla LCA search, except
+// that any time there's a fork (a historical merge looks like a fork from
+// our perspective, working backwards from children to parents) it reduces
+// the fork to a common parent via a sequence of pairwise recursive calls
+// to itself before proceeding. this will always resolve to a common parent
+// with no ambiguity, unless it falls off the root of the graph.
+// +// unfortunately the subgraph recurring algorithm sometimes goes too far +// back in history -- for example if there is an unambiguous propagate from +// one branch to another, the entire subgraph preceeding the propagate on +// the recipient branch is elided, since it is a merge. +// +// our current hypothesis is that the *exact* condition we're looking for, +// when doing a merge, is the least node which dominates one side of the +// merge and is an ancestor of the other. + +typedef unsigned long ctx; +typedef boost::dynamic_bitset<> bitmap; +typedef boost::shared_ptr shared_bitmap; + +static void +ensure_parents_loaded(ctx child, + std::map & parents, + interner & intern, + app_state & app) +{ + if (parents.find(child) != parents.end()) + return; + + L(F("loading parents for node %d\n") % child); + + std::set imm_parents; + app.db.get_revision_parents(revision_id(intern.lookup(child)), imm_parents); + + // The null revision is not a parent for purposes of finding common + // ancestors. + for (std::set::iterator p = imm_parents.begin(); + p != imm_parents.end(); ++p) + { + if (null_id(*p)) + imm_parents.erase(p); + } + + shared_bitmap bits = shared_bitmap(new bitmap(parents.size())); + + for (std::set::const_iterator p = imm_parents.begin(); + p != imm_parents.end(); ++p) + { + ctx pn = intern.intern(p->inner()()); + L(F("parent %s -> node %d\n") % *p % pn); + if (pn >= bits->size()) + bits->resize(pn+1); + bits->set(pn); + } + + parents.insert(std::make_pair(child, bits)); +} + +static bool +expand_dominators(std::map & parents, + std::map & dominators, + interner & intern, + app_state & app) +{ + bool something_changed = false; + std::vector nodes; + + nodes.reserve(dominators.size()); + + // pass 1, pull out all the node numbers we're going to scan this time around + for (std::map::const_iterator e = dominators.begin(); + e != dominators.end(); ++e) + nodes.push_back(e->first); + + // pass 2, update any of the dominator entries we can + for (std::vector::const_iterator n = nodes.begin(); + n != nodes.end(); ++n) + { + shared_bitmap bits = dominators[*n]; + bitmap saved(*bits); + if (bits->size() <= *n) + bits->resize(*n + 1); + bits->set(*n); + + ensure_parents_loaded(*n, parents, intern, app); + shared_bitmap n_parents = parents[*n]; + + bitmap intersection(bits->size()); + + bool first = true; + for (unsigned long parent = 0; + parent != n_parents->size(); ++parent) + { + if (! 
n_parents->test(parent)) + continue; + + if (dominators.find(parent) == dominators.end()) + dominators.insert(std::make_pair(parent, + shared_bitmap(new bitmap()))); + shared_bitmap pbits = dominators[parent]; + + if (bits->size() > pbits->size()) + pbits->resize(bits->size()); + + if (pbits->size() > bits->size()) + bits->resize(pbits->size()); + + if (first) + { + intersection = (*pbits); + first = false; + } + else + intersection &= (*pbits); + } + + (*bits) |= intersection; + if (*bits != saved) + something_changed = true; + } + return something_changed; +} + + +static bool +expand_ancestors(std::map & parents, + std::map & ancestors, + interner & intern, + app_state & app) +{ + bool something_changed = false; + std::vector nodes; + + nodes.reserve(ancestors.size()); + + // pass 1, pull out all the node numbers we're going to scan this time around + for (std::map::const_iterator e = ancestors.begin(); + e != ancestors.end(); ++e) + nodes.push_back(e->first); + + // pass 2, update any of the ancestor entries we can + for (std::vector::const_iterator n = nodes.begin(); n != nodes.end(); ++n) + { + shared_bitmap bits = ancestors[*n]; + bitmap saved(*bits); + if (bits->size() <= *n) + bits->resize(*n + 1); + bits->set(*n); + + ensure_parents_loaded(*n, parents, intern, app); + shared_bitmap n_parents = parents[*n]; + for (ctx parent = 0; parent != n_parents->size(); ++parent) + { + if (! n_parents->test(parent)) + continue; + + if (bits->size() <= parent) + bits->resize(parent + 1); + bits->set(parent); + + if (ancestors.find(parent) == ancestors.end()) + ancestors.insert(make_pair(parent, + shared_bitmap(new bitmap()))); + shared_bitmap pbits = ancestors[parent]; + + if (bits->size() > pbits->size()) + pbits->resize(bits->size()); + + if (pbits->size() > bits->size()) + bits->resize(pbits->size()); + + (*bits) |= (*pbits); + } + if (*bits != saved) + something_changed = true; + } + return something_changed; +} + +static bool +find_intersecting_node(bitmap & fst, + bitmap & snd, + interner const & intern, + revision_id & anc) +{ + + if (fst.size() > snd.size()) + snd.resize(fst.size()); + else if (snd.size() > fst.size()) + fst.resize(snd.size()); + + bitmap intersection = fst & snd; + if (intersection.any()) + { + L(F("found %d intersecting nodes\n") % intersection.count()); + for (ctx i = 0; i < intersection.size(); ++i) + { + if (intersection.test(i)) + { + anc = revision_id(intern.lookup(i)); + return true; + } + } + } + return false; +} + +// static void +// dump_bitset_map(std::string const & hdr, +// std::map< ctx, shared_bitmap > const & mm) +// { +// L(F("dumping [%s] (%d entries)\n") % hdr % mm.size()); +// for (std::map< ctx, shared_bitmap >::const_iterator i = mm.begin(); +// i != mm.end(); ++i) +// { +// L(F("dump [%s]: %d -> %s\n") % hdr % i->first % (*(i->second))); +// } +// } + +bool +find_common_ancestor_for_merge(revision_id const & left, + revision_id const & right, + revision_id & anc, + app_state & app) +{ + interner intern; + std::map< ctx, shared_bitmap > + parents, ancestors, dominators; + + ctx ln = intern.intern(left.inner()()); + ctx rn = intern.intern(right.inner()()); + + shared_bitmap lanc = shared_bitmap(new bitmap()); + shared_bitmap ranc = shared_bitmap(new bitmap()); + shared_bitmap ldom = shared_bitmap(new bitmap()); + shared_bitmap rdom = shared_bitmap(new bitmap()); + + ancestors.insert(make_pair(ln, lanc)); + ancestors.insert(make_pair(rn, ranc)); + dominators.insert(make_pair(ln, ldom)); + dominators.insert(make_pair(rn, rdom)); + + L(F("searching 
for common ancestor, left=%s right=%s\n") % left % right); + + while (expand_ancestors(parents, ancestors, intern, app) || + expand_dominators(parents, dominators, intern, app)) + { + L(F("common ancestor scan [par=%d,anc=%d,dom=%d]\n") % + parents.size() % ancestors.size() % dominators.size()); + + if (find_intersecting_node(*lanc, *rdom, intern, anc)) + { + L(F("found node %d, ancestor of left %s and dominating right %s\n") + % anc % left % right); + return true; + } + + else if (find_intersecting_node(*ranc, *ldom, intern, anc)) + { + L(F("found node %d, ancestor of right %s and dominating left %s\n") + % anc % right % left); + return true; + } + } +// dump_bitset_map("ancestors", ancestors); +// dump_bitset_map("dominators", dominators); +// dump_bitset_map("parents", parents); + return false; +} + + +bool +find_least_common_ancestor(revision_id const & left, + revision_id const & right, + revision_id & anc, + app_state & app) +{ + interner intern; + std::map< ctx, shared_bitmap > + parents, ancestors; + + ctx ln = intern.intern(left.inner()()); + ctx rn = intern.intern(right.inner()()); + + shared_bitmap lanc = shared_bitmap(new bitmap()); + shared_bitmap ranc = shared_bitmap(new bitmap()); + + ancestors.insert(make_pair(ln, lanc)); + ancestors.insert(make_pair(rn, ranc)); + + L(F("searching for least common ancestor, left=%s right=%s\n") % left % right); + + while (expand_ancestors(parents, ancestors, intern, app)) + { + L(F("least common ancestor scan [par=%d,anc=%d]\n") % + parents.size() % ancestors.size()); + + if (find_intersecting_node(*lanc, *ranc, intern, anc)) + { + L(F("found node %d, ancestor of left %s and right %s\n") + % anc % left % right); + return true; + } + } +// dump_bitset_map("ancestors", ancestors); +// dump_bitset_map("parents", parents); + return false; +} + + +// FIXME: this algorithm is incredibly inefficient; it's O(n) where n is the +// size of the entire revision graph. + +static bool +is_ancestor(revision_id const & ancestor_id, + revision_id const & descendent_id, + std::multimap const & graph) +{ + + std::set visited; + std::queue queue; + + queue.push(ancestor_id); + + while (!queue.empty()) + { + revision_id current_id = queue.front(); + queue.pop(); + + if (current_id == descendent_id) + return true; + else + { + typedef std::multimap::const_iterator gi; + std::pair children = graph.equal_range(current_id); + for (gi i = children.first; i != children.second; ++i) + { + if (visited.find(i->second) == visited.end()) + { + queue.push(i->second); + visited.insert(i->second); + } + } + } + } + return false; +} + +bool +is_ancestor(revision_id const & ancestor_id, + revision_id const & descendent_id, + app_state & app) +{ + L(F("checking whether %s is an ancestor of %s\n") % ancestor_id % descendent_id); + + std::multimap graph; + app.db.get_revision_ancestry(graph); + return is_ancestor(ancestor_id, descendent_id, graph); +} + + +static void +add_bitset_to_union(shared_bitmap src, + shared_bitmap dst) +{ + if (dst->size() > src->size()) + src->resize(dst->size()); + if (src->size() > dst->size()) + dst->resize(src->size()); + *dst |= *src; +} + + +static void +calculate_ancestors_from_graph(interner & intern, + revision_id const & init, + std::set const & legal, + std::multimap const & graph, + std::map< ctx, shared_bitmap > & ancestors, + shared_bitmap & total_union) +{ + typedef std::multimap::const_iterator gi; + std::stack stk; + + stk.push(intern.intern(init.inner()())); + + while (! 
stk.empty()) + { + ctx us = stk.top(); + revision_id rev(hexenc(intern.lookup(us))); + + std::pair parents = graph.equal_range(rev); + bool pushed = false; + + // first make sure all parents are done + for (gi i = parents.first; i != parents.second; ++i) + { + ctx parent = intern.intern(i->second.inner()()); + if (ancestors.find(parent) == ancestors.end()) + { + stk.push(parent); + pushed = true; + break; + } + } + + // if we pushed anything we stop now. we'll come back later when all + // the parents are done. + if (pushed) + continue; + + shared_bitmap b = shared_bitmap(new bitmap()); + + for (gi i = parents.first; i != parents.second; ++i) + { + ctx parent = intern.intern(i->second.inner()()); + + // set any parent which is a member of the underlying legal set + if (legal.find(i->second) != legal.end()) + { + if (b->size() <= parent) + b->resize(parent + 1); + b->set(parent); + } + + // ensure all parents are loaded into the ancestor map + I(ancestors.find(parent) != ancestors.end()); + + // union them into our map + std::map< ctx, shared_bitmap >::const_iterator j = ancestors.find(parent); + I(j != ancestors.end()); + add_bitset_to_union(j->second, b); + } + + add_bitset_to_union(b, total_union); + ancestors.insert(std::make_pair(us, b)); + stk.pop(); + } +} + +// This function looks at a set of revisions, and for every pair A, B in that +// set such that A is an ancestor of B, it erases A. + +void +erase_ancestors(std::set & revisions, app_state & app) +{ + typedef std::multimap::const_iterator gi; + std::multimap graph; + std::multimap inverse_graph; + + app.db.get_revision_ancestry(graph); + for (gi i = graph.begin(); i != graph.end(); ++i) + inverse_graph.insert(std::make_pair(i->second, i->first)); + + interner intern; + std::map< ctx, shared_bitmap > ancestors; + + shared_bitmap u = shared_bitmap(new bitmap()); + + for (std::set::const_iterator i = revisions.begin(); + i != revisions.end(); ++i) + { + calculate_ancestors_from_graph(intern, *i, revisions, + inverse_graph, ancestors, u); + } + + std::set tmp; + for (std::set::const_iterator i = revisions.begin(); + i != revisions.end(); ++i) + { + ctx id = intern.intern(i->inner()()); + bool has_ancestor_in_set = id < u->size() && u->test(id); + if (!has_ancestor_in_set) + tmp.insert(*i); + } + + revisions = tmp; +} + +// +// The idea with this algorithm is to walk from child up to ancestor, +// recursively, accumulating all the change_sets associated with +// intermediate nodes into *one big change_set*. +// +// clever readers will realize this is an overlapping-subproblem type +// situation and thus needs to keep a dynamic programming map to keep +// itself in linear complexity. +// +// in fact, we keep two: one which maps to computed results (partial_csets) +// and one which just keeps a set of all nodes we traversed +// (visited_nodes). in theory it could be one map with an extra bool stuck +// on each entry, but I think that would make it even less readable. it's +// already quite ugly. 
+// + +static bool +calculate_change_sets_recursive(revision_id const & ancestor, + revision_id const & child, + app_state & app, + change_set & cumulative_cset, + std::map > & partial_csets, + std::set & visited_nodes, + std::set const & subgraph) +{ + + if (ancestor == child) + return true; + + if (subgraph.find(child) == subgraph.end()) + return false; + + visited_nodes.insert(child); + + bool relevant_child = false; + + revision_set rev; + app.db.get_revision(child, rev); + + L(F("exploring changesets from parents of %s, seeking towards %s\n") + % child % ancestor); + + for(edge_map::const_iterator i = rev.edges.begin(); i != rev.edges.end(); ++i) + { + bool relevant_parent = false; + revision_id curr_parent = edge_old_revision(i); + + if (curr_parent.inner()().empty()) + continue; + + change_set cset_to_curr_parent; + + L(F("considering parent %s of %s\n") % curr_parent % child); + + std::map >::const_iterator j = + partial_csets.find(curr_parent); + if (j != partial_csets.end()) + { + // a recursive call has traversed this parent before and built an + // existing cset. just reuse that rather than re-traversing + cset_to_curr_parent = *(j->second); + relevant_parent = true; + } + else if (visited_nodes.find(curr_parent) != visited_nodes.end()) + { + // a recursive call has traversed this parent, but there was no + // path from it to the root, so the parent is irrelevant. skip. + relevant_parent = false; + } + else + relevant_parent = calculate_change_sets_recursive(ancestor, curr_parent, app, + cset_to_curr_parent, + partial_csets, + visited_nodes, + subgraph); + + if (relevant_parent) + { + L(F("revision %s is relevant, composing with edge to %s\n") + % curr_parent % child); + concatenate_change_sets(cset_to_curr_parent, edge_changes(i), cumulative_cset); + relevant_child = true; + break; + } + else + L(F("parent %s of %s is not relevant\n") % curr_parent % child); + } + + // store the partial edge from ancestor -> child, so that if anyone + // re-traverses this edge they'll just fetch from the partial_edges + // cache. + if (relevant_child) + partial_csets.insert(std::make_pair(child, + boost::shared_ptr + (new change_set(cumulative_cset)))); + + return relevant_child; +} + +// this finds (by breadth-first search) the set of nodes you'll have to +// walk over in calculate_change_sets_recursive, to build the composite +// changeset. this is to prevent the recursive algorithm from going way +// back in history on an unlucky guess of parent. 
+ +static void +find_subgraph_for_composite_search(revision_id const & ancestor, + revision_id const & child, + app_state & app, + std::set & subgraph) +{ + std::set frontier; + frontier.insert(child); + subgraph.insert(child); + while (!frontier.empty()) + { + std::set next_frontier; + for (std::set::const_iterator i = frontier.begin(); + i != frontier.end(); ++i) + { + revision_set rev; + app.db.get_revision(*i, rev); + L(F("adding parents of %s to subgraph\n") % *i); + + for(edge_map::const_iterator j = rev.edges.begin(); j != rev.edges.end(); ++j) + { + revision_id curr_parent = edge_old_revision(j); + if (null_id(curr_parent)) + continue; + subgraph.insert(curr_parent); + if (curr_parent == ancestor) + { + L(F("found parent %s of %s\n") % curr_parent % *i); + return; + } + else + L(F("adding parent %s to next frontier\n") % curr_parent); + next_frontier.insert(curr_parent); + } + } + frontier = next_frontier; + } +} + +void +calculate_composite_change_set(revision_id const & ancestor, + revision_id const & child, + app_state & app, + change_set & composed) +{ + L(F("calculating composite changeset between %s and %s\n") + % ancestor % child); + std::set visited; + std::set subgraph; + std::map > partial; + find_subgraph_for_composite_search(ancestor, child, app, subgraph); + calculate_change_sets_recursive(ancestor, child, app, composed, partial, + visited, subgraph); +} + + +// Stuff related to rebuilding the revision graph. Unfortunately this is a +// real enough error case that we need support code for it. + + +static void +analyze_manifest_changes(app_state & app, + manifest_id const & parent, + manifest_id const & child, + change_set & cs) +{ + manifest_map m_parent, m_child; + + if (!null_id(parent)) + app.db.get_manifest(parent, m_parent); + + I(!null_id(child)); + app.db.get_manifest(child, m_child); + + L(F("analyzing manifest changes from '%s' -> '%s'\n") % parent % child); + + for (manifest_map::const_iterator i = m_parent.begin(); + i != m_parent.end(); ++i) + { + manifest_map::const_iterator j = m_child.find(manifest_entry_path(i)); + if (j == m_child.end()) + cs.delete_file(manifest_entry_path(i)); + else if (! 
(manifest_entry_id(i) == manifest_entry_id(j))) + { + cs.apply_delta(manifest_entry_path(i), + manifest_entry_id(i), + manifest_entry_id(j)); + } + } + for (manifest_map::const_iterator i = m_child.begin(); + i != m_child.end(); ++i) + { + manifest_map::const_iterator j = m_parent.find(manifest_entry_path(i)); + if (j == m_parent.end()) + cs.add_file(manifest_entry_path(i), + manifest_entry_id(i)); + } +} + + +struct anc_graph +{ + anc_graph(bool existing, app_state & a) : + existing_graph(existing), + app(a), + max_node(0), + n_nodes("nodes", "n", 1), + n_certs_in("certs in", "c", 1), + n_revs_out("revs out", "r", 1), + n_certs_out("certs out", "C", 1) + {} + + bool existing_graph; + app_state & app; + u64 max_node; + + ticker n_nodes; + ticker n_certs_in; + ticker n_revs_out; + ticker n_certs_out; + + std::map node_to_old_man; + std::map old_man_to_node; + + std::map node_to_old_rev; + std::map old_rev_to_node; + + std::map node_to_new_rev; + std::multimap > certs; + std::multimap ancestry; + + void add_node_ancestry(u64 child, u64 parent); + void write_certs(); + void rebuild_ancestry(); + void get_node_manifest(u64 node, manifest_id & man); + u64 add_node_for_old_manifest(manifest_id const & man); + u64 add_node_for_old_revision(revision_id const & rev); + revision_id construct_revision_from_ancestry(u64 child); +}; + + +void anc_graph::add_node_ancestry(u64 child, u64 parent) +{ + L(F("noting ancestry from child %d -> parent %d\n") % child % parent); + ancestry.insert(std::make_pair(child, parent)); +} + +void anc_graph::get_node_manifest(u64 node, manifest_id & man) +{ + std::map::const_iterator i = node_to_old_man.find(node); + I(i != node_to_old_man.end()); + man = i->second; +} + +void anc_graph::write_certs() +{ + std::set cnames; + cnames.insert(cert_name(branch_cert_name)); + cnames.insert(cert_name(date_cert_name)); + cnames.insert(cert_name(author_cert_name)); + cnames.insert(cert_name(tag_cert_name)); + cnames.insert(cert_name(changelog_cert_name)); + cnames.insert(cert_name(comment_cert_name)); + cnames.insert(cert_name(testresult_cert_name)); + + typedef std::multimap >::const_iterator ci; + + for (std::map::const_iterator i = node_to_new_rev.begin(); + i != node_to_new_rev.end(); ++i) + { + revision_id rev(i->second); + + std::pair range = certs.equal_range(i->first); + + for (ci j = range.first; j != range.second; ++j) + { + cert_name name(j->second.first); + cert_value val(j->second.second); + + if (cnames.find(name) == cnames.end()) + continue; + + cert new_cert; + make_simple_cert(rev.inner(), name, val, app, new_cert); + revision rcert(new_cert); + if (! app.db.revision_cert_exists(rcert)) + { + ++n_certs_out; + app.db.put_revision_cert(rcert); + } + } + } +} + +void +anc_graph::rebuild_ancestry() +{ + P(F("rebuilding %d nodes\n") % max_node); + { + transaction_guard guard(app.db); + if (existing_graph) + app.db.delete_existing_revs_and_certs(); + + std::set parents, children, heads; + for (std::multimap::const_iterator i = ancestry.begin(); + i != ancestry.end(); ++i) + { + children.insert(i->first); + parents.insert(i->second); + } + set_difference(children.begin(), children.end(), + parents.begin(), parents.end(), + std::inserter(heads, heads.begin())); + + // FIXME: should do a depth-first traversal here, or something like, + // instead of being recursive. 
+ for (std::set::const_iterator i = heads.begin(); + i != heads.end(); ++i) + { + construct_revision_from_ancestry(*i); + } + write_certs(); + guard.commit(); + } +} + +u64 anc_graph::add_node_for_old_manifest(manifest_id const & man) +{ + I(!existing_graph); + u64 node = 0; + if (old_man_to_node.find(man) == old_man_to_node.end()) + { + node = max_node++; + ++n_nodes; + L(F("node %d = manifest %s\n") % node % man); + old_man_to_node.insert(std::make_pair(man, node)); + node_to_old_man.insert(std::make_pair(node, man)); + + // load certs + std::vector< manifest > mcerts; + app.db.get_manifest_certs(man, mcerts); + erase_bogus_certs(mcerts, app); + for(std::vector< manifest >::const_iterator i = mcerts.begin(); + i != mcerts.end(); ++i) + { + L(F("loaded '%s' manifest cert for node %s\n") % i->inner().name % node); + cert_value tv; + decode_base64(i->inner().value, tv); + ++n_certs_in; + certs.insert(std::make_pair(node, + std::make_pair(i->inner().name, tv))); + } + } + else + { + node = old_man_to_node[man]; + } + return node; +} + +u64 anc_graph::add_node_for_old_revision(revision_id const & rev) +{ + I(existing_graph); + I(!null_id(rev)); + u64 node = 0; + if (old_rev_to_node.find(rev) == old_rev_to_node.end()) + { + node = max_node++; + ++n_nodes; + + manifest_id man; + app.db.get_revision_manifest(rev, man); + + L(F("node %d = revision %s = manifest %s\n") % node % rev % man); + old_rev_to_node.insert(std::make_pair(rev, node)); + node_to_old_rev.insert(std::make_pair(node, rev)); + node_to_old_man.insert(std::make_pair(node, man)); + + // load certs + std::vector< revision > rcerts; + app.db.get_revision_certs(rev, rcerts); + erase_bogus_certs(rcerts, app); + for(std::vector< revision >::const_iterator i = rcerts.begin(); + i != rcerts.end(); ++i) + { + L(F("loaded '%s' revision cert for node %s\n") % i->inner().name % node); + cert_value tv; + decode_base64(i->inner().value, tv); + ++n_certs_in; + certs.insert(std::make_pair(node, + std::make_pair(i->inner().name, tv))); + } + } + else + { + node = old_rev_to_node[rev]; + } + + return node; +} + +// FIXME: this is recursive -- stack depth grows as ancestry depth -- and will +// overflow the stack on large histories. 
+revision_id
+anc_graph::construct_revision_from_ancestry(u64 child)
+{
+  L(F("processing node %d\n") % child);
+
+  if (node_to_new_rev.find(child) != node_to_new_rev.end())
+    {
+      L(F("node %d already processed, skipping\n") % child);
+      return node_to_new_rev.find(child)->second;
+    }
+
+  manifest_id child_man;
+  get_node_manifest(child, child_man);
+
+  revision_set rev;
+  rev.new_manifest = child_man;
+
+  typedef std::multimap<u64, u64>::const_iterator ci;
+  std::pair<ci, ci> range = ancestry.equal_range(child);
+  if (range.first == range.second)
+    {
+      L(F("node %d is a root node\n") % child);
+      revision_id null_rid;
+      manifest_id null_mid;
+      boost::shared_ptr<change_set> cs(new change_set());
+      analyze_manifest_changes(app, null_mid, child_man, *cs);
+      rev.edges.insert(std::make_pair(null_rid,
+                                      std::make_pair(null_mid, cs)));
+    }
+  else
+    {
+      for (ci i = range.first; i != range.second; ++i)
+        {
+          I(child == i->first);
+          u64 parent(i->second);
+          L(F("processing edge from child %d -> parent %d\n") % child % parent);
+
+          revision_id parent_rid;
+          std::map<u64, revision_id>::const_iterator
+            j = node_to_new_rev.find(parent);
+
+          if (j != node_to_new_rev.end())
+            parent_rid = j->second;
+          else
+            {
+              parent_rid = construct_revision_from_ancestry(parent);
+              node_to_new_rev.insert(std::make_pair(parent, parent_rid));
+            }
+
+          L(F("parent node %d = revision %s\n") % parent % parent_rid);
+          manifest_id parent_man;
+          get_node_manifest(parent, parent_man);
+          boost::shared_ptr<change_set> cs(new change_set());
+          analyze_manifest_changes(app, parent_man, child_man, *cs);
+          rev.edges.insert(std::make_pair(parent_rid,
+                                          std::make_pair(parent_man, cs)));
+        }
+    }
+
+  revision_id rid;
+  calculate_ident(rev, rid);
+  node_to_new_rev.insert(std::make_pair(child, rid));
+
+  if (!app.db.revision_exists (rid))
+    {
+      L(F("mapped node %d to revision %s\n") % child % rid);
+      app.db.put_revision(rid, rev);
+      ++n_revs_out;
+    }
+  else
+    {
+      L(F("skipping already existing revision %s\n") % rid);
+    }
+
+  return rid;
+}
+
+void
+build_changesets_from_existing_revs(app_state & app)
+{
+  global_sanity.set_relaxed(true);
+  anc_graph graph(true, app);
+
+  P(F("rebuilding revision graph from existing graph\n"));
+  std::multimap<revision_id, revision_id> existing_graph;
+
+  app.db.get_revision_ancestry(existing_graph);
+  for (std::multimap<revision_id, revision_id>::const_iterator i = existing_graph.begin();
+       i != existing_graph.end(); ++i)
+    {
+      if (!null_id(i->first))
+        {
+          u64 parent_node = graph.add_node_for_old_revision(i->first);
+          u64 child_node = graph.add_node_for_old_revision(i->second);
+          graph.add_node_ancestry(child_node, parent_node);
+        }
+    }
+
+  global_sanity.set_relaxed(false);
+  graph.rebuild_ancestry();
+}
+
+
+void
+build_changesets_from_manifest_ancestry(app_state & app)
+{
+  anc_graph graph(false, app);
+
+  P(F("rebuilding revision graph from manifest certs\n"));
+  std::vector< manifest<cert> > tmp;
+  app.db.get_manifest_certs(cert_name("ancestor"), tmp);
+  erase_bogus_certs(tmp, app);
+
+  for (std::vector< manifest<cert> >::const_iterator i = tmp.begin();
+       i != tmp.end(); ++i)
+    {
+      cert_value tv;
+      decode_base64(i->inner().value, tv);
+      manifest_id child, parent;
+      child = i->inner().ident;
+      parent = hexenc<id>(tv());
+
+      u64 parent_node = graph.add_node_for_old_manifest(parent);
+      u64 child_node = graph.add_node_for_old_manifest(child);
+      graph.add_node_ancestry(child_node, parent_node);
+    }
+
+  graph.rebuild_ancestry();
+}
+
+
+// i/o stuff
+
+std::string revision_file_name("revision");
+
+namespace
+{
+  namespace syms
+  {
+    std::string const old_revision("old_revision");
+    std::string const new_manifest("new_manifest");
+    std::string const old_manifest("old_manifest");
+  }
+}
+
+
+void
+print_edge(basic_io::printer & printer,
+           edge_entry const & e)
+{
+  basic_io::stanza st;
+  st.push_hex_pair(syms::old_revision, edge_old_revision(e).inner()());
+  st.push_hex_pair(syms::old_manifest, edge_old_manifest(e).inner()());
+  printer.print_stanza(st);
+  print_change_set(printer, edge_changes(e));
+}
+
+
+void
+print_revision(basic_io::printer & printer,
+               revision_set const & rev)
+{
+  rev.check_sane();
+  basic_io::stanza st;
+  st.push_hex_pair(syms::new_manifest, rev.new_manifest.inner()());
+  printer.print_stanza(st);
+  for (edge_map::const_iterator edge = rev.edges.begin();
+       edge != rev.edges.end(); ++edge)
+    print_edge(printer, *edge);
+}
+
+
+void
+parse_edge(basic_io::parser & parser,
+           edge_map & es)
+{
+  boost::shared_ptr<change_set> cs(new change_set());
+  manifest_id old_man;
+  revision_id old_rev;
+  std::string tmp;
+
+  parser.esym(syms::old_revision);
+  parser.hex(tmp);
+  old_rev = revision_id(tmp);
+
+  parser.esym(syms::old_manifest);
+  parser.hex(tmp);
+  old_man = manifest_id(tmp);
+
+  parse_change_set(parser, *cs);
+
+  es.insert(std::make_pair(old_rev, std::make_pair(old_man, cs)));
+}
+
+
+void
+parse_revision(basic_io::parser & parser,
+               revision_set & rev)
+{
+  rev.edges.clear();
+  std::string tmp;
+  parser.esym(syms::new_manifest);
+  parser.hex(tmp);
+  rev.new_manifest = manifest_id(tmp);
+  while (parser.symp(syms::old_revision))
+    parse_edge(parser, rev.edges);
+  rev.check_sane();
+}
+
+void
+read_revision_set(data const & dat,
+                  revision_set & rev)
+{
+  std::istringstream iss(dat());
+  basic_io::input_source src(iss, "revision");
+  basic_io::tokenizer tok(src);
+  basic_io::parser pars(tok);
+  parse_revision(pars, rev);
+  I(src.lookahead == EOF);
+  rev.check_sane();
+}
+
+void
+read_revision_set(revision_data const & dat,
+                  revision_set & rev)
+{
+  data unpacked;
+  unpack(dat.inner(), unpacked);
+  read_revision_set(unpacked, rev);
+  rev.check_sane();
+}
+
+void
+write_revision_set(revision_set const & rev,
+                   data & dat)
+{
+  rev.check_sane();
+  std::ostringstream oss;
+  basic_io::printer pr(oss);
+  print_revision(pr, rev);
+  dat = data(oss.str());
+}
+
+void
+write_revision_set(revision_set const & rev,
+                   revision_data & dat)
+{
+  rev.check_sane();
+  data d;
+  write_revision_set(rev, d);
+  base64< gzip<data> > packed;
+  pack(d, packed);
+  dat = revision_data(packed);
+}
+
+#ifdef BUILD_UNIT_TESTS
+#include "unit_tests.hh"
+#include "sanity.hh"
+
+static void
+revision_test()
+{
+}
+
+void
+add_revision_tests(test_suite * suite)
+{
+  I(suite);
+  suite->add(BOOST_TEST_CASE(&revision_test));
+}
+
+
+#endif // BUILD_UNIT_TESTS
============================================================
--- tests/(todo)_design_approval_semantics/__driver__.lua 753895fefbba1ab18378b41847714c209454a7bc
+++ tests/(todo)_design_approval_semantics/__driver__.lua 753895fefbba1ab18378b41847714c209454a7bc
@@ -0,0 +1,47 @@
+
+-- This test is a bug report.
+xfail_if(true, false)
+
+-- There's a somewhat subtle issue about approval, branch membership,
+-- etc. The way I (njs) had been thinking about things originally, a
+-- revision R is in a branch B iff there's a valid trusted cert cert(R,
+-- "branch", B). So, currently e.g. 'propagate' will skip performing a
+-- merge in some cases and instead just stick a new branch cert on the
+-- head that's being propagated, and 'update' will skip past non-branch
+-- nodes to reach branch nodes.
+-- +-- graydon points out, though, that 'update's original semantics, of +-- _not_ skipping past non-branch revisions, was intentional. because +-- branch certs show approval, and in real life people always look at +-- and approve diffs, not tree-states. so update should only follow +-- continuous paths of approval. +-- +-- currently, 'update' still will skip past non-branch revisions, since +-- other parts of the code assume that this is how branches work, and I +-- (njs again) figured it would be better to let things be consistent +-- while we figure out what should really happen. but, that approval +-- inherently happens on edges is a critical point, and we should +-- address it somehow. +-- +-- there are some subtleties to it, though. some things came up on IRC: + +-- --monotone, Jan 23: +-- graydon: here's a question about branch-as-approval +-- graydon: should 'heads' be changed to be 'greatest descendent of a privileged root'? +-- (s/descendent/descendent with a continuous chain of branch certs from that root/) +-- possibly. +-- possibly once we know what "priviledged" means :) +-- or privileged, depending on whether I learn to spell +-- one could do it with the lookaside trust branch model, have a table for each branch specifying which revision is considered the root + +-- if we have A -> B -> C, B -> D -> E, where everything except D has appropriate approval, should "monotone merge" cherrypick D -> E onto C? :-) +-- (on the theory that merge is supposed to gather up all approved revisions into one head) +-- hmm, and if someone does say "disapprove D", they have to also say "approve D" for it to work! +-- heh +-- both interesting issues +-- don't know. I'm not sure about either. + + +-- TODO: figure out how all this should work. +-- solution should support projects with different sorts of +-- workflows/approval requirements... ============================================================ --- tests/(todo)_undo_update_command/__driver__.lua 045bc521e2380813d982f8efdc5476221f266088 +++ tests/(todo)_undo_update_command/__driver__.lua 045bc521e2380813d982f8efdc5476221f266088 @@ -0,0 +1,32 @@ + +-- This test is a bug report. +xfail_if(true, false) + +-- "update" is the only command that modifies the workspace, i.e., +-- it is the only command that may destroy data that cannot be easily +-- recovered from the database. So it should be undo-able. +-- +-- This wouldn't be that hard to do -- before starting an update, make +-- a note of all file modifications being applied, and save copies of +-- them somewhere under _MTN/. The only tricky part is making sure we +-- can undo tree rearrangements. +-- +-- For bonus points, use this to implement "workspace rollback" -- +-- right now, we can't modify the workspace atomically. But if we +-- always saved this information before touching any files, then we +-- could also save a marker file when we start munging the filesystem, +-- that we delete when finished. When monotone starts up, it can check +-- for this marker file, and either rollback automatically or demand +-- the user do so or whatever. +-- +-- Making this work requires some careful thought, of course -- one has +-- to make sure that rollback is idempotent, it'd be nice if rollback +-- information didn't immediately overwrite undo information (so an +-- interrupted update didn't kill undo information after a rollback), +-- etc. +-- +-- It'd also be nice if there was a "redo_update" to un-undo an update, +-- I suppose... + +-- Are there any other operations that mutate the workspace? 
They +-- should all be reversible somehow... ============================================================ --- tests/(todo)_write_monotone-agent/__driver__.lua 4a0d2d148acb0b7eac94291927ccbbfc07f0b74e +++ tests/(todo)_write_monotone-agent/__driver__.lua 4a0d2d148acb0b7eac94291927ccbbfc07f0b74e @@ -0,0 +1,73 @@ + +mtn_setup() + +-- This test is a bug report. +xfail_if(true, false) + +-- Todo: +-- 1) Write monotone-agent +-- 2) Write tests for it here +-- +-- Desired user experience: +-- - start the agent at login time +-- (it would be nice if we could spawn it automatically the first +-- time monotone is run, but then how would we communicate the +-- agent's address to independent processes? and cause it to exit +-- when the session ends?) +-- - first time monotone needs to sign something, it prompts for a +-- passphrase +-- - second and later times monotone needs to sign something, no +-- passphrase is necessary (even if the second time is in a +-- different monotone process entirely) +-- - probably have some command that does nothing except prompt for a +-- passphrase, so people who want to type it once at the beginning +-- of a session can do that. +-- +-- Internal design: +-- - agent opens an access controlled unix-domain socket (what's the +-- windows equivalent? cf. pageant below) +-- - an environment variable saves the socket (and pid, I suppose, +-- for cleanup), like ssh-agent (again, what's the windows +-- equivalent?) +-- - when monotone wants to sign something, it checks to see if the +-- agent has the private key. +-- - if it does, then monotone sends the agent the data to be +-- signed, and waits for the response +-- - if it doesn't, then monotone requests the passphrase, decrypts +-- the key, sends the key to the agent, and then either goes to +-- the above step (maybe simpler) or just signs the data itself +-- (maybe faster) +-- - so the agent needs protocol packets: +-- - do you have key ? +-- - response +-- - sign data (or just hash of data ) with key +-- - response +-- - add key +-- - response +-- - for debugging, might want 'list keys' and 'remove key'; 'list +-- keys' probably means agent should store key ids too? depends +-- on how debuggish it is, I guess. +-- - note that this design assumes that monotone will have +-- independent access to the list of keys, and so doesn't need to +-- get the ids and hashes from the agent. this seems reasonable, +-- at least for a first pass; it should be simpler to code, and the +-- cases where it fails (e.g., agent forwarding) don't seem very +-- compelling to me. +-- +-- Other similar projects to steal ideas from: +-- - ssh-agent in openssh +-- their protocol is documented in +-- draft-ylonen-ssh-protocol-00.txt. It's simple enough that this +-- isn't that useful, though. (Unfortunately, ssh-agent provides +-- challenge-responses, not signatures, so we can't use it +-- directly. Using it directly would have been much nicer. I +-- suppose in the long run we could try and convince the ssh-agent +-- people that providing signing services for some keys might be +-- generally useful...) +-- It's not clear that signing services are something you +-- want to forward to other machines, though, even if +-- challenge-response services are... +-- - "pageant" in putty is a ssh-agent that works on Windows +-- - how does this work in windows, with no SSH_AUTH_SOCK and no +-- unix-domain sockets? 
+-- - codeville has an agent implementation too ============================================================ --- tests/_MTN_revision_is_required/__driver__.lua 69297e83d80bddb26d885cac8d9171f1079df2ef +++ tests/_MTN_revision_is_required/__driver__.lua 69297e83d80bddb26d885cac8d9171f1079df2ef @@ -0,0 +1,16 @@ + +mtn_setup() + +addfile("foo", "blah blah") +addfile("bar", "blah blah") +remove("foo") +check(mtn("ls", "missing"), 0, false, false) +check(mtn("drop", "foo"), 0, false, false) +check(mtn("ls", "missing"), 0, false, false) +commit() + +remove("bar") +check(mtn("ls", "missing"), 0, false, false) + +remove("_MTN/revision") +check(mtn("ls", "missing"), 1, false, false) ============================================================ --- tests/automate_automate_version/__driver__.lua 6fdc200cf0f8c01e6174ebb04f521b0e645198ec +++ tests/automate_automate_version/__driver__.lua 6fdc200cf0f8c01e6174ebb04f521b0e645198ec @@ -0,0 +1,10 @@ + +mtn_setup() + +check(mtn("automate", "interface_version"), 0, true, false) +rename("stdout", "a_v") + +-- MinGW's wc produces " 1" as output. Arithmetic comparison works, string comparison doesn't +check(numlines("a_v") == 1) +-- This is really ^[0-9]+\.[0-9]+$, but m4 is obfuscatory. +check(qgrep("^[0-9]+\.[0-9]+$", "a_v")) ============================================================ --- tests/automate_heads/__driver__.lua 388eb896ee29c7d14a5294ebafbc60b43faa1bef +++ tests/automate_heads/__driver__.lua 388eb896ee29c7d14a5294ebafbc60b43faa1bef @@ -0,0 +1,26 @@ + +mtn_setup() +revs = {} + +addfile("foo", "blah") +commit() +revs.base = base_revision() + +for i = 1,4 do + revert_to(revs.base) + addfile(tostring(i), tostring(i)) + commit() + revs[i] = base_revision() +end +table.sort(revs) +for _,x in ipairs(revs) do append("wanted_heads", x.."\n") end + + +check(mtn("automate", "heads", "testbranch"), 0, true, false) +canonicalize("stdout") +check(samefile("wanted_heads", "stdout")) + +check(mtn("automate", "heads", "nosuchbranch"), 0, true, false) +writefile("empty") +check(samefile("empty", "stdout")) + ============================================================ --- tests/branch-based_checkout/__driver__.lua 90dc840151263a46c883a9a489dfbb8ee279a16a +++ tests/branch-based_checkout/__driver__.lua 90dc840151263a46c883a9a489dfbb8ee279a16a @@ -0,0 +1,15 @@ + +mtn_setup() + +writefile("foo.testbranch", "this is the testbranch version") +writefile("foo.otherbranch", "this version goes in otherbranch") + +copyfile("foo.testbranch", "foo") +check(mtn("add", "foo"), 0, false, false) +commit() + +copyfile("foo.otherbranch", "foo") +commit("otherbranch") + +check(mtn("--branch=testbranch", "checkout"), 0, false, false) +check(samefile("testbranch/foo", "foo.testbranch")) ============================================================ --- tests/committing_with_a_non-english_message/__driver__.lua 184d6d737de6b9eb5a2a23b1c6f786cf972f452c +++ tests/committing_with_a_non-english_message/__driver__.lua 184d6d737de6b9eb5a2a23b1c6f786cf972f452c @@ -0,0 +1,30 @@ + +mtn_setup() + +european_utf8 = "\195\182\195\164\195\188\195\159" + -- "\xC3\xB6\xC3\xA4\xC3\xBc\xC3\x9F" +european_8859_1 = "\246\228\252\223" + -- "\xF6\xE4\xFC\xDF" +japanese_utf8 = "\227\129\166\227\129\153\227\129\168" + -- "\xE3\x81\xA6\xE3\x81\x99\xE3\x81\xA8" +japanese_euc_jp = "\164\198\164\185\164\200" + -- "\xA4\xC6\xA4\xB9\xA4\xC8" + +set_env("CHARSET", "UTF-8") +addfile("a", "hello there") +check(mtn("--debug", "commit", "--message", european_utf8), 0, false, false) + + +set_env("CHARSET", "iso-8859-1") 
+addfile("b", "hello there") +check(mtn("--debug", "commit", "--message", european_8859_1), 0, false, false) + + +set_env("CHARSET", "UTF-8") +addfile("c", "hello there") +check(mtn("--debug", "commit", "--message", japanese_utf8), 0, false, false) + + +set_env("CHARSET", "euc-jp") +addfile("d", "hello there") +check(mtn("--debug", "commit", "--message", japanese_euc_jp), 0, false, false) ============================================================ --- tests/db_load_must_create_a_new_db/__driver__.lua bc64c2372001fc1e56dcb48b77ecea82e8483511 +++ tests/db_load_must_create_a_new_db/__driver__.lua bc64c2372001fc1e56dcb48b77ecea82e8483511 @@ -0,0 +1,19 @@ + +mtn_setup() + +addfile("foo", "stuff") +commit() + +check(mtn("db", "dump"), 0, true, false) +canonicalize("stdout") +rename("stdout", "dump") +copyfile("dump", "stdin") +check(mtn("--db=test2.db", "db", "load"), 0, false, false, true) + +mkdir("test3.db") +copyfile("dump", "stdin") +check(mtn("--db=test3.db", "db", "load"), 1, false, false, true) + +check(mtn("--db=test4.db", "db", "init"), 0, false, false) +copyfile("dump", "stdin") +check(mtn("--db=test4.db", "db", "load"), 1, false, false, true) ============================================================ --- tests/db_missing/__driver__.lua 0f4c308a335dee7535e06b7b7a63d2c5229a779e +++ tests/db_missing/__driver__.lua 0f4c308a335dee7535e06b7b7a63d2c5229a779e @@ -0,0 +1,10 @@ + +mtn_setup() + +mkdir("foo") + +writefile("foo/foo.db", "foo file") + +check(indir("foo", mtn("--db=", "ls", "keys")), 1, false, false) +check(indir("foo", mtn("--db=foo.db", "ls", "keys")), 1, false, false) +check(indir("foo", mtn("--db=missing.db", "ls", "keys")), 1, false, false) ============================================================ --- tests/failed_checkout_is_a_no-op/__driver__.lua a0e0274a4b6f5b96ef1b38a7bddc82432c5597b7 +++ tests/failed_checkout_is_a_no-op/__driver__.lua a0e0274a4b6f5b96ef1b38a7bddc82432c5597b7 @@ -0,0 +1,12 @@ + +mtn_setup() + +-- If a checkout fails, no target directory should be created, and if +-- the checkout directory already exists, no _MTN/ directory should be +-- created. + +check(mtn("checkout", "--revision=bogus-id", "outdir"), 1, false, false) +check(not exists("outdir")) +mkdir("outdir") +check(indir("outdir", mtn("checkout", "--revision=bogus-id", ".")), 1, false, false) +check(not exists("outdir/_MTN")) ============================================================ --- tests/files_with_intermediate__MTN_path_elements/__driver__.lua 8740ff57f33dadbd67b2f008dc84ef0f0b774497 +++ tests/files_with_intermediate__MTN_path_elements/__driver__.lua 8740ff57f33dadbd67b2f008dc84ef0f0b774497 @@ -0,0 +1,64 @@ + +mtn_setup() + +mkdir("dir1") +mkdir("dir1/_MTN") +mkdir("dir2") +mkdir("dir3") + +-- Check both implicit recursive add... +writefile("dir1/_MTN/testfile1", "testfile 1") +writefile("dir2/_MTN", "_MTN file 1") +check(mtn("add", "dir1"), 0, false, false) +check(mtn("add", "dir2"), 0, false, false) +commit() + +-- ...and explicit add. 
+writefile("dir1/_MTN/testfile2", "testfile 2") +writefile("dir3/_MTN", "_MTN file 2") +check(mtn("add", "dir1/_MTN/testfile2"), 0, false, false) +check(mtn("add", "dir3/_MTN"), 0, false, false) +commit() + +check(mtn("checkout", "outdir1"), 0, false, false) +check(samefile("dir1/_MTN/testfile1", "outdir1/dir1/_MTN/testfile1")) +check(samefile("dir1/_MTN/testfile2", "outdir1/dir1/_MTN/testfile2")) +check(samefile("dir2/_MTN", "outdir1/dir2/_MTN")) +check(samefile("dir3/_MTN", "outdir1/dir3/_MTN")) + +-- renames + +rename("dir1/_MTN/testfile1", "dir1/_MTN/testfile1x") +rename("dir2/_MTN", "dir2/TM") +rename("dir3", "dir3x") + +check(mtn("rename", "dir1/_MTN/testfile1", "dir1/_MTN/testfile1x"), 0, false, false) +check(mtn("rename", "dir2/_MTN", "dir2/TM"), 0, false, false) +check(mtn("rename", "dir3", "dir3x"), 0, false, false) +commit() + +check(mtn("checkout", "outdir2"), 0, false, false) +check(samefile("dir1/_MTN/testfile1x", "outdir2/dir1/_MTN/testfile1x")) +check(samefile("dir1/_MTN/testfile2", "outdir2/dir1/_MTN/testfile2")) +check(samefile("dir2/TM", "outdir2/dir2/TM")) +check(samefile("dir3x/_MTN", "outdir2/dir3x/_MTN")) + +-- explicit drop + +check(mtn("drop", "dir1/_MTN/testfile2"), 0, false, false) +commit() + +check(mtn("checkout", "outdir3"), 0, false, false) +check(samefile("dir1/_MTN/testfile1x", "outdir2/dir1/_MTN/testfile1x")) +check(not exists("outdir3/dir1/_MTN/testfile2")) + +-- recursive drop + +check(mtn("drop", "--recursive", "dir1"), 0, false, false) +commit() + +check(mtn("checkout", "outdir4"), 0, false, false) +check(not exists("outdir4/dir1/_MTN/testfile1x")) +check(not exists("outdir4/dir1/_MTN/testfile2")) +check(not exists("outdir4/dir1/_MTN")) +check(not exists("outdir4/dir1")) ============================================================ --- tests/largish_file/__driver__.lua d879bd8f8dc04288cf3f6f88963f2ff1e725ce72 +++ tests/largish_file/__driver__.lua d879bd8f8dc04288cf3f6f88963f2ff1e725ce72 @@ -0,0 +1,36 @@ + +mtn_setup() + +-- Check that we can handle a 15 meg file in the database + +-- This is only 'largish" -- we should check for >4 gigabytes too, for +-- a real "large file", but that would be kind of rude from the test +-- suite. 
+ +largish = io.open("largish", "wb") +str16k = string.rep("\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0", 1024) +for i=1,15 do + for j=1,64 do + largish:write(str16k) + end +end +largish:close() + +check(mtn("add", "largish"), 0, false, false) +commit() +base = base_revision() + +rename("largish", "largish.orig") + +check(cat("-", "largish.orig"), 0, true, false, "foo\n") +rename_over("stdout", "largish") +append("largish", "bar\n") +commit() +mod = base_revision() + +rename("largish", "largish.modified") + +check(mtn("checkout", "--revision", base, "base"), 0, false, false) +check(samefile("largish.orig", "base/largish")) +check(mtn("checkout", "--revision", mod, "modified"), 0, false, false) +check(samefile("largish.modified", "modified/largish")) ============================================================ --- tests/merge_normalization_edge_case/__driver__.lua 540f5a97e67577161144a624a7b84717d0aef727 +++ tests/merge_normalization_edge_case/__driver__.lua 540f5a97e67577161144a624a7b84717d0aef727 @@ -0,0 +1,28 @@ + +mtn_setup() + +getfile("parent") +getfile("left") +getfile("right") + +parent = "fe24df7edf04cb06161defc10b252c5fa32bf1f7" +left = "f4657ce998dd0e39465a3f345f3540b689fd60ad" +right = "1836ed24710f5b8943bed224cf296689c6a106c2" + +check(sha1("parent") == parent) +check(sha1("left") == left) +check(sha1("right") == right) + +copyfile("parent", "stdin") +check(mtn("fload"), 0, false, false, true) +copyfile("left", "stdin") +check(mtn("fload"), 0, false, false, true) +copyfile("right", "stdin") +check(mtn("fload"), 0, false, false, true) + +check(mtn("fmerge", parent, left, right), 0, true, false) +canonicalize("stdout") +rename("stdout", "merge.monotone") + +-- in this case the output should be the same as right. +check(samefile("merge.monotone", "right")) ============================================================ --- tests/merge_normalization_edge_case/left f4657ce998dd0e39465a3f345f3540b689fd60ad +++ tests/merge_normalization_edge_case/left f4657ce998dd0e39465a3f345f3540b689fd60ad @@ -0,0 +1,400 @@ + +-- this is the standard set of lua hooks for monotone; +-- user-provided files can override it or add to it. + +function temp_file() + local tdir + tdir = os.getenv("TMPDIR") + if tdir == nil then tdir = os.getenv("TMP") end + if tdir == nil then tdir = os.getenv("TEMP") end + if tdir == nil then tdir = "/tmp" end + return mkstemp(string.format("%s/mt.XXXXXX", tdir)) +end + +function execute(path, ...) + local pid = posix.fork() + local ret = -1 + if pid == 0 then + posix.exec(path, unpack(arg)) + else + ret, pid = posix.wait(pid) + end + return ret +end + + + +-- attributes are persistent metadata about files (such as execute +-- bit, ACLs, various special flags) which we want to have set and +-- re-set any time the files are modified. the attributes themselves +-- are stored in a file .mt-attrs, in the working copy (and +-- manifest). each (f,k,v) triple in an atribute file turns into a +-- call to attr_functions[k](f,v) in lua. 
+ +if (attr_functions == nil) then + attr_functions = {} +end + + +attr_functions["execute"] = + function(filename, value) + if (value == "true") then + posix.chmod(filename, "u+x") + end + end + + +function ignore_file(name) + if (string.find(name, "%.a$")) then return true end + if (string.find(name, "%.so$")) then return true end + if (string.find(name, "%.o$")) then return true end + if (string.find(name, "%.la$")) then return true end + if (string.find(name, "%.lo$")) then return true end + if (string.find(name, "%.aux$")) then return true end + if (string.find(name, "%.bak$")) then return true end + if (string.find(name, "%.orig$")) then return true end + if (string.find(name, "%.rej$")) then return true end + if (string.find(name, "%~$")) then return true end + if (string.find(name, "/core$")) then return true end + if (string.find(name, "^CVS/")) then return true end + if (string.find(name, "/CVS/")) then return true end + if (string.find(name, "^%.svn/")) then return true end + if (string.find(name, "/%.svn/")) then return true end + if (string.find(name, "^SCCS/")) then return true end + if (string.find(name, "/SCCS/")) then return true end + return false; +end + + +function edit_comment(basetext) + local exe = "vi" + local visual = os.getenv("VISUAL") + if (visual ~= nil) then exe = visual end + local editor = os.getenv("EDITOR") + if (editor ~= nil) then exe = editor end + + local tmp, tname = temp_file() + if (tmp == nil) then return nil end + basetext = "MT: " .. string.gsub(basetext, "\n", "\nMT: ") .. "\n" + tmp:write(basetext) + io.close(tmp) + + if (execute(exe, tname) ~= 0) then + os.remove(tname) + return nil + end + + tmp = io.open(tname, "r") + if (tmp == nil) then os.remove(tname); return nil end + local res = "" + local line = tmp:read() + while(line ~= nil) do + if (not string.find(line, "^MT:")) then + res = res .. line .. "\n" + end + line = tmp:read() + end + io.close(tmp) + os.remove(tname) + return res +end + + +function non_blocking_rng_ok() + return true +end + + +function persist_phrase_ok() + return true +end + +-- trust evaluation hooks + +function intersection(a,b) + local s={} + local t={} + for k,v in pairs(a) do s[v] = 1 end + for k,v in pairs(b) do if s[v] ~= nil then table.insert(t,v) end end + return t +end + +function get_revision_cert_trust(signers, id, name, val) + return true +end + +function get_manifest_cert_trust(signers, id, name, val) + return true +end + +function get_file_cert_trust(signers, id, name, val) + return true +end + +function accept_testresult_change(old_results, new_results) + for test,res in pairs(old_results) + do + if res == true and new_results[test] ~= true + then + return false + end + end + return true +end + +-- merger support + +function merge2_meld_cmd(lfile, rfile) + return + function() + return execute("meld", lfile, rfile) + end +end + +function merge3_meld_cmd(lfile, afile, rfile) + return + function() + return execute("meld", lfile, afile, rfile) + end +end + + +function merge2_emacs_cmd(emacs, lfile, rfile, outfile) + local elisp = "(ediff-merge-files \"%s\" \"%s\" nil \"%s\")" + return + function() + return execute(emacs, "-no-init-file", "-eval", + string.format(elisp, lfile, rfile, outfile)) + end +end + +function merge3_emacs_cmd(emacs, lfile, afile, rfile, outfile) + local elisp = "(ediff-merge-files-with-ancestor \"%s\" \"%s\" \"%s\" nil \"%s\")" + local cmd_fmt = "%s -no-init-file -eval " .. 
elisp + return + function() + execute(emacs, "-no-init-file", "-eval", + string.format(elisp, lfile, rfile, afile, outfile)) + end +end + +function merge2_xxdiff_cmd(left_path, right_path, merged_path, lfile, rfile, outfile) + return + function() + return execute("xxdiff", + "--title1", left_path, + "--title2", right_path, + lfile, rfile, + "--merged-filename", outfile) + end +end + +function merge3_xxdiff_cmd(left_path, anc_path, right_path, merged_path, + lfile, afile, rfile, outfile) + return + function() + return execute("xxdiff", + "--title1", left_path, + "--title2", right_path, + "--title3", merged_path, + lfile, afile, rfile, + "--merge", + "--merged-filename", outfile) + end +end + +function write_to_temporary_file(data) + tmp, filename = temp_file() + if (tmp == nil) then + return nil + end; + tmp:write(data) + io.close(tmp) + return filename +end + +function read_contents_of_file(filename) + tmp = io.open(filename, "r") + if (tmp == nil) then + return nil + end + local data = tmp:read("*a") + io.close(tmp) + return data +end + +function program_exists_in_path(program) + return execute("which", program) == 0 +end + +function merge2(left_path, right_path, merged_path, left, right) + local lfile = nil + local rfile = nil + local outfile = nil + local data = nil + local meld_exists = false + + lfile = write_to_temporary_file(left) + rfile = write_to_temporary_file(right) + outfile = write_to_temporary_file("") + + if lfile ~= nil and + rfile ~= nil and + outfile ~= nil + then + local cmd = nil + if program_exists_in_path("meld") then + meld_exists = true + io.write(string.format("\nWARNING: 'meld' was choosen to perform external 2-way merge.\n" .. + "You should merge all changes to *LEFT* file due to limitation of program\n" .. + "arguments.\n\n")) + cmd = merge2_meld_cmd(lfile, rfile) + elseif program_exists_in_path("xxdiff") then + cmd = merge2_xxdiff_cmd(left_path, right_path, merged_path, + lfile, rfile, outfile) + elseif program_exists_in_path("emacs") then + cmd = merge2_emacs_cmd("emacs", lfile, rfile, outfile) + elseif program_exists_in_path("xemacs") then + cmd = merge2_emacs_cmd("xemacs", lfile, rfile, outfile) + end + + if cmd ~= nil + then + io.write(string.format("executing external 2-way merge command\n")) + cmd() + if meld_exists then + data = read_contents_of_file(lfile) + else + data = read_contents_of_file(outfile) + end + if string.len(data) == 0 + then + data = nil + end + else + io.write("no external 2-way merge command found\n") + end + end + + os.remove(lfile) + os.remove(rfile) + os.remove(outfile) + + return data +end + +function merge3(anc_path, left_path, right_path, merged_path, ancestor, left, right) + local afile = nil + local lfile = nil + local rfile = nil + local outfile = nil + local data = nil + local meld_exists = false + + lfile = write_to_temporary_file(left) + afile = write_to_temporary_file(ancestor) + rfile = write_to_temporary_file(right) + outfile = write_to_temporary_file("") + + if lfile ~= nil and + rfile ~= nil and + afile ~= nil and + outfile ~= nil + then + local cmd = nil + if program_exists_in_path("meld") then + meld_exists = true + io.write(string.format("\nWARNING: 'meld' was choosen to perform external 3-way merge.\n" .. + "You should merge all changes to *CENTER* file due to limitation of program\n" .. 
+ "arguments.\n\n")) + cmd = merge3_meld_cmd(lfile, afile, rfile) + elseif program_exists_in_path("xxdiff") then + cmd = merge3_xxdiff_cmd(left_path, anc_path, right_path, merged_path, + lfile, afile, rfile, outfile) + elseif program_exists_in_path("emacs") then + cmd = merge3_emacs_cmd("emacs", lfile, afile, rfile, outfile) + elseif program_exists_in_path("xemacs") then + cmd = merge3_emacs_cmd("xemacs", lfile, afile, rfile, outfile) + end + + if cmd ~= nil + then + io.write(string.format("executing external 3-way merge command\n")) + cmd() + if meld_exists then + data = read_contents_of_file(afile) + else + data = read_contents_of_file(outfile) + end + if string.len(data) == 0 + then + data = nil + end + else + io.write("no external 3-way merge command found\n") + end + end + + os.remove(lfile) + os.remove(rfile) + os.remove(afile) + os.remove(outfile) + + return data +end + + +-- expansion of values used in selector completion + +function expand_selector(str) + + -- simple date patterns + if string.find(str, "^19%d%d%-%d%d") + or string.find(str, "^20%d%d%-%d%d") + then + return ("d:" .. str) + end + + -- something which looks like an email address + if string.find(str, "address@hidden") + then + return ("a:" .. str) + end + + -- something which looks like a branch name + if string.find(str, "[%w%-]+%.[%w%-]+") + then + return ("b:" .. str) + end + + -- a sequence of nothing but hex digits + if string.find(str, "^%x+$") + then + return ("i:" .. str) + end + + -- "yesterday", the source of all hangovers + if str == "yesterday" + then + local t = os.time(os.date('!*t')) + return os.date("d:%F", t - 86400) + end + + -- "CVS style" relative dates such as "3 weeks ago" + local trans = { + minute = 60; + hour = 3600; + day = 86400; + week = 604800; + month = 2678400; + year = 31536000 + } + local pos, len, n, type = string.find(str, "(%d+) ([minutehordaywk]+)s? ago") + if trans[type] ~= nil + then + local t = os.time(os.date('!*t')) + return os.date("d:%F", t - (n * trans[type])) + end + + return nil +end ============================================================ --- tests/merge_normalization_edge_case/parent fe24df7edf04cb06161defc10b252c5fa32bf1f7 +++ tests/merge_normalization_edge_case/parent fe24df7edf04cb06161defc10b252c5fa32bf1f7 @@ -0,0 +1,400 @@ + +-- this is the standard set of lua hooks for monotone; +-- user-provided files can override it or add to it. + +function temp_file() + local tdir + tdir = os.getenv("TMPDIR") + if tdir == nil then tdir = os.getenv("TMP") end + if tdir == nil then tdir = os.getenv("TEMP") end + if tdir == nil then tdir = "/tmp" end + return mkstemp(string.format("%s/mt.XXXXXX", tdir)) +end + +function execute(path, ...) + local pid = posix.fork() + local ret = -1 + if pid == 0 then + posix.exec(path, unpack(arg)) + else + ret, pid = posix.wait(pid) + end + return ret +end + + + +-- attributes are persistent metadata about files (such as execute +-- bit, ACLs, various special flags) which we want to have set and +-- re-set any time the files are modified. the attributes themselves +-- are stored in a file .mt-attrs, in the working copy (and +-- manifest). each (f,k,v) triple in an atribute file turns into a +-- call to attr_functions[k](f,v) in lua. 
+ +if (attr_functions == nil) then + attr_functions = {} +end + + +attr_functions["execute"] = + function(filename, value) + if (value == "true") then + posix.chmod(filename, "u+x") + end + end + + +function ignore_file(name) + if (string.find(name, "%.a$")) then return true end + if (string.find(name, "%.so$")) then return true end + if (string.find(name, "%.o$")) then return true end + if (string.find(name, "%.la$")) then return true end + if (string.find(name, "%.lo$")) then return true end + if (string.find(name, "%.aux$")) then return true end + if (string.find(name, "%.bak$")) then return true end + if (string.find(name, "%.orig$")) then return true end + if (string.find(name, "%.rej$")) then return true end + if (string.find(name, "%~$")) then return true end + if (string.find(name, "/core$")) then return true end + if (string.find(name, "^CVS/")) then return true end + if (string.find(name, "/CVS/")) then return true end + if (string.find(name, "^%.svn/")) then return true end + if (string.find(name, "/%.svn/")) then return true end + if (string.find(name, "^SCCS/")) then return true end + if (string.find(name, "/SCCS/")) then return true end + return false; +end + + +function edit_comment(basetext) + local exe = "vi" + local visual = os.getenv("VISUAL") + if (visual ~= nil) then exe = visual end + local editor = os.getenv("EDITOR") + if (editor ~= nil) then exe = editor end + + local tmp, tname = temp_file() + if (tmp == nil) then return nil end + basetext = "MT: " .. string.gsub(basetext, "\n", "\nMT: ") .. "\n" + tmp:write(basetext) + io.close(tmp) + + if (execute(exe, tname) ~= 0) then + os.remove(tname) + return nil + end + + tmp = io.open(tname, "r") + if (tmp == nil) then os.remove(tname); return nil end + local res = "" + local line = tmp:read() + while(line ~= nil) do + if (not string.find(line, "^MT:")) then + res = res .. line .. "\n" + end + line = tmp:read() + end + io.close(tmp) + os.remove(tname) + return res +end + + +function non_blocking_rng_ok() + return true +end + + +function persist_phrase_ok() + return true +end + +-- trust evaluation hooks + +function intersection(a,b) + local s={} + local t={} + for k,v in pairs(a) do s[v] = 1 end + for k,v in pairs(b) do if s[v] ~= nil then table.insert(t,v) end end + return t +end + +function get_revision_cert_trust(signers, id, name, val) + return true +end + +function get_manifest_cert_trust(signers, id, name, val) + return true +end + +function get_file_cert_trust(signers, id, name, val) + return true +end + +function accept_testresult_change(old_results, new_results) + for test,res in pairs(old_results) + do + if res == true and new_results[test] ~= true + then + return false + end + end + return true +end + +-- merger support + +function merge2_meld_cmd(lfile, rfile) + return + function() + return execute("meld", lfile, rfile) + end +end + +function merge3_meld_cmd(lfile, afile, rfile) + return + function() + return execute("meld", lfile, afile, rfile) + end +end + + +function merge2_emacs_cmd(emacs, lfile, rfile, outfile) + local elisp = "(ediff-merge-files \"%s\" \"%s\" nil \"%s\")" + return + function() + return execute(emacs, "-no-init-file", "-eval", + string.format(elisp, lfile, rfile, outfile)) + end +end + +function merge3_emacs_cmd(emacs, lfile, afile, rfile, outfile) + local elisp = "(ediff-merge-files-with-ancestor \"%s\" \"%s\" \"%s\" nil \"%s\")" + local cmd_fmt = "%s -no-init-file -eval " .. 
elisp + return + function() + execute(emacs, "-no-init-file", "-eval", + string.format(elisp, lfile, rfile, afile, outfile)) + end +end + +function merge2_xxdiff_cmd(left_path, right_path, merged_path, lfile, rfile, outfile) + return + function() + return execute("xxdiff", + "--title1", left_path, + "--title2", right_path, + lfile, rfile, + "--merged-filename", outfile) + end +end + +function merge3_xxdiff_cmd(left_path, anc_path, right_path, merged_path, + lfile, afile, rfile, outfile) + return + function() + return execute("xxdiff", + "--title1", left_path, + "--title2", right_path, + "--title3", merged_path, + lfile, afile, rfile, + "--merge", + "--merged-filename", outfile) + end +end + +function write_to_temporary_file(data) + tmp, filename = temp_file() + if (tmp == nil) then + return nil + end; + tmp:write(data) + io.close(tmp) + return filename +end + +function read_contents_of_file(filename) + tmp = io.open(filename, "r") + if (tmp == nil) then + return nil + end + local data = tmp:read("*a") + io.close(tmp) + return data +end + +function program_exists_in_path(program) + return execute("which", program) == 0 +end + +function merge2(left_path, right_path, merged_path, left, right) + local lfile = nil + local rfile = nil + local outfile = nil + local data = nil + local meld_exists = false + + lfile = write_to_temporary_file(left) + rfile = write_to_temporary_file(right) + outfile = write_to_temporary_file("") + + if lfile ~= nil and + rfile ~= nil and + outfile ~= nil + then + local cmd = nil + if program_exists_in_path("meld") then + meld_exists = true + io.write(string.format("\nWARNING: 'meld' was choosen to perform external 2-way merge.\n" .. + "You should merge all changes to *LEFT* file due to limitation of program\n" .. + "arguments.\n\n")) + cmd = merge2_meld_cmd(lfile, rfile) + elseif program_exists_in_path("xxdiff") then + cmd = merge2_xxdiff_cmd(left_path, right_path, merged_path, + lfile, rfile, outfile) + elseif program_exists_in_path("emacs") then + cmd = merge2_emacs_cmd("emacs", lfile, rfile, outfile) + elseif program_exists_in_path("xemacs") then + cmd = merge2_emacs_cmd("xemacs", lfile, rfile, outfile) + end + + if cmd ~= nil + then + io.write(string.format("executing external 2-way merge command\n")) + cmd() + if meld_exists then + data = read_contents_of_file(lfile) + else + data = read_contents_of_file(outfile) + end + if string.len(data) == 0 + then + data = nil + end + else + io.write("no external 2-way merge command found\n") + end + end + + os.remove(lfile) + os.remove(rfile) + os.remove(outfile) + + return data +end + +function merge3(anc_path, left_path, right_path, merged_path, ancestor, left, right) + local afile = nil + local lfile = nil + local rfile = nil + local outfile = nil + local data = nil + local meld_exists = false + + lfile = write_to_temporary_file(left) + afile = write_to_temporary_file(ancestor) + rfile = write_to_temporary_file(right) + outfile = write_to_temporary_file("") + + if lfile ~= nil and + rfile ~= nil and + afile ~= nil and + outfile ~= nil + then + local cmd = nil + if program_exists_in_path("meld") then + meld_exists = true + io.write(string.format("\nWARNING: 'meld' was choosen to perform external 3-way merge.\n" .. + "You should merge all changes to *CENTER* file due to limitation of program\n" .. 
+ "arguments.\n\n")) + cmd = merge3_meld_cmd(lfile, afile, rfile) + elseif program_exists_in_path("xxdiff") then + cmd = merge3_xxdiff_cmd(left_path, anc_path, right_path, merged_path, + lfile, afile, rfile, outfile) + elseif program_exists_in_path("emacs") then + cmd = merge3_emacs_cmd("emacs", lfile, afile, rfile, outfile) + elseif program_exists_in_path("xemacs") then + cmd = merge3_emacs_cmd("xemacs", lfile, afile, rfile, outfile) + end + + if cmd ~= nil + then + io.write(string.format("executing external 3-way merge command\n")) + cmd() + if meld_exists then + data = read_contents_of_file(afile) + else + data = read_contents_of_file(outfile) + end + if string.len(data) == 0 + then + data = nil + end + else + io.write("no external 3-way merge command found\n") + end + end + + os.remove(lfile) + os.remove(rfile) + os.remove(afile) + os.remove(outfile) + + return data +end + + +-- expansion of values used in selector completion + +function expand_selector(str) + + -- simple date patterns + if string.find(str, "^19%d%d%-%d%d") + or string.find(str, "^20%d%d%-%d%d") + then + return ("d:" .. str) + end + + -- something which looks like an email address + if string.find(str, "address@hidden") + then + return ("a:" .. str) + end + + -- something which looks like a branch name + if string.find(str, "[%w%-]+%.[%w%-]+") + then + return ("b:" .. str) + end + + -- a sequence of nothing but hex digits + if string.find(str, "^%x+$") + then + return ("i:" .. str) + end + + -- "yesterday", the source of all hangovers + if str == "yesterday" + then + local t = os.time(os.date('!*t')) + return os.date("d:%F", t - 86400) + end + + -- "CVS style" relative dates such as "3 weeks ago" + local trans = { + minute = 60; + hour = 3600; + day = 86400; + week = 604800; + month = 2678400; + year = 31536000 + } + local pos, len, n, type = string.find(str, "(%d+) ([minutehordaywk]+)s? ago") + if trans[type] ~= nil + then + local t = os.time(os.date('!*t')) + return os.date("d:%F", t - (n * trans[type])) + end + + return nil +end ============================================================ --- tests/merge_normalization_edge_case/right 1836ed24710f5b8943bed224cf296689c6a106c2 +++ tests/merge_normalization_edge_case/right 1836ed24710f5b8943bed224cf296689c6a106c2 @@ -0,0 +1,424 @@ + +-- this is the standard set of lua hooks for monotone; +-- user-provided files can override it or add to it. + +function temp_file() + local tdir + tdir = os.getenv("TMPDIR") + if tdir == nil then tdir = os.getenv("TMP") end + if tdir == nil then tdir = os.getenv("TEMP") end + if tdir == nil then tdir = "/tmp" end + return mkstemp(string.format("%s/mt.XXXXXX", tdir)) +end + +function execute(path, ...) + local pid = posix.fork() + local ret = -1 + if pid == 0 then + posix.exec(path, unpack(arg)) + else + ret, pid = posix.wait(pid) + end + return ret +end + + + +-- attributes are persistent metadata about files (such as execute +-- bit, ACLs, various special flags) which we want to have set and +-- re-set any time the files are modified. the attributes themselves +-- are stored in a file .mt-attrs, in the working copy (and +-- manifest). each (f,k,v) triple in an atribute file turns into a +-- call to attr_functions[k](f,v) in lua. 
+ +if (attr_functions == nil) then + attr_functions = {} +end + + +attr_functions["execute"] = + function(filename, value) + if (value == "true") then + posix.chmod(filename, "u+x") + end + end + + +function ignore_file(name) + if (string.find(name, "%.a$")) then return true end + if (string.find(name, "%.so$")) then return true end + if (string.find(name, "%.o$")) then return true end + if (string.find(name, "%.la$")) then return true end + if (string.find(name, "%.lo$")) then return true end + if (string.find(name, "%.aux$")) then return true end + if (string.find(name, "%.bak$")) then return true end + if (string.find(name, "%.orig$")) then return true end + if (string.find(name, "%.rej$")) then return true end + if (string.find(name, "%~$")) then return true end + if (string.find(name, "/core$")) then return true end + if (string.find(name, "^CVS/")) then return true end + if (string.find(name, "/CVS/")) then return true end + if (string.find(name, "^%.svn/")) then return true end + if (string.find(name, "/%.svn/")) then return true end + if (string.find(name, "^SCCS/")) then return true end + if (string.find(name, "/SCCS/")) then return true end + return false; +end + + +function edit_comment(basetext) + local exe = "vi" + local visual = os.getenv("VISUAL") + if (visual ~= nil) then exe = visual end + local editor = os.getenv("EDITOR") + if (editor ~= nil) then exe = editor end + + local tmp, tname = temp_file() + if (tmp == nil) then return nil end + basetext = "MT: " .. string.gsub(basetext, "\n", "\nMT: ") .. "\n" + tmp:write(basetext) + io.close(tmp) + + if (execute(exe, tname) ~= 0) then + os.remove(tname) + return nil + end + + tmp = io.open(tname, "r") + if (tmp == nil) then os.remove(tname); return nil end + local res = "" + local line = tmp:read() + while(line ~= nil) do + if (not string.find(line, "^MT:")) then + res = res .. line .. 
"\n" + end + line = tmp:read() + end + io.close(tmp) + os.remove(tname) + return res +end + + +function non_blocking_rng_ok() + return true +end + + +function persist_phrase_ok() + return true +end + +-- trust evaluation hooks + +function intersection(a,b) + local s={} + local t={} + for k,v in pairs(a) do s[v] = 1 end + for k,v in pairs(b) do if s[v] ~= nil then table.insert(t,v) end end + return t +end + +function get_revision_cert_trust(signers, id, name, val) + return true +end + +function get_manifest_cert_trust(signers, id, name, val) + return true +end + +function get_file_cert_trust(signers, id, name, val) + return true +end + +function accept_testresult_change(old_results, new_results) + for test,res in pairs(old_results) + do + if res == true and new_results[test] ~= true + then + return false + end + end + return true +end + +-- merger support + +function merge2_meld_cmd(lfile, rfile) + return + function() + return execute("meld", lfile, rfile) + end +end + +function merge3_meld_cmd(lfile, afile, rfile) + return + function() + return execute("meld", lfile, afile, rfile) + end +end + + +function merge2_vim_cmd(vim, lfile, rfile, outfile) + return + function() + return execute(vim, "-f", "-d", "-c", string.format("file %s", outfile), + lfile, rfile) + end +end + +function merge3_vim_cmd(vim, lfile, afile, rfile, outfile) + return + function() + return execute(vim, "-f", "-d", "-c", string.format("file %s", outfile), + lfile, afile, rfile) + end +end + +function merge2_emacs_cmd(emacs, lfile, rfile, outfile) + local elisp = "(ediff-merge-files \"%s\" \"%s\" nil \"%s\")" + return + function() + return execute(emacs, "-no-init-file", "-eval", + string.format(elisp, lfile, rfile, outfile)) + end +end + +function merge3_emacs_cmd(emacs, lfile, afile, rfile, outfile) + local elisp = "(ediff-merge-files-with-ancestor \"%s\" \"%s\" \"%s\" nil \"%s\")" + local cmd_fmt = "%s -no-init-file -eval " .. 
elisp + return + function() + execute(emacs, "-no-init-file", "-eval", + string.format(elisp, lfile, rfile, afile, outfile)) + end +end + +function merge2_xxdiff_cmd(left_path, right_path, merged_path, lfile, rfile, outfile) + return + function() + return execute("xxdiff", + "--title1", left_path, + "--title2", right_path, + lfile, rfile, + "--merged-filename", outfile) + end +end + +function merge3_xxdiff_cmd(left_path, anc_path, right_path, merged_path, + lfile, afile, rfile, outfile) + return + function() + return execute("xxdiff", + "--title1", left_path, + "--title2", right_path, + "--title3", merged_path, + lfile, afile, rfile, + "--merge", + "--merged-filename", outfile) + end +end + +function write_to_temporary_file(data) + tmp, filename = temp_file() + if (tmp == nil) then + return nil + end; + tmp:write(data) + io.close(tmp) + return filename +end + +function read_contents_of_file(filename) + tmp = io.open(filename, "r") + if (tmp == nil) then + return nil + end + local data = tmp:read("*a") + io.close(tmp) + return data +end + +function program_exists_in_path(program) + return execute("which", program) == 0 +end + +function merge2(left_path, right_path, merged_path, left, right) + local lfile = nil + local rfile = nil + local outfile = nil + local data = nil + local meld_exists = false + + lfile = write_to_temporary_file(left) + rfile = write_to_temporary_file(right) + outfile = write_to_temporary_file("") + + if lfile ~= nil and + rfile ~= nil and + outfile ~= nil + then + local cmd = nil + if program_exists_in_path("meld") then + meld_exists = true + io.write(string.format("\nWARNING: 'meld' was choosen to perform external 2-way merge.\n" .. + "You should merge all changes to *LEFT* file due to limitation of program\n" .. + "arguments.\n\n")) + cmd = merge2_meld_cmd(lfile, rfile) + elseif program_exists_in_path("xxdiff") then + cmd = merge2_xxdiff_cmd(left_path, right_path, merged_path, + lfile, rfile, outfile) + elseif program_exists_in_path("emacs") then + cmd = merge2_emacs_cmd("emacs", lfile, rfile, outfile) + elseif program_exists_in_path("xemacs") then + cmd = merge2_emacs_cmd("xemacs", lfile, rfile, outfile) + elseif program_exists_in_path("gvim") then + cmd = merge2_vim_cmd("gvim", lfile, rfile, outfile) + elseif program_exists_in_path("vim") then + cmd = merge2_vim_cmd("vim", lfile, rfile, outfile) + end + + if cmd ~= nil + then + io.write(string.format("executing external 2-way merge command\n")) + cmd() + if meld_exists then + data = read_contents_of_file(lfile) + else + data = read_contents_of_file(outfile) + end + if string.len(data) == 0 + then + data = nil + end + else + io.write("no external 2-way merge command found\n") + end + end + + os.remove(lfile) + os.remove(rfile) + os.remove(outfile) + + return data +end + +function merge3(anc_path, left_path, right_path, merged_path, ancestor, left, right) + local afile = nil + local lfile = nil + local rfile = nil + local outfile = nil + local data = nil + local meld_exists = false + + lfile = write_to_temporary_file(left) + afile = write_to_temporary_file(ancestor) + rfile = write_to_temporary_file(right) + outfile = write_to_temporary_file("") + + if lfile ~= nil and + rfile ~= nil and + afile ~= nil and + outfile ~= nil + then + local cmd = nil + if program_exists_in_path("meld") then + meld_exists = true + io.write(string.format("\nWARNING: 'meld' was choosen to perform external 3-way merge.\n" .. + "You should merge all changes to *CENTER* file due to limitation of program\n" .. 
+ "arguments.\n\n")) + cmd = merge3_meld_cmd(lfile, afile, rfile) + elseif program_exists_in_path("xxdiff") then + cmd = merge3_xxdiff_cmd(left_path, anc_path, right_path, merged_path, + lfile, afile, rfile, outfile) + elseif program_exists_in_path("emacs") then + cmd = merge3_emacs_cmd("emacs", lfile, afile, rfile, outfile) + elseif program_exists_in_path("xemacs") then + cmd = merge3_emacs_cmd("xemacs", lfile, afile, rfile, outfile) + elseif program_exists_in_path("gvim") then + cmd = merge3_vim_cmd("gvim", lfile, afile, rfile, outfile) + elseif program_exists_in_path("vim") then + cmd = merge3_vim_cmd("vim", lfile, afile, rfile, outfile) + end + + if cmd ~= nil + then + io.write(string.format("executing external 3-way merge command\n")) + cmd() + if meld_exists then + data = read_contents_of_file(afile) + else + data = read_contents_of_file(outfile) + end + if string.len(data) == 0 + then + data = nil + end + else + io.write("no external 3-way merge command found\n") + end + end + + os.remove(lfile) + os.remove(rfile) + os.remove(afile) + os.remove(outfile) + + return data +end + + +-- expansion of values used in selector completion + +function expand_selector(str) + + -- simple date patterns + if string.find(str, "^19%d%d%-%d%d") + or string.find(str, "^20%d%d%-%d%d") + then + return ("d:" .. str) + end + + -- something which looks like an email address + if string.find(str, "address@hidden") + then + return ("a:" .. str) + end + + -- something which looks like a branch name + if string.find(str, "[%w%-]+%.[%w%-]+") + then + return ("b:" .. str) + end + + -- a sequence of nothing but hex digits + if string.find(str, "^%x+$") + then + return ("i:" .. str) + end + + -- "yesterday", the source of all hangovers + if str == "yesterday" + then + local t = os.time(os.date('!*t')) + return os.date("d:%F", t - 86400) + end + + -- "CVS style" relative dates such as "3 weeks ago" + local trans = { + minute = 60; + hour = 3600; + day = 86400; + week = 604800; + month = 2678400; + year = 31536000 + } + local pos, len, n, type = string.find(str, "(%d+) ([minutehordaywk]+)s? ago") + if trans[type] ~= nil + then + local t = os.time(os.date('!*t')) + return os.date("d:%F", t - (n * trans[type])) + end + + return nil +end ============================================================ --- tests/modification_of_an_empty_file/__driver__.lua 9831152a78a69ab9feb3aae2a1399d7574aaecac +++ tests/modification_of_an_empty_file/__driver__.lua 9831152a78a69ab9feb3aae2a1399d7574aaecac @@ -0,0 +1,22 @@ + +mtn_setup() + +-- First of all, add an empty file. +writefile("foo1") +check(mtn("add", "foo1"), 0, false, false) +commit() + +-- Add some contents to the previously added file. +writefile("foo1", "Some contents.") +commit() + +rev = base_revision() + +-- Verify that the latest revision contains a patch, rather than a delete/add +-- sequence (as reported in bug #9964). + +check(mtn("automate", "get_revision", rev), 0, true, false) + +check(grep('^patch "foo1"$', "stdout"), 0, false, false) +check(not qgrep("'add'", "stdout")) +check(not qgrep("'delete'", "stdout")) ============================================================ --- tests/rename_cannot_overwrite_files/__driver__.lua f6e813874fc679c957fecc77f83d0e4ff2ac2c09 +++ tests/rename_cannot_overwrite_files/__driver__.lua f6e813874fc679c957fecc77f83d0e4ff2ac2c09 @@ -0,0 +1,23 @@ + +mtn_setup() + +-- "rename" needs to check that it isn't overwriting existing +-- files/directories. 
+ +addfile("target_file", "blah blah") +mkdir("target_dir") +addfile("target_dir/subfile", "stuff stuff") + +addfile("rename_file", "foo foo") +mkdir("rename_dir") +addfile("rename_dir/file", "bar bar") + +check(mtn("rename", "unknown_file", "other_file"), 1, false, false) +check(mtn("rename", "rename_file", "target_file"), 1, false, false) +check(mtn("rename", "rename_dir", "target_file"), 1, false, false) + +commit() + +check(mtn("rename", "unknown_file", "other_file"), 1, false, false) +check(mtn("rename", "rename_file", "target_file"), 1, false, false) +check(mtn("rename", "rename_dir", "target_file"), 1, false, false) ============================================================ --- tests/revert_directories/__driver__.lua c5ea51f18aae3233942a3269799c1e56593ff59a +++ tests/revert_directories/__driver__.lua c5ea51f18aae3233942a3269799c1e56593ff59a @@ -0,0 +1,16 @@ + +mtn_setup() + +-- reverting files deeper in the directory tree with only some leading +-- components of their relative path specified + +mkdir("abc") mkdir("abc/def") mkdir("abc/def/ghi") +writefile("abc/def/ghi/file", "deep deep snow") +check(mtn("add", "abc/def/ghi/file"), 0, false, false) +commit() +writefile("abc/def/ghi/file", "deep deep mud") +check(mtn("status"), 0, true) +check(qgrep("abc/def/ghi/file", "stdout")) +check(mtn("revert", "abc/def"), 0, false, false) +check(mtn("status"), 0, true) +check(not qgrep("abc/def/ghi/file", "stdout")) ============================================================ --- tests/revert_renames/__driver__.lua 59e1fa11598a797b4a59bd6ee2375e90939ef13c +++ tests/revert_renames/__driver__.lua 59e1fa11598a797b4a59bd6ee2375e90939ef13c @@ -0,0 +1,35 @@ + +mtn_setup() + +writefile("testfile0", "version 0 of first test file") + +check(mtn("add", "testfile0"), 0, false, false) +v1 = sha1("testfile0") +commit() + +writefile("foofile0", "squirrils monkeys dingos") +check(mtn("rename", "testfile0", "foofile0"), 0, false, false) +check(qgrep("testfile0", "_MTN/work")) +check(qgrep("foofile0", "_MTN/work")) +check(mtn("status"), 0, true) +check(qgrep("testfile0", "stdout")) +check(qgrep("foofile0", "stdout")) +check(mtn("revert", "foofile0"), 0, false, false) +check(not exists("_MTN/work")) +check(mtn("status"), 0, true) +check(not qgrep("testfile0", "stdout")) +check(not qgrep("foofile0", "stdout")) + +writefile("foofile0", "squirrils monkeys dingos") +check(mtn("rename", "testfile0", "foofile0"), 0, false, false) +check(qgrep("testfile0", "_MTN/work")) +check(qgrep("foofile0", "_MTN/work")) +check(mtn("status"), 0, true) +check(qgrep("testfile0", "stdout")) +check(qgrep("foofile0", "stdout")) +check(mtn("revert", "testfile0"), 0, false, false) +check(v1 == sha1("testfile0")) +check(not exists("_MTN/work")) +check(mtn("status"), 0, true) +check(not qgrep("testfile0", "stdout")) +check(not qgrep("foofile0", "stdout")) ============================================================ --- tests/revert_unchanged_file_preserves_mtime/__driver__.lua 2f7232fe73c897383827d0e4d01de564eca715ba +++ tests/revert_unchanged_file_preserves_mtime/__driver__.lua 2f7232fe73c897383827d0e4d01de564eca715ba @@ -0,0 +1,32 @@ + +mtn_setup() +times = {} + +writefile("file1", "file 1 version 1") +times[1] = mtime("file1") + +check(mtn("add", "file1"), 0, false, false) +commit() + +-- ensure file modification time changes are detected + +sleep(2) +writefile("file1", "file 1 version 2") +times[2] = mtime("file1") +check(times[2] > times[1]) + +-- revert the file and ensure that its modification time changes + +sleep(2) 
+check(mtn("revert", "file1"), 0, false, false) + +times[3] = mtime("file1") +check(times[3] > times[2]) + +-- revert the file again and ensure that its modification time does NOT change + + +sleep(2) +check(mtn("revert", "file1"), 0, false, false) + +check(mtime("file1") == times[3]) ============================================================ --- tests/update_no-ops_when_no_parent_revision/__driver__.lua 70cee6aee810a93daf6a9f173b531fdf5992bb25 +++ tests/update_no-ops_when_no_parent_revision/__driver__.lua 70cee6aee810a93daf6a9f173b531fdf5992bb25 @@ -0,0 +1,6 @@ + +mtn_setup() + +addfile("foo", "blah balh") + +check(mtn("update", "--branch=testbranch"), 1, false, false) ============================================================ --- tests/warn_on_bad_restriction/__driver__.lua 37ba16972b88f387c1de33a07495ae1f3dbf07ef +++ tests/warn_on_bad_restriction/__driver__.lua 37ba16972b88f387c1de33a07495ae1f3dbf07ef @@ -0,0 +1,4 @@ + +mtn_setup() + +check(mtn("diff", "bogusdir1", "bogusdir2"), 1, false, true) ============================================================ --- tester.cc 01ceaa6974040125059f3d96204399ef8faa3103 +++ tester.cc 3967bacd4cf0166faf4cf2b3e2cc792cf52c0bb6 @@ -28,6 +28,11 @@ using std::make_pair; using boost::lexical_cast; +#include +#include +// for stat, to get mtime +// maybe factor this out of inodeprint instead? + namespace redirect { enum what {in, out, err}; @@ -313,6 +318,26 @@ } static int + mtime(lua_State *L) + { + char const * file = luaL_checkstring(L, -1); +#ifdef WIN32 + struct _stat st; + int r = _stat(file, &st); +#else + struct stat st; + int r = stat(file, &st); +#endif + if (r != 0) + { + lua_pushnil(L); + return 1; + } + lua_pushnumber(L, st.st_mtime); + return 1; + } + + static int exists(lua_State *L) { char const * name = luaL_checkstring(L, -1); @@ -457,6 +482,7 @@ lua_register(st, "leave_test_dir", leave_test_dir); lua_register(st, "mkdir", make_dir); lua_register(st, "chdir", go_to_dir); + lua_register(st, "mtime", mtime); lua_register(st, "remove_recursive", remove_recursive); lua_register(st, "copy_recursive", copy_recursive); lua_register(st, "exists", exists); ============================================================ --- tester.lua 12fd8a1b610f7db1e60069e9940767b262dc4a7d +++ tester.lua d71188603ab47fded0896bad4d89444fe49205f0 @@ -76,6 +76,13 @@ error(e, level) end +old_mtime = mtime +mtime = function(name) + local x = old_mtime(name) + L(locheader(), "mtime(", name, ") = ", tostring(x), "\n") + return x +end + old_mkdir = mkdir mkdir = function(name) L(locheader(), "mkdir ", name, "\n") @@ -95,6 +102,13 @@ return r end +function numlines(filename) + local n = 0 + for _ in io.lines(filename) do n = n + 1 end + L(locheader(), "numlines(", filename, ") = ", n, "\n") + return n +end + function fsize(filename) local file = io.open(filename, "r") if file == nil then error("Cannot open file " .. filename, 2) end @@ -141,6 +155,19 @@ return writefile_q(filename, dat) end +function append(filename, dat) + L(locheader(), "append to file ", filename, "\n") + local file,e = io.open(filename, "a+") + if file == nil then + L("Cannot open file: ", e, "\n") + return false + else + file:write(dat) + file:close() + return true + end +end + function copyfile(from, to) L(locheader(), "copyfile ", from, " ", to, "\n") local infile = io.open(from, "rb") @@ -315,6 +342,30 @@ return {dogrep, unpack(arg)} end +function cat(...) + local function docat(...) 
+    local bsize = 8*1024
+    for _,x in ipairs(arg) do
+      local infile
+      if x == "-" then
+        infile = files.stdin
+      else
+        infile = io.open(x, "rb")
+      end
+      local block = infile:read(bsize)
+      while block do
+        files.stdout:write(block)
+        block = infile:read(bsize)
+      end
+      if x ~= "-" then
+        infile:close()
+      end
+    end
+    return 0
+  end
+  return {docat, unpack(arg)}
+end
+
 function log_file_contents(filename)
   L(readfile_q(filename))
 end
============================================================
--- tests/calculation_of_unidiffs/__driver__.lua 7f47ccd3bd189c2eaca4ae5126635032509195ab
+++ tests/calculation_of_unidiffs/__driver__.lua c670daa765bad08b28e1c8b2fe6ae7b0159153a7
@@ -20,7 +20,7 @@
 
 -- see if patch likes that
 rename("monodiff", "stdin")
-check(cmd("patch", "firstfile"), 0, false, false, true)
+check({"patch", "firstfile"}, 0, false, false, true)
 
 -- see if the resulting file has been properly patched
 check(samefile("firstfile", "secondfile"))
============================================================
--- tests/checkout_validates_target_directory/__driver__.lua 8e28d8ea40dc00dd3c11342d218c84b96df8ebf8
+++ tests/checkout_validates_target_directory/__driver__.lua 99d066e1edbe5d34e4b41d4311a1634879e04d47
@@ -1,34 +1,31 @@
 mtn_setup()
 
 addfile("testfile", "foo")
 commit()
 
-check(mtn("--branch=testbranch", "checkout", "test_dir1"),
-      0, false, false)
+check(mtn("--branch=testbranch", "checkout", "test_dir1"), 0, false, false)
 
 writefile("test_dir2")
 
-check(mtn("--branch=testbranch", "checkout", "test_dir2"),
-      1, false, false)
+check(mtn("--branch=testbranch", "checkout", "test_dir2"), 1, false, false)
 
 mkdir("test_dir3")
 
-check(mtn("--branch=testbranch", "checkout", "test_dir3"),
-      1, false, false)
+check(mtn("--branch=testbranch", "checkout", "test_dir3"), 1, false, false)
 
 if existsonpath("chmod") and existsonpath("test") then
   -- skip this part if run as root (hi Gentoo!)
-  if check(cmd("test", "-O", "/"), false, false, false) == 0 then
+  if check({"test", "-O", "/"}, false, false, false) == 0 then
     partial_skip = true
   else
     mkdir("test_dir4")
-    check(cmd("chmod", "444", "test_dir4"), 0, false)
+    check({"chmod", "444", "test_dir4"}, 0, false)
     check(mtn("--branch=testbranch", "checkout", "test_dir4"), 1, false, false)
     check(mtn("--branch=testbranch", "checkout", "test_dir4/subdir"), 1, false, false)
 
     -- Reset the permissions so Autotest can correctly clean up our
     -- temporary directory.
-    check(cmd("chmod", "700", "test_dir4"), 0, false)
+    check({"chmod", "700", "test_dir4"}, 0, false)
   end
 else
   partial_skip = true
============================================================
--- tests/importing_CVS_files/__driver__.lua c0187ccc14b3b1beb413aeee4361c19d52387b97
+++ tests/importing_CVS_files/__driver__.lua b29bac5aeb02c39454b51e0982c1a9911c85bf63
@@ -19,7 +19,12 @@
 -- build the cvs repository
 
 cvsroot = test_root .. "/cvs-repository"
-check(cmd("cvs", "-q", "-d", cvsroot, "init"), 0, false, false)
+
+function cvs(...)
+  return {"cvs", "-d", cvsroot, unpack(arg)}
+end
+
+check(cvs("-q", "init"), 0, false, false)
 check(exists(cvsroot))
 check(exists(cvsroot .. "/CVSROOT"))
 check(exists(cvsroot .. "/CVSROOT/modules"))
@@ -28,18 +33,18 @@
 
 -- note that this has to use copyfile, rather than rename, to update
 -- the file in cvs. Apparently, cvs uses timestamps or something to track
 -- file modifications.
-check(cmd("cvs", "-d", cvsroot, "co", "."), 0, false, false) +check(cvs("co", "."), 0, false, false) mkdir("testdir") copyfile("importme.0", "testdir/importme") -check(cmd("cvs", "-d", cvsroot, "add", "testdir"), 0, false, false) -check(cmd("cvs", "-d", cvsroot, "add", "testdir/importme"), 0, false, false) -check(cmd("cvs", "-d", cvsroot, "commit", "-m", 'commit 0', "testdir/importme"), 0, false, false) +check(cvs("add", "testdir"), 0, false, false) +check(cvs("add", "testdir/importme"), 0, false, false) +check(cvs("commit", "-m", 'commit 0', "testdir/importme"), 0, false, false) copyfile("importme.1", "testdir/importme") -check(cmd("cvs", "-d", cvsroot, "commit", "-m", 'commit 1', "testdir/importme"), 0, false, false) +check(cvs("commit", "-m", 'commit 1', "testdir/importme"), 0, false, false) copyfile("importme.2", "testdir/importme") -check(cmd("cvs", "-d", cvsroot, "commit", "-m", 'commit 2', "testdir/importme"), 0, false, false) +check(cvs("commit", "-m", 'commit 2', "testdir/importme"), 0, false, false) copyfile("importme.3", "testdir/importme") -check(cmd("cvs", "-d", cvsroot, "commit", "-m", 'commit 3', "testdir/importme"), 0, false, false) +check(cvs("commit", "-m", 'commit 3', "testdir/importme"), 0, false, false) -- import into monotone and check presence of files ============================================================ --- tests/importing_a_CVS_file_with_one_version/__driver__.lua 44497c092858e43cd0dbb2ee1204bb5ec5fa0787 +++ tests/importing_a_CVS_file_with_one_version/__driver__.lua 6c24fc69840226c7d8562d20ed93d4b8930baf97 @@ -9,19 +9,24 @@ -- build the cvs repository cvsroot = test_root .. "/cvs-repository" -check(cmd("cvs", "-q", "-d", cvsroot, "init"), 0, false, false) + +function cvs(...) + return {"cvs", "-d", cvsroot, unpack(arg)} +end + +check(cvs("-q", "init"), 0, false, false) check(exists(cvsroot)) check(exists(cvsroot .. "/CVSROOT")) check(exists(cvsroot .. 
"/CVSROOT/modules")) -- check out the workspace and make a commit -check(cmd("cvs", "-d", cvsroot, "co", "."), 0, false, false) +check(cvs("co", "."), 0, false, false) mkdir("testdir") os.rename("importme.0", "testdir/importme") -check(cmd("cvs", "-d", cvsroot, "add", "testdir"), 0, false, false) -check(cmd("cvs", "-d", cvsroot, "add", "testdir/importme"), 0, false, false) -check(cmd("cvs", "-d", cvsroot, "commit", "-m", 'commit 0', "testdir/importme"), 0, false, false) +check(cvs("add", "testdir"), 0, false, false) +check(cvs("add", "testdir/importme"), 0, false, false) +check(cvs("commit", "-m", 'commit 0', "testdir/importme"), 0, false, false) -- import into monotone and check presence of file ============================================================ --- testsuite.at 2bcc5a412f1cb76ab24165ad2697e8f881c368a2 +++ testsuite.at 7ac716d85aeee1483af8a1891243a9daa3fa7a53 @@ -608,57 +608,6 @@ # include all the sub-tests we're going to use -#m4_include(tests/t_drop_rename_patch.at) -#m4_include(tests/t_cmdline_options.at) -#m4_include(tests/t_log_nonexistent.at) -#m4_include(tests/t_crlf.at) -#m4_include(tests/t_netsync_diffbranch.at) -#m4_include(tests/t_netsync_nocerts.at) -#m4_include(tests/t_check_same_db_contents.at) -#m4_include(tests/t_merge_ancestor.at) -#m4_include(tests/t_propagate_desc.at) -#m4_include(tests/t_propagate_anc.at) -#m4_include(tests/t_status_missing.at) -#m4_include(tests/t_persistent_server_keys_2.at) -#m4_include(tests/t_update_1.at) -#m4_include(tests/t_vcheck.at) -#m4_include(tests/t_db_with_dots.at) -#m4_include(tests/t_subdir_add.at) -#m4_include(tests/t_subdir_drop.at) -#m4_include(tests/t_subdir_revert.at) -#m4_include(tests/t_subdir_rename.at) -#m4_include(tests/t_subdir_attr.at) -#m4_include(tests/t_lca_1.at) -#m4_include(tests/t_update_2.at) -#m4_include(tests/t_rename_dir_cross_level.at) -#m4_include(tests/t_rename_added_in_rename.at) -#m4_include(tests/t_rename_conflict.at) -#m4_include(tests/t_rename_dir_patch.at) -m4_include(tests/t_delete_dir_patch.at) -m4_include(tests/t_revert_dirs.at) -m4_include(tests/t_revert_rename.at) -m4_include(tests/t_revert_unchanged.at) -m4_include(tests/t_cdiff.at) -m4_include(tests/t_no_rename_overwrite.at) -m4_include(tests/t_checkout_noop_on_fail.at) -m4_include(tests/t_monotone_agent.at) -m4_include(tests/t_approval_semantics.at) -m4_include(tests/t_i18n_changelog.at) -m4_include(tests/t_restrictions_warn_on_unknown.at) -m4_include(tests/t_need_mt_revision.at) -m4_include(tests/t_update_null_revision.at) -m4_include(tests/t_branch_checkout.at) -m4_include(tests/t_load_into_existing.at) -m4_include(tests/t_automate_version.at) -m4_include(tests/t_automate_heads.at) -m4_include(tests/t_merge_normalization_edge_case.at) -m4_include(tests/t_undo_update.at) -m4_include(tests/t_change_empty_file.at) -m4_include(tests/t_largish_file.at) -m4_include(tests/t_add_intermediate__MTN_path.at) -m4_include(tests/t_merge_3.at) -m4_include(tests/t_merge_4.at) -m4_include(tests/t_db_missing.at) m4_include(tests/t_database_check.at) m4_include(tests/t_add_owndb.at) m4_include(tests/t_can_execute.at) ============================================================ --- testsuite.lua 49e9abcd6630b351684304699ffa16ea87b0f993 +++ testsuite.lua f61bf454ad6bfaa304208da8de782c89e9715909 @@ -360,3 +360,28 @@ table.insert(tests, "tests/merge_with_add,_rename_file,_and_rename_dir") table.insert(tests, "tests/merge((rename_a_b),_(rename_a_c))") table.insert(tests, "tests/merge((patch_foo_a),_(rename_foo__bar_))") +table.insert(tests, 
"tests/(imp)_merge((patch_foo_a),_(delete_foo_))") +table.insert(tests, "tests/revert_directories") +table.insert(tests, "tests/revert_renames") +table.insert(tests, "tests/revert_unchanged_file_preserves_mtime") +table.insert(tests, "tests/(minor)_context_diff") +table.insert(tests, "tests/rename_cannot_overwrite_files") +table.insert(tests, "tests/failed_checkout_is_a_no-op") +table.insert(tests, "tests/(todo)_write_monotone-agent") +table.insert(tests, "tests/(todo)_design_approval_semantics") +table.insert(tests, "tests/committing_with_a_non-english_message") +table.insert(tests, "tests/warn_on_bad_restriction") +table.insert(tests, "tests/_MTN_revision_is_required") +table.insert(tests, "tests/update_no-ops_when_no_parent_revision") +table.insert(tests, "tests/branch-based_checkout") +table.insert(tests, "tests/db_load_must_create_a_new_db") +table.insert(tests, "tests/automate_automate_version") +table.insert(tests, "tests/automate_heads") +table.insert(tests, "tests/merge_normalization_edge_case") +table.insert(tests, "tests/(todo)_undo_update_command") +table.insert(tests, "tests/modification_of_an_empty_file") +table.insert(tests, "tests/largish_file") +table.insert(tests, "tests/files_with_intermediate__MTN_path_elements") +table.insert(tests, "tests/(minor)_test_a_merge_3") +table.insert(tests, "tests/(minor)_test_a_merge_4") +table.insert(tests, "tests/db_missing")