# # patch "ChangeLog" # from [461c25a92640c41f5d01de41ee652f4ba900980b] # to [0b4e42f08d53fd7189d2c51f2069be4aa1a9952c] # # patch "cset.cc" # from [1fb425ad8e367a70b3cd9668c69bcf37b4d4418e] # to [a388ba73b60d8b2b4f9617594f5bc770356bb08e] # =============================================== --- ChangeLog 461c25a92640c41f5d01de41ee652f4ba900980b +++ ChangeLog 0b4e42f08d53fd7189d2c51f2069be4aa1a9952c @@ -1,3 +1,7 @@ +2005-07-16 graydon hoare + + * cset.cc: Bugfixing, copy-on-write, attribute support. + 2005-06-19 graydon hoare * cset.cc: Rewrite-in-progress on change_set.cc. =============================================== --- cset.cc 1fb425ad8e367a70b3cd9668c69bcf37b4d4418e +++ cset.cc a388ba73b60d8b2b4f9617594f5bc770356bb08e @@ -1,158 +1,175 @@ /* - an mfest is a collection of nodes, indexed by ident number and - also held in a tree of nodes. the bucket of identified nodes is - possibly larger than the tree of nodes. +nodes: +~~~~~~ - a change_set is a pair of mfests. every entry in each mfest exists in - the other, though some might be detached from the file tree and just - known by ident. + a node is either a file or a directory. nodes have attributes, an + ident, a parent-ident, and a set of heirs and sires. directory nodes + have a map of children and a map of clobbered-children. file nodes + have a content hash. see below for the definitions of these members. + +mfests: +~~~~~~~ + + an mfest is an index-set of nodes X and a tree T of nodes starting + from a root. the index X maps ident numbers to shared pointers into + T. there may be entries in X which are not in T. T must always be a + well-formed tree. + + an mfest has a normal form, in which: + + - a node is in X iff it is in T + - all nodes in the tree have empty heirs and sires + (see below for definition of these) + + serializing and re-reading an mfest normalizes it (though there is a + faster, in-place function to normalize them as well). + + +csets: +~~~~~~ + + a cset is a pair of mfests A, B. the mfests in a cset are *not* + normalized. it is an invariant that idents(A.X) = idents(B.X), but it + is only sometimes true that idents(A.T) = idents(B.T); some nodes + might be present in one mfest's tree but absent from the other's (if + they were added or deleted). + + +change_consumers: +~~~~~~~~~~~~~~~~~ + a change_consumer -- and the serial form of a change_set -- is the way a human wants to read about some work: organized into a set of - deletes, moves, and adds. + deletes, moves, adds, deltas, and attribute set/clear operations. - there is ambiguity in a change_consumer though, regarding the - resolution of names and the simultineity of operations. specifically, - each entry in a change_set is either looked up in the pre state or - post state mfest of a change. a delete is looked up in the pre - state. a rename's source in the pre state. a rename's destination in - the post state. an add is looked up in the post state. + there is ambiguity in a change_consumer regarding the resolution of + names and the simultineity of operations. specifically, each entry in + a change_set is either looked up in the pre state or post state mfest + of a change. a delete is looked up in the pre state. a rename's + source in the pre state. a rename's destination in the post state. an + add is looked up in the post state. when playing back a change_set, there is a canonical order in which - entries are emitted: {delete,rename,add}{file,dir}. + entries are emitted: set_heir, delete, rename, add, set_sire, + apply_delta, clear_attr, set_attr. furthermore within each type of entry, the order in which the entries are emitted is specified: all entries are emitted in lexicographic order, which happens to induce topological order as well (A is always emitted before A/B). - crucially, deletes are ordered by *source space* and adds and renames - are ordered by *destination space*. we replay these by walking the - tree rooted in each mfest. + crucially, set_heir and delete are ordered by *source space*; rename, + add, set_sire, apply_delta, clear_attr, and set_attr are ordered by + *destination space*. we replay these by walking the tree rooted in + each mfest. -*/ +heirs and sires: +~~~~~~~~~~~~~~~~ -/* + nodes may have heirs or sires. only nodes being deleted in a cset may + have an heir; only nodes being added in a cset may have a sire. the + heir of a node is a target to send future content deltas and + attributes to; it is a "forwarding address" used in cases where + two files with separate histories are considered identical in a merge + and "sutured": one node is deleted, and it marks the other node as an + heir. the name of the heir is looked up in the new manifest. - aliases, splits, joins, nursery, graveyard: - - each node has a set of alias nodes. + only attribute and content-merging passes care about heirs and sires. + they do not affect lifecycle decisions during merging. - if N is a node and A is an alias node of N in the old_manifest of a - cset C, we say that A split from N in C. if A is an alias of N in - the new_manifest of C, we say that A joined N in C. + an added node A has a node S as sire in cset C iff there is a cset C' + in which A was being deleted with heir S, and C' = inverse(C). in + other words, sires exist *only* to preserve information about heirs + during cset inversion. there are no user-accessible primitives for + creating sire relationships. - if N is not an alias, but actually present in a manifest, we say that N - is *live* in that manifest. - - if N is live in the old_manifest of C, we say N is live-in in C. - - if N is live in the new_manifest of C, we say N is live-out in C. +generation numbers: +~~~~~~~~~~~~~~~~~~~ + + every node in an mfest has a generation number. this exists to + implement copy-on-write semantics. the cset has a "write_generation" + number representing the generation which nodes should be updated to + when written to in the new mfest. when you want a node for writing, + you request it from the new mfest and the node-finder walks down the tree + looking. any time it hits an intermediate node which is less than the + write_generation, it makes a copy of that node and replaces the owning + pointer before walking into it. this ensures that the path you modify + is a truly new object, not a pointer into to the old tree. - two special nodes exist *always* in every manifest: - 1. the nursery - 2. the graveyard +attach and detach lists: +~~~~~~~~~~~~~~~~~~~~~~~~ - - if A was split from the nursery in C, we say that A was added in C. - - if A was joined to the graveyard in C, we say that A was deleted in C. + this is a subtle point. read it until it's clear. - for any nodes M, N in C, we must have a proper lifecycle nesting. that is: + csets contain an implicit order to their operations: a block of + set_heir actions, then deletes, renames, adds, set_sires, deltas, and + finally attribute changes. moreover as we've seen above, within each + block there is a topological order: renames landing on A/B happen + before renames landing on A/B/C, for example. - - if M splits from N in C then N must be live-in in C - - if M joins to N in C, then N must be live-out in C + there is still, however, an ordering hazard when you apply renames + "in-place", either to an mfest or a filesystem: it's possible that a + rename-target will land on a name which is *in the process* of being + moved elsewhere -- by another rename in the cset -- but has not yet + been moved due to the serial order chosen. consider this cset: - this is normally trivially satisfied by splitting and joining from the - nursery and graveyard, respectively. + rename_file "A/B" + to "P/Q" - splits exist only in order to serve as the inverse of joins. joins - exist for only one purpose: when merging two trees which have - different (non-identified) files occupying the same name, and the - user decides they *want* to identify the files (eg. 2-way merge). in - this case we want to join one file to the other, to give future edits - on the joiner a target to apply to. + rename_file "P/Q" + to "Y/Z" - yes? hmm.. we want to avoid this: - --------------------------------- + this is the order they will be emitted in. because these names exist + in the same cset, they are to be executed "simultaneously" in theory; + but programs execute sequentially, so naively we would perform the + first rename, then the second. - bob makes a tree and syncs it with jane. - bob adds file foo - bob syncs with sid - jane adds file foo - jane syncs with bob - jane merges with bob. jane's foo is not the same as bob's foo. jane's foo wins. - sid makes a change to bob's foo. - sid syncs with jane. - sid's change is silently eaten. - - + suppose we executed them "sequentially". for the first rename we + would do this: - a point: every file *does* have a GUID. the (rev,path) pair it was added in. - or at least it ought to. the problem is you can add a file in two (rev,path) - pairs, and then later decide you *wanted* them to behave -- for the sake of - merging -- as though they are the same file. iow, behave like they had a - common ancestor immediately before the states they were both added in. + - find the node A/B in the OLD mfest, its file ident is file_0. + - find file_0 in the NEW mfest, its parent is parent_0. + - detach B from parent_0 in the NEW mfest. + - find P in the NEW mfest, its ident is parent_1. + - attach file_0 to parent_1 under the name Q. - so how do joins help here? simply this: when you are working on a merge - between A and B, the first step is to decide which files will be live - in the merge. to do this you take all the files which are live in A and - live in B. then for each file you've decided will be live, you pull out - a subgraph for it, containing all the nodes in which it's alive. then you - add to this subgraph all the nodes *joined* to the live one. that's the - central trick: you will be doing graph contraction on the joiner as well, - so that you do not lose any of its deltas. + this last action will fail. it will fail because parent_1 already has + a child called Q in the second mfest (it's the source of the next + rename, and it hasn't been moved elsewhere yet). we the readers know + that by the time the cset is finished applying, Q will have been + detached from parent_1 in the NEW mfest. but in the meantime we have + a problem. - further restriction: foo/bar can only ever join foo/bar, and this can - only happen in a merge in which one of the incoming edges is - providing foo/bar already; it's actually written (unambiguously) in - the joiner's changeset as 'join "foo/bar"' (inverse: 'split - "foo/bar"'). so, scratch previous idea: adds and deletes are still - expressed as being renamed to or from the nursery or graveyard, not - split/joined with them. + so what we do instead is execute the first "half" of the renames in a + preliminary pass, then the second "half" of them in a second + pass. the change_consumer interface has a special + "finalize_renames()" method which triggers the second half; it is + called after the last rename is processed. here's how we *actually* + process the renames: - - further: suppose in A->C we have "join foo/bar", where there's a B->C - edge containing a foo/bar to join with. all well and good. then - suppose we append D="rename foo/bar foo/baz" to C. then the B->C->D - edge has "rename foo/bar foo/baz", and the A->C->D edge has ... - "join foo/baz"? that makes no sense. + - buffer all the renames into a vector of src/dst pairs, in order. + - when the user calls finalize_renames(): + - build a temporary vector of idents with the same length as the buffer + - walk the vector from end to beginning, detaching each node from + the NEW tree and saving its ident in the temporary vector + - walk the vector from beginning to end, attaching each node to + the NEW tree, using the saved idents - we know a join is essentially a "delete with a forwarding address"; - so like a delete it has to take effect *before* anything else in a - cset. which means it is written in the pre-space of the cset. the - problem is that the name it refers to is clearly a name in the - post-space of the delta: the name it's colliding with! - - perhaps "join" is not the right word. perhaps we want something more - like "doom foo/bar; rename foo/bar foo/baz" in the composite cset: an - acknowledgment that the node named by foo/bar at the *beginning* of - the cset is doomed, even if it gets moved around thereafter: the - proviso of a doomed node eixsting in a revision is that there's a - non-doomed node occupying the same node. a doomed node is not yet - dead, but it adheres to some other node until that other node dies. - - subsequently if you doom the node foo/baz (which requires some other - incoming node in a merge, using that name) then the composite cset - is "doom foo/bar; doom foo/baz; rename foo/bar foo/baz" and you have - *two* doomed nodes in the doomed set of the third node. this should - continue to work until such time as the live node carrying the doomed - ones actually dies. - - actually that reads terribly. it would be better to have "delete_foo" - have a secondary clause, "with_heir", where the heir is a name in the - post-space of the cset. this way the heir can get moved around; and - the natural inverse is "add_foo" ... "with_sire", where the sire is - in the pre-space of the cset. that feels more likely to work. - */ #define __STDC_CONSTANT_MACROS +#define __STDC_LIMIT_MACROS #include #include #include +#include #include #include #include @@ -160,22 +177,26 @@ #include #include +#include #include #include #include "basic_io.hh" +#include "numeric_vocab.hh" #include "sanity.hh" #include "vocab.hh" using std::deque; using std::map; using std::pair; +using std::set; using std::stack; using std::string; using std::vector; using std::inserter; using std::copy; using std::make_pair; +using boost::scoped_ptr; using boost::shared_ptr; using boost::dynamic_pointer_cast; using __gnu_cxx::hash_map; @@ -189,31 +210,50 @@ struct dirent_hash; struct dirent_eq; -typedef uint32_t ident_t; +namespace __gnu_cxx +{ + template<> + struct hash + { + size_t + operator()(u64 __x) const + { return static_cast(__x); } + }; +} + +typedef uint64_t ident_t; +typedef uint64_t gen_t; typedef shared_ptr node_t; typedef shared_ptr dir_t; typedef shared_ptr file_t; typedef shared_ptr dirent_t; // dirmap *must* be a sorted map, do not change to a hashed map -typedef map dirmap_t; +typedef map dirmap_t; typedef hash_set dirset_t; typedef string attr_name; typedef string attr_val; +static ident_t +null_ident = 0; + static ident_t -nursery_ident = 0; +nursery_ident = 1; static ident_t -graveyard_ident = 1; +graveyard_ident = 2; struct ident_source { ident_t ctr; ident_source() : ctr(graveyard_ident + 1) {} - ident_t next() { I(ctr != UINT32_C(0xffffffff)); return ctr++; } + ident_t next() { I(ctr != UINT64_MAX); return ctr++; } }; +//================================================================== +// dirents and paths +//================================================================== + // directory entries are globally interned components // of a file_path @@ -237,18 +277,17 @@ } }; +dirent_t +null_dirent(new dir_entry("")); - -struct -dirent_t_cmp +bool +operator<(dirent_t const & a, + dirent_t const & b) { - bool operator()(dirent_t const & a, - dirent_t const & b) const - { - return *a < *b; - } -}; + return *a < *b; +} + struct dirent_hash { @@ -320,6 +359,10 @@ }; +//================================================================== +// nodes +//================================================================== + // static bucket of shared pointers dirset_t path_vec_t::dset; @@ -327,18 +370,123 @@ struct node { + gen_t generation; ident_t ident; ident_t parent; dirent_t name; - map attributes; - bool unborn() const { return parent == nursery_ident; } - bool live() const { return ! (unborn() || killed()); } - bool killed() const { return parent == graveyard_ident; } + // very few nodes have any attributes, heirs, or sires. a node with + // any of these is "fancy", and we keep fancy parts in a secondary + // struture, under a scoped pointer, to save memory. it only gets a + // value if someone assigns to one of the fancy parts. + + struct fancy_part + { + ident_t sire; + ident_t heir; + map attrs; + }; - node() : name(new dir_entry("")) {} - virtual void flush_clobbered() = 0; - virtual node_t clone() const = 0; + scoped_ptr fancy; + void ensure_fancy() + { + if (!fancy) + { + fancy.reset(new fancy_part()); + fancy->sire = null_ident; + fancy->heir = null_ident; + } + } + + bool has_sire() const + { + return fancy && (fancy->sire != null_ident); + } + + bool has_heir() const + { + return fancy && (fancy->heir != null_ident); + } + + ident_t get_sire() const + { + I(fancy); + return fancy->sire; + } + + ident_t get_heir() const + { + I(fancy); + return fancy->heir; + } + + void set_sire(ident_t s) + { + ensure_fancy(); + fancy->sire = s; + } + + void set_heir(ident_t h) + { + ensure_fancy(); + fancy->heir = h; + } + + bool has_attrs() + { + return fancy && !fancy->attrs.empty(); + } + + bool has_attr(attr_name const & name) + { + return fancy && (fancy->attrs.find(name) != + fancy->attrs.end()); + } + + attr_val const & get_attr(attr_name const & name) + { + I(fancy); + map::const_iterator i = fancy->attrs.find(name); + I(i != fancy->attrs.end()); + return i->second; + } + + void set_attr(attr_name const & name, + attr_val const & val) + { + ensure_fancy(); + fancy->attrs[name] = val; + } + + void clear_attr(attr_name const & name) + { + ensure_fancy(); + fancy->attrs.erase(name); + } + + bool unborn() const + { + return parent == nursery_ident; + } + + bool live() const + { + return ! (unborn() || killed()); + } + + bool killed() const + { + return parent == graveyard_ident; + } + + node() + : generation(0), + ident(null_ident), + parent(null_ident), + name(null_dirent) + {} + + virtual node_t shallow_copy() const = 0; virtual ~node() {} }; @@ -348,21 +496,23 @@ { file_id content; - virtual void flush_clobbered() {} - virtual node_t clone() const; + virtual node_t shallow_copy() const; virtual ~file_node() {} }; node_t -file_node::clone() const +file_node::shallow_copy() const { file_t f = file_t(new file_node()); + f->generation = 0; f->ident = ident; f->parent = parent; f->name = name; + if (fancy) + f->fancy.reset(new fancy_part(*fancy)); + f->content = content; - copy(attributes.begin(), attributes.end(), - inserter(f->attributes, f->attributes.begin())); + return f; } @@ -372,77 +522,22 @@ : public node { dirmap_t entries; - dirmap_t clobbered_entries; // informative bool contains_entry(dirent_t p) const; - bool contains_clobbered_entry(dirent_t p) const; - bool contains_dir(dirent_t p) const; - bool contains_file(dirent_t p) const; // imperative (fault on lookup failures) node_t get_entry(dirent_t p) const; - dir_t get_dir_entry(dirent_t p) const; - file_t get_file_entry(dirent_t p) const; + void add_child(dirent_t name, node_t n); + void drop_child(dirent_t c); - node_t get_clobbered_entry(dirent_t p) const; - dir_t get_clobbered_dir_entry(dirent_t p) const; - file_t get_clobbered_file_entry(dirent_t p) const; - - void add_child(dirent_t name, node_t n, bool clobber = false); - void drop_dir(dirent_t c, bool clobbered_ok = false); - void drop_file(dirent_t c, bool clobbered_ok = false); - - virtual void flush_clobbered(); - virtual node_t clone() const; + virtual node_t shallow_copy() const; virtual ~dir_node() {} }; - void -dir_node::flush_clobbered() +dir_node::add_child(dirent_t name, node_t n) { - dirmap_t::const_iterator - i = clobbered_entries.begin(), - j = clobbered_entries.end(); - while(i != j) - { - i->second->flush_clobbered(); - } - clobbered_entries.clear(); -} - -void -dir_node::add_child(dirent_t name, node_t n, bool clobber) -{ - // this is a bit odd: it is possible that, while processing renames - // (only), we have actually clobbered a directory entry name in the - // destination mfest of a cset. for example if we have "rename_file - // A/X B/Y", where the file in question has ident T. we can find the - // current parent P of T in the second mfest, but we can't tell what - // name to drop from P in order to get T properly detached and moved - // over to its new home in Q=B. - // - // if we tell P to drop X, we risk dropping an entry which is not T; - // it is possible some other entry has already clobbered that name - // in P. so when an entry is added with clobbering turned on (i.e. when - // we are performing a rename) the new entry moves any old entry to - // the clobber map, where future drops (with clobbers accepted) will - // short-circuit on it. - // - // note that in general (in particular, with respect to resolving - // the parent directory of the target of an add or rename) there is - // no risk of clobbers because we emit renames in topological order - // in the target space: by the time we are processing renames - // landing on foo/bar/baz, we have already resolved which node - // "owns" the name bar in foo. - - if (clobber && contains_entry(name)) - { - I(!contains_clobbered_entry(name)); - clobbered_entries.insert(make_pair(name,get_entry(name))); - entries.erase(name); - } I(!contains_entry(name)); L(F("parent %d adding child %d = '%s'\n") % ident % n->ident % name->val); n->name = name; @@ -451,37 +546,12 @@ } void -dir_node::drop_dir(dirent_t c, bool clobbered_ok) +dir_node::drop_child(dirent_t c) { - if (clobbered_ok && contains_clobbered_entry(c)) - { - dir_t tmp = get_clobbered_dir_entry(c); - clobbered_entries.erase(c); - } - else - { - dir_t tmp = get_dir_entry(c); - entries.erase(c); - } + I(contains_entry(c)); + entries.erase(c); } -void -dir_node::drop_file(dirent_t c, bool clobbered_ok) -{ - if (clobbered_ok && contains_clobbered_entry(c)) - { - file_t tmp = get_clobbered_file_entry(c); - clobbered_entries.erase(c); - } - else - { - file_t tmp = get_file_entry(c); - entries.erase(c); - } -} - - - static inline bool is_dir_t(node_t n) { @@ -512,126 +582,170 @@ return f; } -static bool -equal_nodes(node_t const & n1, - node_t const & n2, - bool compare_children = false) +struct +dfs_iter { - deque q1; - deque q2; + stack< pair > stk; - q1.push_back(n1); - q2.push_back(n2); + dfs_iter(dir_t root) + { + stk.push(make_pair(root, root->entries.begin())); + } - while(!q1.empty()) - { + bool finished() + { + return stk.empty(); + } - if (q2.empty()) - return false; + dir_t cwd() + { + return stk.top().first; + } - node_t a = q1.front(); - node_t b = q2.front(); - q1.pop_front(); - q2.pop_front(); + node_t operator*() + { + return stk.top().second->second; + } - if (!(a->ident == b->ident)) - return false; + void operator++() + { + if (stk.empty()) + return; - if (!(a->parent == b->parent)) - return false; - if (! (*(a->name) == *(b->name))) - return false; + node_t ntmp = stk.top().second->second; + if (is_dir_t(ntmp)) + { + dir_t dtmp = downcast_to_dir_t(ntmp); + stk.push(make_pair(dtmp, dtmp->entries.begin())); + } + else + ++(stk.top().second); - if (!(a->attributes == b->attributes)) - return false; + while (!stk.empty() + && stk.top().second == stk.top().first->entries.end()) + { + stk.pop(); + if (!stk.empty()) + ++stk.top().second; + } + } +}; - if (is_file_t(a)) - { - if (! is_file_t(b)) - return false; - file_t af = downcast_to_file_t(a); - file_t bf = downcast_to_file_t(b); +struct +bfs_iter +{ + deque q; + bfs_iter(node_t root) + { + if (is_dir_t(root)) + L(F("bfs_iter starting with node %d (%d children)\n") + % root->ident % downcast_to_dir_t(root)->entries.size()); + else + L(F("bfs_iter starting with node %d\n") + % root->ident); + q.push_back(root); + } - if (!(af->content == bf->content)) - return false; - } - else - { - I(is_dir_t(a)); + bool finished() + { + return q.empty(); + } - if (! is_dir_t(b)) - return false; + node_t operator*() + { + I(!q.empty()); + return q.front(); + } - dir_t ad = downcast_to_dir_t(a); - dir_t bd = downcast_to_dir_t(b); + void operator++() + { + if (q.empty()) + return; - if (ad->entries.size() != bd->entries.size()) - return false; + if (is_dir_t(q.front())) + { + dir_t tmp = downcast_to_dir_t(q.front()); + for (dirmap_t::const_iterator i = tmp->entries.begin(); + i != tmp->entries.end(); ++i) + { + L(F("bfs_iter adding node %d, child fo %d\n") + % i->second->ident % tmp->ident); + q.push_back(i->second); + } + } + q.pop_front(); + } +}; - dirmap_t::const_iterator i = ad->entries.begin(); - dirmap_t::const_iterator j = bd->entries.begin(); - while (i != ad->entries.end()) - { - I(j != bd->entries.end()); +static bool +shallow_equal(node_t const & a, + node_t const & b) +{ + if ((a->ident != b->ident) + || (a->parent != b->parent) + || (a->name != b->name)) + return false; - if (! (*(i->first) == *(j->first))) - return false; + if (a->fancy || b->fancy) + { + if (!(b->fancy && b->fancy)) + return false; + if ((a->fancy->sire != b->fancy->sire) + || (a->fancy->heir != b->fancy->heir) + || (a->fancy->attrs != b->fancy->attrs)) + return false; + } - if (compare_children) - { - q1.push_back(i->second); - q2.push_back(j->second); - } - else - { - if (i->second->ident != i->second->ident) - return false; - } - ++i; - ++j; - } - } + if (is_file_t(a) && is_file_t(b)) + { + file_t fa = downcast_to_file_t(a); + file_t fb = downcast_to_file_t(b); + if (! (fa->content == fb->content)) + return false; + return true; } - return true; -} -bool -dir_node::contains_entry(dirent_t p) const -{ - map::const_iterator i = entries.find(p); - if (i == entries.end()) + else if (is_dir_t(a) && is_dir_t(b)) + { + dir_t da = downcast_to_dir_t(a); + dir_t db = downcast_to_dir_t(b); + if (da->entries != db->entries) + return false; + return true; + } + else return false; - return true; } -bool -dir_node::contains_clobbered_entry(dirent_t p) const +static bool +deep_equal(node_t const & a, + node_t const & b) { - map::const_iterator i = clobbered_entries.find(p); - if (i == clobbered_entries.end()) - return false; - return true; -} + bfs_iter pa(a), pb(b); + while (!(pa.finished() || pb.finished())) + { + if (!shallow_equal(*pa, *pb)) + return false; + ++pa; + ++pb; + } -bool -dir_node::contains_dir(dirent_t p) const -{ - dirmap_t::const_iterator i = entries.find(p); - if (i == entries.end()) + if (! (pa.finished() && pb.finished())) return false; - return is_dir_t(i->second); + + return true; } bool -dir_node::contains_file(dirent_t p) const +dir_node::contains_entry(dirent_t p) const { - dirmap_t::const_iterator i = entries.find(p); + map::const_iterator i = entries.find(p); if (i == entries.end()) return false; - return is_file_t(i->second); + return true; } node_t @@ -643,86 +757,67 @@ return i->second; } -dir_t -dir_node::get_dir_entry(dirent_t p) const -{ - return downcast_to_dir_t(get_entry(p)); -} - -file_t -dir_node::get_file_entry(dirent_t p) const -{ - return downcast_to_file_t(get_entry(p)); -} - node_t -dir_node::get_clobbered_entry(dirent_t p) const +dir_node::shallow_copy() const { - dirmap_t::const_iterator i = clobbered_entries.find(p); - I(i != clobbered_entries.end()); - return i->second; -} + dir_t d = dir_t(new dir_node()); + d->generation = 0; + d->ident = ident; + d->parent = parent; + if (fancy) + d->fancy.reset(new fancy_part(*fancy)); -dir_t -dir_node::get_clobbered_dir_entry(dirent_t p) const -{ - return downcast_to_dir_t(get_clobbered_entry(p)); -} + d->entries = entries; -file_t -dir_node::get_clobbered_file_entry(dirent_t p) const -{ - return downcast_to_file_t(get_clobbered_entry(p)); + return d; } -node_t -dir_node::clone() const +static node_t +deep_copy(node_t n) { - dir_t d = dir_t(new dir_node()); - d->ident = ident; - d->parent = parent; - d->name = name; - copy(attributes.begin(), attributes.end(), - inserter(d->attributes, d->attributes.begin())); - for (map::const_iterator i = entries.begin(); - i != entries.end(); ++i) + if (is_file_t(n)) { - d->entries.insert(make_pair(i->first, i->second->clone())); + return n->shallow_copy(); } - for (map::const_iterator i = clobbered_entries.begin(); - i != clobbered_entries.end(); ++i) + else { - d->clobbered_entries.insert(make_pair(i->first, i->second->clone())); + I(is_dir_t(n)); + deque work; + dir_t new_root = downcast_to_dir_t(n->shallow_copy()); + work.push_back(new_root); + + while (!work.empty()) + { + dir_t curr = work.front(); + work.pop_front(); + dirmap_t new_dirmap; + for (dirmap_t::const_iterator i = curr->entries.begin(); + i != curr->entries.end(); ++i) + { + node_t tmp = i->second->shallow_copy(); + new_dirmap.insert(make_pair(i->first, tmp)); + if (is_dir_t(tmp)) + work.push_back(downcast_to_dir_t(tmp)); + } + curr->entries = new_dirmap; + } + return new_root; } - return d; } + +//================================================================== +// mfests +//================================================================== + typedef hash_map node_map_t; static void -insert_into_map(dir_t d, - node_map_t & nodes) +index_nodes(dir_t d, node_map_t & nodes) { - deque work; - work.push_back(d); - while (!work.empty()) - { - node_t n = work.front(); - work.pop_front(); - - nodes.insert(make_pair(n->ident, n)); - - if (is_dir_t(n)) - { - dir_t tmp = downcast_to_dir_t(n); - for (dirmap_t::const_iterator i = tmp->entries.begin(); - i != tmp->entries.end(); ++i) - { - work.push_back(i->second); - } - } - } + for (bfs_iter i(d); !i.finished(); ++i) + nodes.insert(make_pair((*i)->ident, *i)); } struct @@ -737,12 +832,13 @@ mfest() : max_ident(graveyard_ident+1), - root(make_dir()) + root(new dir_node()) { - insert_into_map(root, nodes); + index_nodes(root, nodes); } mfest(mfest const & other); + void reset(mfest const & other); void check_sane() const; // informative @@ -759,18 +855,37 @@ dir_t get_dir_node(path_vec_t const & d) const; file_t get_file_node(path_vec_t const & f) const; + node_t get_node(ident_t i, gen_t write_generation); + dir_t get_dir_node(ident_t i, gen_t write_generation); + file_t get_file_node(ident_t i, gen_t write_generation); + + node_t get_node(path_vec_t const & n, gen_t write_generation); + dir_t get_dir_node(path_vec_t const & d, gen_t write_generation); + file_t get_file_node(path_vec_t const & f, gen_t write_generation); + dir_t get_containing_dir_node(path_vec_t const & d) const; + dir_t get_containing_dir_node(path_vec_t const & d, + gen_t write_generation); + bool operator==(mfest const & other) const; }; -mfest::mfest(mfest const & other) : - max_ident(other.max_ident), - root(downcast_to_dir_t(other.root->clone())) + +mfest::mfest(mfest const & other) { - insert_into_map(root, nodes); + this->reset(other); } +void +mfest::reset(mfest const & other) +{ + nodes.clear(); + root = other.root; + max_ident = other.max_ident; + index_nodes(root, nodes); +} + dir_t mfest::make_dir() { @@ -795,35 +910,20 @@ mfest::check_sane() const { - deque work; hash_set seen; - work.push_back(root->ident); - + // first go from the top of the tree to the bottom checking each // directory for absence of cycle-forming edges and agreement // between names. - while (!work.empty()) + L(F("mfest sanity check beginning...\n")); + for(bfs_iter i(root); !i.finished(); ++i) { - ident_t tmp = work.front(); - work.pop_front(); - - I(seen.find(tmp) == seen.end()); - seen.insert(tmp); - - node_t tnode = get_node(tmp); - if (is_dir_t(tnode)) - { - dir_t tdir = downcast_to_dir_t(tnode); - for (dirmap_t::const_iterator i = tdir->entries.begin(); - i != tdir->entries.end(); ++i) - { - node_t child = i->second; - I(i->first == child->name); - I(child->parent == tdir->ident); - work.push_back(i->second->ident); - } - } + path_vec_t v; + get_path(*i, v); + L(F("tree iter visiting %s\n") % v.to_file_path()); + I(seen.find((*i)->ident) == seen.end()); + seen.insert((*i)->ident); } // now go through the node map and make sure that we both found @@ -833,13 +933,16 @@ for (node_map_t::const_iterator i = nodes.begin(); i != nodes.end(); ++i) { - if (i->second->parent != nursery_ident - && i->second->parent != graveyard_ident) + path_vec_t v; + get_path(i->second, v); + L(F("set iter visiting %s\n") % v.to_file_path()); + if (i->second->live()) { I(seen.find(i->first) != seen.end()); } I(i->first == i->second->ident); } + L(F("mfest sanity check done")); } bool @@ -850,12 +953,18 @@ vector::const_iterator i = v.dir.begin(), j = v.dir.end(); while(i != j) { - if (!d->contains_dir(*i)) + if (!d->contains_entry(*i)) return false; + + node_t n = d->get_entry(*i); - d = d->get_dir_entry(*i++); + if (!is_dir_t(n)) + return false; + + d = downcast_to_dir_t(n); } - return d->contains_file(v.leaf); + return d->contains_entry(v.leaf) + && is_file_t(d->get_entry(v.leaf)); } bool @@ -866,12 +975,18 @@ vector::const_iterator i = v.dir.begin(), j = v.dir.end(); while(i != j) { - if (!d->contains_dir(*i)) + if (!d->contains_entry(*i)) return false; + + node_t n = d->get_entry(*i); - d = d->get_dir_entry(*i++); + if (!is_dir_t(n)) + return false; + + d = downcast_to_dir_t(n); } - return d->contains_dir(v.leaf); + return d->contains_entry(v.leaf) + && is_dir_t(d->get_entry(v.leaf)); } node_t @@ -911,10 +1026,11 @@ mfest::get_containing_dir_node(path_vec_t const & v) const { dir_t d = root; - vector::const_iterator i = v.dir.begin(), j = v.dir.end(); - while(i != j) + + for (vector::const_iterator i = v.dir.begin(); + i != v.dir.end(); ++i) { - d = d->get_dir_entry(*i++); + d = downcast_to_dir_t(d->get_entry(*i)); } return d; } @@ -927,7 +1043,7 @@ void mfest::get_path(node_t const & n, - path_vec_t & path) const + path_vec_t & path) const { path.dir.clear(); path.leaf = n->name; @@ -941,13 +1057,105 @@ reverse(path.dir.begin(), path.dir.end()); } + +// these versions implement copy-on-write, updating the +// tree to point to nodes meeting write_generation + +static void +ensure_node_meets_generation(mfest & m, + dir_t & d, + node_t & n, + gen_t write_generation) +{ + if (n->generation < write_generation) + { + n = n->shallow_copy(); + n->generation = write_generation; + + m.nodes.erase(n->ident); + m.nodes.insert(make_pair(n->ident, n)); + + d->entries.erase(n->name); + d->entries.insert(make_pair(n->name, n)); + } +} + +dir_t +mfest::get_containing_dir_node(path_vec_t const & v, + gen_t write_generation) +{ + if (root->generation < write_generation) + { + L(F("upgrading root to write generation %d\n") % write_generation); + root = downcast_to_dir_t(root->shallow_copy()); + root->generation = write_generation; + nodes.erase(root->ident); + nodes.insert(make_pair(root->ident, root)); + } + + dir_t d = root; + for (vector::const_iterator i = v.dir.begin(); + i != v.dir.end(); ++i) + { + node_t n = d->get_entry(*i); + ensure_node_meets_generation(*this, d, n, write_generation); + d = downcast_to_dir_t(n); + } + return d; +} + +node_t +mfest::get_node(path_vec_t const & pth, + gen_t write_generation) +{ + dir_t d = get_containing_dir_node(pth, write_generation); + node_t n = d->get_entry(pth.leaf); + ensure_node_meets_generation(*this, d, n, write_generation); + return n; +} + +dir_t +mfest::get_dir_node(path_vec_t const & pth, + gen_t write_generation) +{ + return downcast_to_dir_t(get_node(pth, write_generation)); +} + +file_t +mfest::get_file_node(path_vec_t const & pth, + gen_t write_generation) +{ + return downcast_to_file_t(get_node(pth, write_generation)); +} + +node_t +mfest::get_node(ident_t i, gen_t write_generation) +{ + path_vec_t pth; + node_t n = get_node(i); + get_path(n, pth); + return get_node(pth, write_generation); +} + +dir_t +mfest::get_dir_node(ident_t i, gen_t write_generation) +{ + return downcast_to_dir_t(get_node(i, write_generation)); +} + +file_t +mfest::get_file_node(ident_t i, gen_t write_generation) +{ + return downcast_to_file_t(get_node(i, write_generation)); +} + bool mfest::operator==(mfest const & other) const { if (max_ident != other.max_ident) return false; - if (!equal_nodes(root, other.root, true)) + if (!deep_equal(root, other.root)) return false; for (node_map_t::const_iterator i = nodes.begin(); @@ -958,70 +1166,104 @@ if (j == other.nodes.end()) return false; - if (!equal_nodes(i->second, j->second, false)) + if (!shallow_equal(i->second, j->second)) return false; } return true; } +//================================================================== +// change_consumers +//================================================================== - struct change_consumer { - void delete_dir(file_path const & dp); - void delete_file(file_path const & fp); - void rename_dir(file_path const & src, file_path const & dst); - void rename_file(file_path const & src, file_path const & dst); + + virtual ~change_consumer() {} + + void set_heir(file_path const & dying, + file_path const & heir); + + void delete_node(file_path const & pth); + void rename_node(file_path const & src, + file_path const & dst); + void add_dir(file_path const & dp); void add_file(file_path const & fp); + + void set_sire(file_path const & newborn, + file_path const & sire); + void apply_delta(file_path const & path, file_id const & src, file_id const & dst); - virtual void delete_dir(path_vec_t const & dp) = 0; - virtual void delete_file(path_vec_t const & fp) = 0; - virtual void rename_dir(path_vec_t const & src, path_vec_t const & dst) = 0; - virtual void rename_file(path_vec_t const & src, path_vec_t const & dst) = 0; + void set_attr(file_path const & path, + attr_name const & name, + attr_val const & val); + + void clear_attr(file_path const & path, + attr_name const & name); + + virtual void finalize_renames() {} + + // this part is just a lower-level form of the API above, to avoid + // composing or parsing file_paths in their string-y form + + virtual void set_heir(path_vec_t const & dying, + path_vec_t const & heir) = 0; + + virtual void delete_node(path_vec_t const & path) = 0; + + virtual void rename_node(path_vec_t const & src, + path_vec_t const & dst) = 0; + virtual void add_dir(path_vec_t const & dp) = 0; virtual void add_file(path_vec_t const & fp) = 0; + + virtual void set_sire(path_vec_t const & newborn, + path_vec_t const & sire) = 0; + virtual void apply_delta(path_vec_t const & path, file_id const & src, file_id const & dst) = 0; + + virtual void set_attr(path_vec_t const & path, + attr_name const & name, + attr_val const & val) = 0; + + virtual void clear_attr(path_vec_t const & path, + attr_name const & name) = 0; }; void -change_consumer::delete_dir(file_path const & dp) +change_consumer::set_heir(file_path const & dying, + file_path const & heir) { - L(F("delete_dir('%s')\n") % dp); - this->delete_dir(path_vec_t(dp)); + L(F("set_heir('%s', '%s')\n") % dying % heir); + this->set_heir(path_vec_t(dying), + path_vec_t(heir)); } void -change_consumer::delete_file(file_path const & fp) +change_consumer::delete_node(file_path const & dp) { - L(F("delete_file('%s')\n") % fp); - this->delete_file(path_vec_t(fp)); + L(F("delete_node('%s')\n") % dp); + this->delete_node(path_vec_t(dp)); } void -change_consumer::rename_dir(file_path const & src, - file_path const & dst) +change_consumer::rename_node(file_path const & a, + file_path const & b) { - L(F("rename_dir('%s', '%s')\n") % src % dst); - this->rename_dir(path_vec_t(src), path_vec_t(dst)); + L(F("rename_node('%s', '%s')\n") % a % b); + this->rename_node(path_vec_t(a), + path_vec_t(b)); } void -change_consumer::rename_file(file_path const & src, - file_path const & dst) -{ - L(F("rename_file('%s', '%s')\n") % src % dst); - this->rename_file(path_vec_t(src), path_vec_t(dst)); -} - -void change_consumer::add_dir(file_path const & dp) { L(F("add_dir('%s')\n") % dp); @@ -1036,6 +1278,15 @@ } void +change_consumer::set_sire(file_path const & newborn, + file_path const & sire) +{ + L(F("set_sire('%s', '%s')\n") % newborn % sire); + this->set_sire(path_vec_t(newborn), + path_vec_t(sire)); +} + +void change_consumer::apply_delta(file_path const & path, file_id const & src, file_id const & dst) @@ -1044,21 +1295,106 @@ this->apply_delta(path_vec_t(path), src, dst); } +void +change_consumer::set_attr(file_path const & path, + attr_name const & name, + attr_val const & val) +{ + L(F("set_attr('%s', '%s', '%s')\n") % path % name % val); + this->set_attr(path_vec_t(path), name, val); +} +void +change_consumer::clear_attr(file_path const & path, + attr_name const & name) +{ + L(F("clear_attr('%s', '%s')\n") % path % name); + this->clear_attr(path_vec_t(path), name); +} + + +//================================================================== +// attach_detach_change_consumers +//================================================================== + struct +attach_detach_change_consumer + : public change_consumer +{ + virtual ~attach_detach_change_consumer() {} + + virtual ident_t detach(path_vec_t const & path) = 0; + virtual void attach(path_vec_t const & path, ident_t id) = 0; + + // renames are directed into a temporary buffer, which + // then executes them in two steps using the lower-level API + // "attach" and "detach". + + vector< pair > pending_renames; + + virtual void delete_node(path_vec_t const & src); + virtual void rename_node(path_vec_t const & src, + path_vec_t const & dst); + virtual void finalize_renames(); + +}; + +void +attach_detach_change_consumer::delete_node(path_vec_t const & path) +{ + this->detach(path); +} + +void +attach_detach_change_consumer::rename_node(path_vec_t const & src, + path_vec_t const & dst) +{ + pending_renames.push_back(make_pair(src, dst)); +} + +void +attach_detach_change_consumer::finalize_renames() +{ + vector > tmp; + + for (vector< pair >::reverse_iterator i = pending_renames.rbegin(); + i != pending_renames.rend(); ++i) + { + ident_t t = detach(i->first); + tmp.push_back(make_pair(i->second, t)); + } + + for (vector< pair >::const_iterator i = tmp.begin(); + i != tmp.end(); ++i) + { + attach(i->first, i->second); + } +} + + + +//================================================================== +// csets +//================================================================== + + +struct cset - : public change_consumer + : public attach_detach_change_consumer { mfest old_mfest; mfest new_mfest; + gen_t write_generation; - cset(mfest const & base) : - old_mfest(base), - new_mfest(base) - {} + cset() + { + write_generation = 1; + } - cset() - {} + cset(mfest const & base) + { + this->reset(base); + } virtual ~cset() {} @@ -1068,16 +1404,29 @@ void replay_changes(change_consumer & cc) const; void replay_inverse_changes(change_consumer & cc) const; - virtual void delete_dir(path_vec_t const & dp); - virtual void delete_file(path_vec_t const & fp); - virtual void rename_dir(path_vec_t const & src, path_vec_t const & dst); - virtual void rename_file(path_vec_t const & src, path_vec_t const & dst); + virtual void set_heir(path_vec_t const & dying, + path_vec_t const & heir); + + virtual ident_t detach(path_vec_t const & path); + virtual void attach(path_vec_t const & path, ident_t id); + virtual void add_dir(path_vec_t const & dp); virtual void add_file(path_vec_t const & fp); + + virtual void set_sire(path_vec_t const & newborn, + path_vec_t const & sire); + virtual void apply_delta(path_vec_t const & path, file_id const & src, file_id const & dst); + virtual void set_attr(path_vec_t const & path, + attr_name const & name, + attr_val const & val); + + virtual void clear_attr(path_vec_t const & path, + attr_name const & name); + bool operator==(cset const & other) const { if (! (old_mfest == other.old_mfest)) @@ -1089,12 +1438,24 @@ }; +static gen_t +find_max_write_generation(dir_t d) +{ + gen_t m = 0; + for (dfs_iter i(d); !i.finished(); ++i) + if ((*i)->generation > m) + m = (*i)->generation; + return m; +} void cset::reset(mfest const & m) { - old_mfest = m; - new_mfest = m; + old_mfest.reset(m); + new_mfest.reset(m); + write_generation = find_max_write_generation(m.root); + I(write_generation != UINT64_MAX); + ++write_generation; } @@ -1127,14 +1488,16 @@ check_mfests_agree(old_mfest, new_mfest); } + void -cset::delete_dir(path_vec_t const & dp) +cset::set_heir(path_vec_t const & dying, + path_vec_t const & heir) { - dir_t src = old_mfest.get_dir_node(dp); - dir_t dst = new_mfest.get_dir_node(src->ident); - dir_t parent_of_dst = new_mfest.get_dir_node(dst->parent); - parent_of_dst->drop_dir(dp.leaf); - dst->parent = graveyard_ident; + node_t n = new_mfest.get_node(dying, write_generation); + node_t h = new_mfest.get_node(heir); + n->ensure_fancy(); + n->fancy->heir = h->ident; + if (global_sanity.debug) { check_sane(); @@ -1142,297 +1505,566 @@ } void -cset::delete_file(path_vec_t const & fp) +cset::set_sire(path_vec_t const & newborn, + path_vec_t const & sire) { - file_t src = old_mfest.get_file_node(fp); - file_t dst = new_mfest.get_file_node(src->ident); - dir_t parent_of_dst = new_mfest.get_dir_node(dst->parent); - parent_of_dst->drop_file(fp.leaf); - dst->parent = graveyard_ident; + node_t n = new_mfest.get_node(newborn, write_generation); + node_t s = new_mfest.get_node(sire); + n->ensure_fancy(); + n->fancy->sire = s->ident; + if (global_sanity.debug) { check_sane(); } } -void -cset::rename_dir(path_vec_t const & s, - path_vec_t const & d) +ident_t +cset::detach(path_vec_t const & path) { - dir_t src = old_mfest.get_dir_node(s); - dir_t dst = new_mfest.get_dir_node(src->ident); - dir_t old_dst_parent = new_mfest.get_dir_node(dst->parent); - dir_t new_dst_parent = new_mfest.get_containing_dir_node(d); - old_dst_parent->drop_dir(dst->name, true); - new_dst_parent->add_child(d.leaf, dst, true); + node_t src = old_mfest.get_node(path); + node_t dst = new_mfest.get_node(src->ident, write_generation); + dir_t parent_of_dst = new_mfest.get_dir_node(dst->parent, write_generation); + + parent_of_dst->drop_child(dst->name); + dst->parent = graveyard_ident; + if (global_sanity.debug) { check_sane(); } + + return dst->ident; } void -cset::rename_file(path_vec_t const & s, - path_vec_t const & d) +cset::attach(path_vec_t const & path, ident_t id) { - file_t src = old_mfest.get_file_node(s); - file_t dst = new_mfest.get_file_node(src->ident); - dir_t old_dst_parent = new_mfest.get_dir_node(dst->parent); - dir_t new_dst_parent = new_mfest.get_containing_dir_node(d); - old_dst_parent->drop_file(dst->name, true); - new_dst_parent->add_child(d.leaf, dst, true); + node_t n = new_mfest.get_node(id); + dir_t d = new_mfest.get_containing_dir_node(path, write_generation); + d->add_child(path.leaf, n); + if (global_sanity.debug) { check_sane(); } } -void cset::add_dir(path_vec_t const & dp) +void +cset::add_dir(path_vec_t const & dp) { - dir_t new_dst_parent = new_mfest.get_containing_dir_node(dp); + dir_t new_dst_parent = new_mfest.get_containing_dir_node(dp, write_generation); dir_t new_dir = old_mfest.make_dir(); - dir_t new_clone = downcast_to_dir_t(new_dir->clone()); - new_dst_parent->add_child(dp.leaf, new_clone); - new_mfest.nodes.insert(make_pair(new_dir->ident, new_clone)); + node_t new_dir_in_new_mfest = new_dir->shallow_copy(); + + new_dst_parent->add_child(dp.leaf, new_dir_in_new_mfest); + new_mfest.nodes.insert(make_pair(new_dir->ident, new_dir_in_new_mfest)); + if (global_sanity.debug) { check_sane(); } } -void cset::add_file(path_vec_t const & fp) +void +cset::add_file(path_vec_t const & fp) { - dir_t new_dst_parent = new_mfest.get_containing_dir_node(fp); + dir_t new_dst_parent = new_mfest.get_containing_dir_node(fp, write_generation); file_t new_file = old_mfest.make_file(); - file_t new_clone = downcast_to_file_t(new_file->clone()); - new_dst_parent->add_child(fp.leaf, new_clone); - new_mfest.nodes.insert(make_pair(new_file->ident, new_clone)); + node_t new_file_in_new_mfest = new_file->shallow_copy(); + + new_dst_parent->add_child(fp.leaf, new_file_in_new_mfest); + new_mfest.nodes.insert(make_pair(new_file->ident, new_file_in_new_mfest)); + if (global_sanity.debug) { check_sane(); } } -void cset::apply_delta(path_vec_t const & path, - file_id const & s, - file_id const & d) +void +cset::apply_delta(path_vec_t const & path, + file_id const & s, + file_id const & d) { - file_t dst = new_mfest.get_file_node(path); + file_t dst = new_mfest.get_file_node(path, write_generation); file_t src = old_mfest.get_file_node(dst->ident); - src->content = s; + + I(src->content == s); dst->content = d; + if (global_sanity.debug) { check_sane(); } } -static void -play_changes(change_consumer & cc, - mfest const & a, - mfest const & b); - void -cset::replay_changes(change_consumer & cc) const +cset::set_attr(path_vec_t const & path, + attr_name const & name, + attr_val const & val) { - check_sane(); - play_changes(cc, old_mfest, new_mfest); + node_t n = new_mfest.get_node(path, write_generation); + n->set_attr(name, val); } void -cset::replay_inverse_changes(change_consumer & cc) const +cset::clear_attr(path_vec_t const & path, + string const & name) { - check_sane(); - play_changes(cc, new_mfest, old_mfest); + node_t n = new_mfest.get_node(path, write_generation); + n->clear_attr(name); } -enum -replay_mode_t - { - replay_delete_dir, - replay_delete_file, - replay_rename_dir, - replay_rename_file, - replay_add_dir, - replay_add_file, - replay_apply_delta - }; +typedef vector > node_pair_vec; +typedef vector > file_pair_vec; +typedef vector, + shared_ptr > > > +node_attr_name_vec; +struct replay_record +{ + node_pair_vec heirs_set; + node_pair_vec dirs_deleted; + node_pair_vec files_deleted; + node_pair_vec nodes_renamed; + node_pair_vec dirs_added; + node_pair_vec files_added; + node_pair_vec sires_set; + file_pair_vec deltas_applied; + node_attr_name_vec attrs_changed; +}; + static void -play_changes_from_dir(change_consumer & cc, - mfest const & src, - mfest const & dst, - replay_mode_t mode, - dir_t cwd) +play_back_replay_record(replay_record const & rr, + mfest const & src, + mfest const & dst, + change_consumer & cc) { + path_vec_t v1, v2; - typedef pair state; - stack stk; + for (node_pair_vec::const_iterator i = rr.heirs_set.begin(); + i != rr.heirs_set.end(); ++i) + { + src.get_path(i->first, v1); + dst.get_path(i->second, v2); + cc.set_heir(v1, v2); + } - stk.push(make_pair(cwd, cwd->entries.begin())); + for (node_pair_vec::const_iterator i = rr.files_deleted.begin(); + i != rr.files_deleted.end(); ++i) + { + src.get_path(i->first, v1); + cc.delete_node(v1); + } - while (! stk.empty()) + for (node_pair_vec::const_iterator i = rr.dirs_deleted.begin(); + i != rr.dirs_deleted.end(); ++i) { - bool pushed = false; + src.get_path(i->first, v1); + cc.delete_node(v1); + } - for (state & curr = stk.top(); - curr.second != curr.first->entries.end() && !pushed; - ++curr.second) - { + for (node_pair_vec::const_iterator i = rr.nodes_renamed.begin(); + i != rr.nodes_renamed.end(); ++i) + { + src.get_path(i->first, v1); + dst.get_path(i->second, v2); + cc.rename_node(v1, v2); + } - cwd = curr.first; - L(F("replaying %s = %d\n") % cwd->name->val % cwd->ident); - node_t self = curr.second->second, other; - path_vec_t v1, v2; + cc.finalize_renames(); - switch (mode) - { - case replay_delete_dir: - // cwd is a directory in src space - other = dst.get_node(self->ident); - if (is_dir_t(self) - && self->live() - && other->killed()) - { - I(is_dir_t(other)); - src.get_path(self, v1); - cc.delete_dir(v1); - } - break; + for (node_pair_vec::const_iterator i = rr.dirs_added.begin(); + i != rr.dirs_added.end(); ++i) + { + dst.get_path(i->second, v2); + cc.add_dir(v2); + } - case replay_delete_file: - // cwd is a directory in src space - other = dst.get_node(self->ident); - if (is_file_t(self) - && self->live() - && other->killed()) + for (node_pair_vec::const_iterator i = rr.files_added.begin(); + i != rr.files_added.end(); ++i) + { + dst.get_path(i->second, v2); + cc.add_file(v2); + } + + for (node_pair_vec::const_iterator i = rr.sires_set.begin(); + i != rr.sires_set.end(); ++i) + { + src.get_path(i->first, v1); + dst.get_path(i->second, v2); + cc.set_sire(v2, v1); + } + + for (file_pair_vec::const_iterator i = rr.deltas_applied.begin(); + i != rr.deltas_applied.end(); ++i) + { + dst.get_path(i->second, v2); + cc.apply_delta(v2, + i->first->content, + i->second->content); + } + + // attr pass #1: emit all the cleared attrs + for (node_attr_name_vec::const_iterator i = rr.attrs_changed.begin(); + i != rr.attrs_changed.end(); ++i) + { + node_t a = i->first.first; + node_t b = i->first.second; + shared_ptr< set > attr_names = i->second; + for (set::const_iterator j = attr_names->begin(); + j != attr_names->end(); ++j) + { + if (a->has_attr(*j) && !b->has_attr(*j)) + { + dst.get_path(b, v2); + cc.clear_attr(v2, *j); + } + } + } + + // attr pass #2: emit all the set attrs + for (node_attr_name_vec::const_iterator i = rr.attrs_changed.begin(); + i != rr.attrs_changed.end(); ++i) + { + node_t a = i->first.first; + node_t b = i->first.second; + shared_ptr< set > attr_names = i->second; + for (set::const_iterator j = attr_names->begin(); + j != attr_names->end(); ++j) + { + if (b->has_attr(*j)) + { + if ((!a->has_attr(*j)) + || (a->has_attr(*j) && + a->get_attr(*j) != b->get_attr(*j))) { - I(is_file_t(other)); - src.get_path(self, v1); - cc.delete_file(v1); + dst.get_path(b, v2); + cc.set_attr(v2, *j, b->get_attr(*j)); } - break; + } + } + } +} - case replay_rename_dir: - // cwd is a directory in dst space - other = src.get_node(self->ident); - if (is_dir_t(self) - && self->live() - && other->live() - && ((other->name->val != self->name->val) - || (other->parent != self->parent))) - { - I(is_dir_t(other)); - src.get_path(other, v1); - dst.get_path(self, v2); - cc.rename_dir(v1, v2); - } - break; - case replay_rename_file: - // cwd is a directory in dst space - other = src.get_node(self->ident); - if (is_file_t(self) - && self->live() - && other->live() - && ((other->name->val != self->name->val) - || (other->parent != self->parent))) - { - I(is_file_t(other)); - src.get_path(other, v1); - dst.get_path(self, v2); - cc.rename_file(v1, v2); - } - break; +static void +play_back_replay_record_inverse(replay_record const & rr, + mfest const & src, + mfest const & dst, + change_consumer & cc) +{ + path_vec_t v1, v2; - case replay_add_dir: - // cwd is a directory in dst space - other = src.get_node(self->ident); - if (is_dir_t(self) - && other->unborn() - && self->live()) - { - I(is_dir_t(other)); - dst.get_path(self, v2); - cc.add_dir(v2); - } - break; + for (node_pair_vec::const_iterator i = rr.sires_set.begin(); + i != rr.sires_set.end(); ++i) + { + // the forward cset goes from src->dst, and had + // set_sire(newborn, sire) with newborn in dst and sire in src + // + // the inverse cset goes from src<-dst, and has + // set_heir(dying, heir) with dying in dst and heir in src - case replay_add_file: - // cwd is a directory in dst space - other = src.get_node(self->ident); - if (is_file_t(self) - && other->unborn() - && self->live()) + src.get_path(i->first, v1); + dst.get_path(i->second, v2); + cc.set_heir(v2, v1); + } + + for (node_pair_vec::const_iterator i = rr.files_added.begin(); + i != rr.files_added.end(); ++i) + { + // the forward cset goes from src->dst, and had + // add_file(newborn) with newborn in dst + // + // the inverse cset goes from src<-dst, and has + // delete_node(dying) with dying in dst + + dst.get_path(i->second, v2); + cc.delete_node(v2); + } + + for (node_pair_vec::const_iterator i = rr.dirs_added.begin(); + i != rr.dirs_added.end(); ++i) + { + // the forward cset goes from src->dst, and had + // add_dir(newborn) with newborn in dst + // + // the inverse cset goes from src<-dst, and has + // delete_node(dying) with dying in dst + + dst.get_path(i->second, v2); + cc.delete_node(v2); + } + + for (node_pair_vec::const_iterator i = rr.nodes_renamed.begin(); + i != rr.nodes_renamed.end(); ++i) + { + // the forward cset goes from src->dst, and had + // rename_node(a, b) with a in src in b in dst + // + // the inverse cset goes from src<-dst, and has + // rename_node(b, a) with a in src in b in dst + + src.get_path(i->first, v1); + dst.get_path(i->second, v2); + cc.rename_node(v2, v1); + } + + cc.finalize_renames(); + + + for (node_pair_vec::const_iterator i = rr.dirs_deleted.begin(); + i != rr.dirs_deleted.end(); ++i) + { + // the forward cset goes from src->dst, and had + // delete_node(dying) with dying in src + // + // the inverse cset goes from src<-dst, and has + // add_dir(newborn) with newborn in src + + dst.get_path(i->first, v1); + cc.add_dir(v1); + } + + for (node_pair_vec::const_iterator i = rr.files_deleted.begin(); + i != rr.files_deleted.end(); ++i) + { + // the forward cset goes from src->dst, and had + // delete_node(dying) with dying in src + // + // the inverse cset goes from src<-dst, and has + // add_file(newborn) with newborn in src + + dst.get_path(i->first, v1); + cc.add_file(v1); + } + + for (node_pair_vec::const_iterator i = rr.heirs_set.begin(); + i != rr.heirs_set.end(); ++i) + { + // the forward cset goes from src->dst, and had + // set_heir(dying, heir) with dying in src and heir in dst + // + // the inverse cset goes from src<-dst, and has + // set_sire(newborn, sire) with newborn in src and sire in dst + + src.get_path(i->first, v1); + dst.get_path(i->second, v2); + cc.set_sire(v1, v2); + } + + for (file_pair_vec::const_iterator i = rr.deltas_applied.begin(); + i != rr.deltas_applied.end(); ++i) + { + // the forward cset goes from src->dst, and had + // apply_delta(pth, a, b) with a in src, and pth and b in dst + // + // the inverse cset goes from src<-dst, and has + // apply_delta(pth, a, b) with a in dst, and pth and b in src + + src.get_path(i->first, v1); + cc.apply_delta(v1, + i->second->content, + i->first->content); + } + + + // attr pass #1: emit all the cleared attrs + for (node_attr_name_vec::const_iterator i = rr.attrs_changed.begin(); + i != rr.attrs_changed.end(); ++i) + { + // the forward cset goes from src->dst, nodes a->b, and had + // clear_attr(b, attr) with b in dst + // + // the inverse cset goes from src<-dst, nodes a<-b, and has + // clear_attr(a, attr) with a in src + + node_t a = i->first.first; + node_t b = i->first.second; + shared_ptr< set > attr_names = i->second; + for (set::const_iterator j = attr_names->begin(); + j != attr_names->end(); ++j) + { + if (b->has_attr(*j) && !a->has_attr(*j)) + { + dst.get_path(a, v1); + cc.clear_attr(v1, *j); + } + } + } + + // attr pass #2: emit all the set attrs + for (node_attr_name_vec::const_iterator i = rr.attrs_changed.begin(); + i != rr.attrs_changed.end(); ++i) + { + // the forward cset goes from src->dst, nodes a->b, and had + // set_attr(b, attr, val) with b in dst + // + // the inverse cset goes from src<-dst, nodes a<-b, and has + // set_attr(a, attr, val) with a in src + + node_t a = i->first.first; + node_t b = i->first.second; + shared_ptr< set > attr_names = i->second; + for (set::const_iterator j = attr_names->begin(); + j != attr_names->end(); ++j) + { + if (a->has_attr(*j)) + { + if ((!b->has_attr(*j)) + || (b->has_attr(*j) && + b->get_attr(*j) != a->get_attr(*j))) { - I(is_file_t(other)); - dst.get_path(self, v2); - cc.add_file(v2); + dst.get_path(a, v1); + cc.set_attr(v1, *j, a->get_attr(*j)); } - break; + } + } + } +} - case replay_apply_delta: - // cwd is a directory in dst space - other = src.get_node(self->ident); - if (is_file_t(self)) - { - file_t self_f = downcast_to_file_t(self); - file_t other_f = downcast_to_file_t(other); - dst.get_path(self, v2); - - if (other->unborn() - && self->live()) - { - // deltas accompanying additions - cc.apply_delta(v2, file_id(), self_f->content); - } - else if (self->live() - && other->live() - && !(self_f->content == other_f->content)) - { - // normal deltas - cc.apply_delta(v2, - other_f->content, - self_f->content); - } - } - break; +static void +build_replay_record(mfest const & src, + mfest const & dst, + replay_record & rr) +{ + // we do two passes accumulating nodes: the first pass accumulates + // nodes in the topological order they appear in the src map, the + // second accumulates nodes in the toplogical order they appear in + // the dst map. in both cases we append any interesting nodes to + // vectors which we then replay as blocks of related changes. + + for (dfs_iter i(src.root); !i.finished(); ++i) + { + // in this pass we accumulate heirs_set and nodes_deleted. + // the "self" node is a member of the "src" directory tree. + node_t self = *i; + node_t other = dst.get_node(self->ident); + + I(self->live()); + + if (other->killed()) + { + if (is_dir_t(self)) + rr.dirs_deleted.push_back(make_pair(self, other)); + else + rr.files_deleted.push_back(make_pair(self, other)); + + if (self->has_heir()) + { + node_t heir = dst.get_node(self->fancy->heir); + rr.heirs_set.push_back(make_pair(self, heir)); } + } + } + for (dfs_iter i(dst.root); !i.finished(); ++i) + { + // in this pass we accumulate nodes_renamed, files_added, + // dirs_added, sires_set, deltas_applied, attrs_cleared, + // and attrs_set. + // + // the "self" node is a member of the "dst" directory tree. + node_t self = *i; + + I(self->live()); + + node_t other = src.get_node(self->ident); + if (other->unborn()) + { if (is_dir_t(self)) { - dir_t dtmp = downcast_to_dir_t(self); - stk.push(make_pair(dtmp, dtmp->entries.begin())); - pushed = true; - L(F("pushed %s = %d\n") % self->name->val % self->ident); + I(is_dir_t(other)); + rr.dirs_added.push_back(make_pair(other, self)); } + else + { + I(is_file_t(self)); + I(is_file_t(other)); + rr.files_added.push_back(make_pair(other, self)); + } + if (self->has_sire()) + { + node_t sire = src.get_node(self->fancy->sire); + rr.sires_set.push_back(make_pair(self, sire)); + } + } + else if (other->live()) + { + if ((other->name->val != self->name->val) + || (other->parent != self->parent)) + { + rr.nodes_renamed.push_back(make_pair(other, self)); + } } + + // content deltas + if (is_file_t(self)) + { + I(is_file_t(other)); + file_t f_other = downcast_to_file_t(other); + file_t f_self = downcast_to_file_t(self); + if (!(f_other->content == f_self->content)) + { + rr.deltas_applied.push_back(make_pair(f_other, f_self)); + } + } - if (!pushed) - stk.pop(); - } -} + // attrs which were changed + if (other->has_attrs() || self->has_attrs()) + { + map self_attrs; + map other_attrs; -static void -play_changes(change_consumer & cc, - mfest const & a, - mfest const & b) -{ - play_changes_from_dir(cc, a, b, replay_delete_dir, a.root); - play_changes_from_dir(cc, a, b, replay_delete_file, a.root); + if (self->has_attrs()) + self_attrs = self->fancy->attrs; - play_changes_from_dir(cc, a, b, replay_rename_dir, b.root); - play_changes_from_dir(cc, a, b, replay_rename_file, b.root); + if (other->has_attrs()) + other_attrs = other->fancy->attrs; - play_changes_from_dir(cc, a, b, replay_add_dir, b.root); - play_changes_from_dir(cc, a, b, replay_add_file, b.root); + shared_ptr< set > attr_set = shared_ptr< set >(new set); - play_changes_from_dir(cc, a, b, replay_apply_delta, b.root); + for (map::const_iterator a = self_attrs.begin(); + a != self_attrs.end(); ++a) + { + if (other_attrs[a->first] != a->second) + attr_set->insert(a->first); + } + + for (map::const_iterator b = other_attrs.begin(); + b != other_attrs.end(); ++b) + { + if (self_attrs[b->first] != b->second) + attr_set->insert(b->first); + } + + if (!attr_set->empty()) + rr.attrs_changed.push_back(make_pair(make_pair(self, other), attr_set)); + } + } } +void +cset::replay_changes(change_consumer & cc) const +{ + replay_record rr; + check_sane(); + build_replay_record(old_mfest, new_mfest, rr); + play_back_replay_record(rr, old_mfest, new_mfest, cc); +} +void +cset::replay_inverse_changes(change_consumer & cc) const +{ + replay_record rr; + check_sane(); + build_replay_record(old_mfest, new_mfest, rr); + play_back_replay_record_inverse(rr, old_mfest, new_mfest, cc); +} + void concatenate_changesets(cset const & a, cset const & b, @@ -1442,8 +2074,6 @@ b.check_sane(); c.reset(a.new_mfest); b.replay_changes(c); - c.old_mfest.root->flush_clobbered(); - c.new_mfest.root->flush_clobbered(); c.check_sane(); } @@ -1500,15 +2130,19 @@ { namespace syms { + std::string const set_heir("set_heir"); + std::string const delete_node("delete"); + std::string const rename_node("rename"); + std::string const add_file("add_file"); + std::string const add_dir("add_dir"); + std::string const set_sire("set_sire"); std::string const patch("patch"); std::string const from("from"); std::string const to("to"); - std::string const add_file("add_file"); - std::string const add_dir("add_dir"); - std::string const delete_file("delete_file"); - std::string const delete_dir("delete_dir"); - std::string const rename_file("rename_file"); - std::string const rename_dir("rename_dir"); + std::string const clear("clear"); + std::string const set("set"); + std::string const attr("attr"); + std::string const value("value"); } } @@ -1520,47 +2154,50 @@ while (parser.symp()) { std::string t1, t2, t3; - if (parser.symp(syms::add_file)) + if (parser.symp(syms::set_heir)) { parser.sym(); parser.str(t1); - cc.add_file(file_path(t1)); + parser.esym(syms::to); + parser.str(t2); + cc.set_heir(file_path(t1), + file_path(t2)); } - else if (parser.symp(syms::add_dir)) + else if (parser.symp(syms::delete_node)) { parser.sym(); parser.str(t1); - cc.add_dir(file_path(t1)); + cc.delete_node(file_path(t1)); } - else if (parser.symp(syms::delete_file)) + else if (parser.symp(syms::rename_node)) { parser.sym(); parser.str(t1); - cc.delete_file(file_path(t1)); + parser.esym(syms::to); + parser.str(t2); + cc.rename_node(file_path(t1), + file_path(t2)); } - else if (parser.symp(syms::delete_dir)) + else if (parser.symp(syms::add_file)) { parser.sym(); parser.str(t1); - cc.delete_dir(file_path(t1)); + cc.add_file(file_path(t1)); } - else if (parser.symp(syms::rename_file)) + else if (parser.symp(syms::add_dir)) { parser.sym(); parser.str(t1); - parser.esym(syms::to); - parser.str(t2); - cc.rename_file(file_path(t1), - file_path(t2)); + cc.add_dir(file_path(t1)); } - else if (parser.symp(syms::rename_dir)) + else if (parser.symp(syms::set_sire)) { parser.sym(); parser.str(t1); - parser.esym(syms::to); + parser.esym(syms::from); parser.str(t2); - cc.rename_dir(file_path(t1), - file_path(t2)); + cc.set_sire(file_path(t1), + file_path(t2)); } else if (parser.symp(syms::patch)) { @@ -1572,6 +2209,26 @@ parser.hex(t3); cc.apply_delta(file_path(t1), file_id(t2), file_id(t3)); } + else if (parser.symp(syms::clear)) + { + parser.sym(); + parser.str(t1); + parser.esym(syms::attr); + parser.str(t2); + cc.clear_attr(file_path(t1), attr_name(t2)); + } + else if (parser.symp(syms::set)) + { + parser.sym(); + parser.str(t1); + parser.esym(syms::attr); + parser.str(t2); + parser.esym(syms::value); + parser.str(t3); + cc.set_attr(file_path(t1), + attr_name(t2), + attr_val(t3)); + } else break; } @@ -1583,68 +2240,76 @@ { basic_io::printer & printer; cset_printer(basic_io::printer & p) : printer(p) {} - virtual void delete_dir(path_vec_t const & dp); - virtual void delete_file(path_vec_t const & fp); - virtual void rename_dir(path_vec_t const & src, - path_vec_t const & dst); - virtual void rename_file(path_vec_t const & src, + virtual void set_heir(path_vec_t const & dying, + path_vec_t const & heir); + virtual void delete_node(path_vec_t const & fp); + virtual void rename_node(path_vec_t const & src, path_vec_t const & dst); virtual void add_dir(path_vec_t const & dp); virtual void add_file(path_vec_t const & fp); - virtual void apply_delta(path_vec_t const & path, - file_id const & src, + virtual void set_sire(path_vec_t const & newborn, + path_vec_t const & sire); + virtual void apply_delta(path_vec_t const & path, + file_id const & src, file_id const & dst); + virtual void clear_attr(path_vec_t const & path, + attr_name const & attr); + virtual void set_attr(path_vec_t const & path, + attr_name const & attr, + attr_val const & val); }; void -cset_printer::delete_dir(path_vec_t const & dp) +cset_printer::set_heir(path_vec_t const & dying, + path_vec_t const & heir) { basic_io::stanza st; - st.push_str_pair(syms::delete_dir, dp.to_file_path()()); + st.push_str_pair(syms::set_heir, dying.to_file_path()()); + st.push_str_pair(syms::to, heir.to_file_path()()); printer.print_stanza(st); } void -cset_printer::delete_file(path_vec_t const & fp) +cset_printer::delete_node(path_vec_t const & dp) { basic_io::stanza st; - st.push_str_pair(syms::delete_file, fp.to_file_path()()); + st.push_str_pair(syms::delete_node, dp.to_file_path()()); printer.print_stanza(st); } void -cset_printer::rename_dir(path_vec_t const & src, - path_vec_t const & dst) +cset_printer::rename_node(path_vec_t const & src, + path_vec_t const & dst) { basic_io::stanza st; - st.push_str_pair(syms::rename_dir, src.to_file_path()()); + st.push_str_pair(syms::rename_node, src.to_file_path()()); st.push_str_pair(syms::to, dst.to_file_path()()); printer.print_stanza(st); } void -cset_printer::rename_file(path_vec_t const & src, - path_vec_t const & dst) +cset_printer::add_dir(path_vec_t const & dp) { basic_io::stanza st; - st.push_str_pair(syms::rename_file, src.to_file_path()()); - st.push_str_pair(syms::to, dst.to_file_path()()); + st.push_str_pair(syms::add_dir, dp.to_file_path()()); printer.print_stanza(st); } void -cset_printer::add_dir(path_vec_t const & dp) +cset_printer::add_file(path_vec_t const & fp) { basic_io::stanza st; - st.push_str_pair(syms::add_dir, dp.to_file_path()()); + st.push_str_pair(syms::add_file, fp.to_file_path()()); printer.print_stanza(st); } void -cset_printer::add_file(path_vec_t const & fp) +cset_printer::set_sire(path_vec_t const & newborn, + path_vec_t const & sire) { basic_io::stanza st; - st.push_str_pair(syms::add_file, fp.to_file_path()()); + st.push_str_pair(syms::set_sire, newborn.to_file_path()()); + st.push_str_pair(syms::from, sire.to_file_path()()); printer.print_stanza(st); } @@ -1660,7 +2325,29 @@ printer.print_stanza(st); } +void +cset_printer::clear_attr(path_vec_t const & path, + attr_name const & attr) +{ + basic_io::stanza st; + st.push_str_pair(syms::set, path.to_file_path()()); + st.push_str_pair(syms::attr, attr); + printer.print_stanza(st); +} +void +cset_printer::set_attr(path_vec_t const & path, + attr_name const & attr, + attr_val const & val) +{ + basic_io::stanza st; + st.push_str_pair(syms::set, path.to_file_path()()); + st.push_str_pair(syms::attr, attr); + st.push_str_pair(syms::value, val); + printer.print_stanza(st); +} + + void read_cset(data const & dat, cset & cs)