# # # patch "annotate.cc" # from [da2b9e34836d772f45c7214d268ebeca7044c0c7] # to [8bbd9b80c34de6deea8edd520ab30ac3bcd76239] # # patch "commands.cc" # from [119a3d642f8c1e38fa2e455ee708523b805d1faf] # to [66330663af473d50b55b7ebf9a07709fe530650d] # # patch "database.cc" # from [0f90adb5a765051661760d886a200529e8fd4b42] # to [0f63a02fc5568b71794b048331d7c9b4a2953b7f] # # patch "database.hh" # from [65e7daaaa720624144572538ee668d33d1a0385e] # to [7f490cac7286919c98f0db58bd2f6acaefe4aff9] # # patch "revision.cc" # from [8cda143f1b14ea5d67e40c9dc325b5ec6f2fd1a7] # to [f79fa1c1effc96ca5ab24ece821949e264f5ea3c] # # patch "revision.hh" # from [5e3733994a6c92c4f15f0d78aa26669458443d46] # to [a9b0f0e05fb2f60797bbb38e39361d4caa409623] # # patch "roster.cc" # from [67380c22a3dfef07d49edbc75646a4b0668de5cb] # to [feb806d62ea6a3ac1524bf7bfbbaa62e26b40fbc] # # patch "schema.sql" # from [f44144278a4158695818d8f7e1901ac6f89e39bb] # to [513cc3e840683d4eed63e9958cd992916f9607eb] # # patch "schema_migration.cc" # from [a13abc3a8c750aadb1ad07114c8f5b0cfc0cc67b] # to [b673af9241475ad9fbe4efb5a8f8e30cae0b0bc4] # ============================================================ --- annotate.cc da2b9e34836d772f45c7214d268ebeca7044c0c7 +++ annotate.cc 8bbd9b80c34de6deea8edd520ab30ac3bcd76239 @@ -617,21 +617,14 @@ std::set parents; - // If we have content-marks which are *not* equal to the current rev, - // we can jump back to them directly. If we have only a content-mark - // equal to the current rev, it means we made a decision here, and - // we must move to the immediate parent revs. - // - // Unfortunately, while this algorithm *could* use the marking - // information as suggested above, it seems to work much better - // (especially wrt. merges) when it goes rev-by-rev, so we leave it that - // way for now. - - // if (marks.file_content.size() == 1 - // && *(marks.file_content.begin()) == work_unit.revision) - app.db.get_revision_parents(work_unit.revision, parents); - // else - // parents = marks.file_content; + app.db.get_node_revision_parents(work_unit.fid, work_unit.revision, parents); + static bool donespecial = false; + if (!donespecial) + { + donespecial = true; + if (parents.size() == 0) + parents.insert(marks.file_content.begin(), marks.file_content.end()); + } size_t added_in_parent_count = 0; @@ -739,18 +732,53 @@ void -find_ancestors(app_state &app, revision_id rid, std::map &paths_to_nodes) +find_ancestors(app_state &app, revision_id rid, std::map &paths_to_nodes, node_id node) { std::vector frontier; - frontier.push_back(rid); + std::set parents; + // if the revision is question is not one which modified the file + // we're interested in, it won't have an edge in the node_revision_ancestry + // table. here we figure out by hand what the set of node_revision_ancestry + // parents appropriate for this file in this revision are and put them in the + // frontier. 
============================================================
--- commands.cc	119a3d642f8c1e38fa2e455ee708523b805d1faf
+++ commands.cc	66330663af473d50b55b7ebf9a07709fe530650d
@@ -2102,6 +2102,7 @@
     "check\n"
     "changesetify\n"
     "rosterify\n"
+    "filedagify\n"
     "set_epoch BRANCH EPOCH\n"),
     N_("manipulate database state"),
     OPT_DROP_ATTR)
@@ -2126,6 +2127,8 @@
         build_changesets_from_manifest_ancestry(app);
       else if (idx(args, 0)() == "rosterify")
         build_roster_style_revs_from_manifest_style_revs(app);
+      else if (idx(args, 0)() == "filedagify")
+        build_per_file_dags(app);
       else
         throw usage(name);
     }
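
Usage note: on an existing database the new table is adopted in two steps --
`monotone db migrate` applies the schema migration added in schema_migration.cc
below (creating an empty node_revision_ancestry table), after which the new
`monotone db filedagify` populates it from the stored rosters via
build_per_file_dags().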
AND child = ?", + lexical_cast(node).c_str(), rev.inner()().c_str()); + for (size_t i = 0; i < res.size(); i++) + parents.insert(revision_id(res[i][0])); +} + +void +database::get_node_revision_children(node_id node, revision_id const & rev, std::set & children) +{ + I(!null_id(rev)); + results res; + children.clear(); + fetch(res, one_col, any_rows, + "SELECT child from node_revision_ancestry WHERE node = ? AND parent = ?", + lexical_cast(node).c_str(), rev.inner()().c_str()); + for (size_t i = 0; i < res.size(); i++) + children.insert(revision_id(res[i][0])); +} + +void database::get_revision_parents(revision_id const & id, set & parents) { @@ -2595,6 +2638,27 @@ void +database::put_node_revision_ancestry(node_id node, revision_id const & parent, revision_id const & child) +{ + execute("INSERT into node_revision_ancestry VALUES (?, ?, ?)", + lexical_cast(node).c_str(), + parent.inner()().c_str(), child.inner()().c_str()); +} + + +void +database::put_node_revision_ancestry_edges(std::map > const & edges) +{ + for (std::map >::const_iterator i = edges.begin(); + i != edges.end(); i++) + { + for (std::multimap::const_iterator j = i->second.begin(); j != i->second.end(); j++) + put_node_revision_ancestry(i->first, j->first, j->second); + } +} + + +void database::put_roster(revision_id const & rev_id, roster_t & roster, marking_map & marks) @@ -2607,6 +2671,9 @@ write_roster_and_marking(roster, marks, new_data); calculate_ident(new_data, new_id); + std::map > node_ancestry_edges; + calculate_node_revision_ancestry(rev_id, marks, node_ancestry_edges); + // First: find the "old" revision; if there are multiple old // revisions, we just pick the first. It probably doesn't matter for // the sake of delta-encoding. @@ -2620,6 +2687,8 @@ rev_id.inner()().c_str(), new_id().c_str()); + put_node_revision_ancestry_edges(node_ancestry_edges); + if (exists(new_id, data_table) || delta_exists(new_id, delta_table)) { @@ -2656,6 +2725,69 @@ } +void +get_parent_marks_set(database & db, + node_id const & node, + revision_id const & rev, + revision_id const & birthrev, + std::set & parent_marks) +{ + if (rev == birthrev) + return; + + // for each parent of rev, add in the set of revisions in that rosters markmap file_contents set + std::set parents; + db.get_revision_parents(rev, parents); + for (std::set::const_iterator i = parents.begin(); i != parents.end(); i++) + { + roster_t roster; + marking_map marks; + db.get_roster(*i, roster, marks); + marking_map::const_iterator node_marks = marks.find(node); + I(node_marks != marks.end()); + std::set const & content_marks = node_marks->second.file_content; + parent_marks.insert(content_marks.begin(), content_marks.end()); + + split_path file_split_path; + roster.get_name(node, file_split_path); + //L(FL("get_parent_marks_set for file %s in revision %s vs parent %s found %d marks:") + // % file_path(file_split_path).as_external() % rev % (*i) % content_marks.size()); + //for (std::set::const_iterator j = content_marks.begin(); j != content_marks.end(); j++) + // L(FL("mark %s") % (*j)); + } + + return; +} + + +void +database::calculate_node_revision_ancestry(revision_id const & rev, + marking_map const & mm, + std::map > & anc) +{ + for (std::map::const_iterator i = mm.begin(); i != mm.end(); i++) + { + std::set const & node_marks = i->second.file_content; + + // if the marking map file_content marks is not this revision, + // we don't have to do anything since we didn't just change the + // file + if (node_marks.find(rev) == node_marks.end()) + continue; + + // 
============================================================
--- database.hh	65e7daaaa720624144572538ee668d33d1a0385e
+++ database.hh	7f490cac7286919c98f0db58bd2f6acaefe4aff9
@@ -198,6 +198,10 @@
                   roster_t & roster,
                   marking_map & marks);
 
+  void calculate_node_revision_ancestry(revision_id const & rev,
+                                        marking_map const & mm,
+                                        std::map<node_id, std::multimap<revision_id, revision_id> > & anc);
+
   void check_filename();
   void check_db_exists();
   void open();
@@ -276,6 +280,18 @@
   void put_revision(revision_id const & new_id,
                     revision_data const & dat);
 
+
+  void get_node_revision_ancestry(node_id node, std::multimap<revision_id, revision_id> & graph);
+
+  void get_node_revision_parents(node_id node, revision_id const & rev, std::set<revision_id> & parents);
+
+  void get_node_revision_children(node_id node, revision_id const & rev, std::set<revision_id> & children);
+
+  void put_node_revision_ancestry(node_id node, revision_id const & parent, revision_id const & child);
+
+  void put_node_revision_ancestry_edges(std::map<node_id, std::multimap<revision_id, revision_id> > const & edges);
+
+
   void delete_existing_revs_and_certs();
 
   void delete_existing_manifests();
============================================================
--- revision.cc	8cda143f1b14ea5d67e40c9dc325b5ec6f2fd1a7
+++ revision.cc	f79fa1c1effc96ca5ab24ece821949e264f5ea3c
@@ -16,6 +16,7 @@
 #include <iostream>
 #include <list>
 #include <map>
+#include <memory>
 #include <queue>
 #include <set>
@@ -1394,7 +1395,7 @@
        i != existing_graph.end(); ++i)
     {
       // FIXME: insert for the null id as well, and do the same for the
-      // changesetify code, and then reach rebuild_ancestry how to deal with
+      // changesetify code, and then teach rebuild_ancestry how to deal with
       // such things.  (I guess u64(0) should represent the null parent?)
       if (!null_id(i->first))
         {
@@ -1454,6 +1455,105 @@
   graph.rebuild_ancestry();
 }
 
+
+void
+build_per_file_dags(app_state & app)
+{
+  P(F("building per-file-DAG structures for file content modifications\n"));
+
+  std::set<revision_id> processed;
+  std::vector<revision_id> frontier;
+  std::auto_ptr<ticker> revs_ticker;
+  revs_ticker.reset(new ticker(_("revs done"), "r", 1));
+  {
+    std::set<revision_id> all_revs;
+    app.db.get_revision_ids(all_revs);
+    revs_ticker->set_total(all_revs.size());
+  }
+
+  // get all revisions with no descendants and put them in the frontier
+  // (stolen from automate_leaves)
+  {
+    std::set<revision_id> leaves;
+    app.db.get_revision_ids(leaves);
+    std::multimap<revision_id, revision_id> graph;
+    app.db.get_revision_ancestry(graph);
+    for (std::multimap<revision_id, revision_id>::const_iterator i = graph.begin();
+         i != graph.end(); ++i)
+      leaves.erase(i->first);
+    frontier.insert(frontier.end(), leaves.begin(), leaves.end());
+  }
+
+  transaction_guard guard(app.db);
+  while (!frontier.empty())
+    {
+      revision_id rev = frontier.back();
+      frontier.pop_back();
+      MM(rev);
+
+      if (processed.find(rev) != processed.end())
+        continue;
+
+      roster_t roster;
+      marking_map mm;
+      // FIX? we call get_roster() multiple times per roster (once when the
+      // rev is processed, and again for each of its children);
+      // do some caching to speed up?
+      app.db.get_roster(rev, roster, mm);
+
+      std::set<revision_id> parents;
+      app.db.get_revision_parents(rev, parents);
+      revision_id nullid; // default?
+      std::set<revision_id>::iterator null_parent = parents.find(nullid);
+      if (null_parent != parents.end())
+        parents.erase(null_parent);
+      std::vector<marking_map> parent_mm;
+      for (std::set<revision_id>::const_iterator i = parents.begin(), end = parents.end(); i != end; i++)
+        {
+          roster_t pr;
+          marking_map pmm;
+          app.db.get_roster(*i, pr, pmm);
+          parent_mm.push_back(pmm);
+        }
+
+      for (marking_map::const_iterator ni = mm.begin(), end = mm.end(); ni != end; ni++)
+        {
+          node_id node = ni->first;
+          marking_t const & node_marks(ni->second);
+          if (node_marks.file_content.find(rev) == node_marks.file_content.end())
+            continue;
+
+          if (node_marks.birth_revision == rev)
+            continue;
+
+          // if this rev *is* in the file_content marks (it should be the only rev there)
+          // then we find the set of revs where it was last marked, as the union
+          // of the marks for that node in all our parents
+          I(node_marks.file_content.size() == 1);
+          std::set<revision_id> marked_parent_revs;
+          for (std::vector<marking_map>::const_iterator pmmi = parent_mm.begin(), end = parent_mm.end(); pmmi != end; pmmi++)
+            {
+              marking_map::const_iterator parent_node_marks = pmmi->find(node);
+              //I(parent_node_marks != pmmi->end());
+              // uh, I think this means that you had a file added and later changed on one side of
+              // a fork, and then the sides merged.
+              if (parent_node_marks != pmmi->end())
+                marked_parent_revs.insert((parent_node_marks->second).file_content.begin(),
+                                          (parent_node_marks->second).file_content.end());
+            }
+
+          // insert the node_revision_ancestry entries
+          for (std::set<revision_id>::const_iterator mpr = marked_parent_revs.begin(), end = marked_parent_revs.end(); mpr != end; mpr++)
+            app.db.put_node_revision_ancestry(node, *mpr, rev);
+        }
+
+      frontier.insert(frontier.begin(), parents.begin(), parents.end());
+      processed.insert(rev);
+      ++(*revs_ticker);
+    }
+  guard.commit();
+}
+
+
 // i/o stuff
 
 namespace
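
The FIX? comment above notes that build_per_file_dags() re-reads each roster
once when it is processed and again for each of its children. A minimal
memoization sketch along the lines it suggests -- hypothetical, not part of the
patch, ignoring memory growth on large histories, and assuming roster_t is
copyable as elsewhere in monotone:

    #include <map>
    #include <utility>
    // (monotone's roster.hh / database.hh assumed to be included)

    // Hypothetical cache: trades memory for the repeated get_roster() calls.
    struct roster_cache
    {
      std::map<revision_id, std::pair<roster_t, marking_map> > entries;

      marking_map const &
      get_marking(database & db, revision_id const & rid)
      {
        std::map<revision_id, std::pair<roster_t, marking_map> >::iterator
          i = entries.find(rid);
        if (i == entries.end())
          {
            std::pair<roster_t, marking_map> e;
            db.get_roster(rid, e.first, e.second);
            i = entries.insert(std::make_pair(rid, e)).first;
          }
        return i->second.second;
      }
    };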
============================================================
--- revision.hh	5e3733994a6c92c4f15f0d78aa26669458443d46
+++ revision.hh	a9b0f0e05fb2f60797bbb38e39361d4caa409623
@@ -166,6 +166,9 @@
 void
 build_roster_style_revs_from_manifest_style_revs(app_state & app);
 
+void
+build_per_file_dags(app_state & app);
+
 // basic_io access to printers and parsers
 
 namespace basic_io { struct printer; struct parser; }
============================================================
--- roster.cc	67380c22a3dfef07d49edbc75646a4b0668de5cb
+++ roster.cc	feb806d62ea6a3ac1524bf7bfbbaa62e26b40fbc
@@ -1,3 +1,4 @@
+// -*- mode: C++; c-file-style: "gnu"; indent-tabs-mode: nil -*-
 // copyright (C) 2005 nathaniel smith <njs@pobox.com>
 // copyright (C) 2005 graydon hoare <graydon@pobox.com>
 // all rights reserved.
============================================================
--- schema.sql	f44144278a4158695818d8f7e1901ac6f89e39bb
+++ schema.sql	513cc3e840683d4eed63e9958cd992916f9607eb
@@ -60,6 +60,14 @@
   unique(parent, child)
   );
 
+CREATE TABLE node_revision_ancestry
+  (
+  node not null,       -- internal node
+  parent not null,     -- joins with revisions.id
+  child not null,      -- joins with revisions.id
+  unique(node, parent, child)
+  );
+
 CREATE TABLE rosters
   (
   id primary key,      -- strong hash of the roster
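
A note on the table: each node_revision_ancestry row says that, for file node
`node`, revision `child` changed the content the file had as of revision
`parent`. The uniqueness constraint spans all three columns because a merge can
legitimately produce several rows for the same (node, child) pair -- as a hedged
illustration (revision names hypothetical): if r0 adds file f, r1 and r2 each
edit f on opposite sides of a fork, and r3 merges the two edits, the rows for
f's node n come out as (n, r0, r1), (n, r0, r2), (n, r1, r3), (n, r2, r3).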
============================================================
--- schema_migration.cc	a13abc3a8c750aadb1ad07114c8f5b0cfc0cc67b
+++ schema_migration.cc	b673af9241475ad9fbe4efb5a8f8e30cae0b0bc4
@@ -1,3 +1,4 @@
+// -*- mode: C++; c-file-style: "gnu"; indent-tabs-mode: nil -*-
 // copyright (C) 2002, 2003 graydon hoare <graydon@pobox.com>
 // all rights reserved.
 // licensed to the public under the terms of the GNU GPL (>= 2)
@@ -908,6 +909,28 @@
   return true;
 }
 
+static bool
+migrate_client_to_per_file_dag(sqlite3 * sql,
+                               char ** errmsg,
+                               app_state *app)
+{
+  int res;
+
+  res = logged_sqlite3_exec(sql,
+                            "CREATE TABLE node_revision_ancestry\n"
+                            "(\n"
+                            "node not null, -- internal node\n"
+                            "parent not null, -- joins with revisions.id\n"
+                            "child not null, -- joins with revisions.id\n"
+                            "unique(node, parent, child)\n"
+                            ");",
+                            NULL, NULL, errmsg);
+  if (res != SQLITE_OK)
+    return false;
+
+  return true;
+}
+
 void
 migrate_monotone_schema(sqlite3 *sql, app_state *app)
 {
@@ -939,9 +962,12 @@
   m.add("bd86f9a90b5d552f0be1fa9aee847ea0f317778b",
         &migrate_client_to_add_rosters);
 
+  m.add("1db80c7cee8fa966913db1a463ed50bf1b0e5b0e",
+        &migrate_client_to_per_file_dag);
+
   // IMPORTANT: whenever you modify this to add a new schema version, you must
   // also add a new migration test for the new schema version.  See
   // tests/t_migrate_schema.at for details.
-  m.migrate(sql, "1db80c7cee8fa966913db1a463ed50bf1b0e5b0e");
+  m.migrate(sql, "0a5615e37448e0671655afe4c536cc45680fbcc4");
 }