# # patch "ChangeLog" # from [252971a077b4684246addf834f6e62fb7f8a67bf] # to [a59353eba9a3615c55816df2870714b3dce6f9f1] # # patch "database.cc" # from [43754a69a972b2b821550cec3372295f97fb7725] # to [7fbf000b889c6a9e67179e405279c17c08f8b1ff] # ======================================================================== --- ChangeLog 252971a077b4684246addf834f6e62fb7f8a67bf +++ ChangeLog a59353eba9a3615c55816df2870714b3dce6f9f1 @@ -1,5 +1,11 @@ 2005-11-25 graydon hoare + * database.cc (get_version): Another important fix: make sure to + cancel overlapping paths, to avoid exponential memory requirement + in deep, heavily-forked-and-merged storage paths. + +2005-11-25 graydon hoare + * database.cc (get_version): Crucial fix! Rewrite the delta-reconstruction algorithm to work with a DAG storage topology, rather than assuming an inverted tree. Counterpart to the ======================================================================== --- database.cc 43754a69a972b2b821550cec3372295f97fb7725 +++ database.cc 7fbf000b889c6a9e67179e405279c17c08f8b1ff @@ -905,15 +905,24 @@ static void extend_path_if_not_cycle(string table_name, - version_path & p, hexenc const & ext) + shared_ptr p, + hexenc const & ext, + set< hexenc > seen_nodes, + vector< shared_ptr > & next_paths) { - for (version_path::const_iterator i = p.begin(); i != p.end(); ++i) + for (version_path::const_iterator i = p->begin(); i != p->end(); ++i) { if ((*i)() == ext()) throw oops("cycle in table '" + table_name + "', at node " + (*i)() + " <- " + ext()); } - p.push_back(ext); + + if (seen_nodes.find(ext) == seen_nodes.end()) + { + p->push_back(ext); + next_paths.push_back(p); + seen_nodes.insert(ext); + } } void @@ -962,27 +971,34 @@ // // On each iteration, we extend every active path by one step. If our // extension involves a fork, we duplicate the path. If any path - // contains a cycle, we fault.1 + // contains a cycle, we fault. + // + // If, by extending a path C, we enter a node which another path + // D has already seen, we kill path C. This avoids the possibility of + // exponential growth in the number of paths due to extensive forking + // and merging. - vector< shared_ptr > paths; + vector< shared_ptr > live_paths; string delta_query = "SELECT base FROM " + delta_table + " WHERE id = ?"; { shared_ptr pth0 = shared_ptr(new version_path()); pth0->push_back(ident); - paths.push_back(pth0); + live_paths.push_back(pth0); } shared_ptr selected_path; + set< hexenc > seen_nodes; while (!selected_path) { - // NB: use size() here, not a const_iterator, because paths may - // grow during iteration. - for (size_t i = 0; i < paths.size(); ++i) + vector< shared_ptr > next_paths; + + for (vector >::const_iterator i = live_paths.begin(); + i != live_paths.end(); ++i) { - shared_ptr pth = idx(paths,i); + shared_ptr pth = *i; hexenc tip = pth->back(); if (vcache.exists(tip) || exists(tip, data_table)) @@ -997,24 +1013,26 @@ fetch(res, one_col, any_rows, delta_query.c_str(), tip().c_str()); - if (res.size() == 0) - continue; + I(res.size() != 0); // Replicate the path if there's a fork. for (size_t k = 1; k < res.size(); ++k) { shared_ptr pthN = shared_ptr(new version_path(*pth)); - extend_path_if_not_cycle(delta_table, *pthN, - hexenc(res[k][0])); - paths.push_back(pthN); + extend_path_if_not_cycle(delta_table, pthN, + hexenc(res[k][0]), + seen_nodes, next_paths); } // And extend the base path we're examining. - extend_path_if_not_cycle(delta_table, *pth, - hexenc(res[0][0])); + extend_path_if_not_cycle(delta_table, pth, + hexenc(res[0][0]), + seen_nodes, next_paths); } } + + live_paths = next_paths; } // Found a root, now trace it back along the path.