# # patch "ChangeLog" # from [32b8f5570af81938c9d9ec18cc1266014cf06d17] # to [237c4f810a60e8085f044b7ef7af10b7ef5845a0] # # patch "pcdv.cc" # from [9e57cd05ee0304343cc53eb98a64aa0d202e4efd] # to [57f77609eec9cfaf8e2534644d3ca32dffd2d21c] # # patch "pcdv.hh" # from [8a115dd67f2d05156cf6193834eaab0b3378f870] # to [4e541a7ea48ee65f3510dcd58c7e1a4b173cfe15] # --- ChangeLog +++ ChangeLog @@ -1,3 +1,7 @@ +2005-07-01 Timothy Brownawell + + * pcdv.cc: Various speedups. + 2005-06-29 Timothy Brownawell * pcdv.cc: more stuff in test code (same as tests in reference impl) --- pcdv.cc +++ pcdv.cc @@ -23,7 +23,7 @@ file_state::~file_state() { - if (false && weave.unique()) + if (weave.unique()) { states.reset(); std::cout<<"Destroyed file_state of "<size() @@ -40,29 +40,36 @@ void unique_lcs(vector const & a, vector const & b, + int alo, + int blo, + int ahi, + int bhi, vector > & res) { + res.clear(); + if (alo == ahi || blo == bhi) + return; // index[line in a] = position of line // if line is a duplicate, index[line] = -1 map index; - for (int i = 0; (unsigned int)(i) < a.size(); ++i) + for (int i = 0; i < ahi - alo; ++i) { - map::iterator j = index.find(idx(a,i)); + map::iterator j = index.find(idx(a,i + alo)); if (j != index.end()) j->second=-1; else - index.insert(make_pair(idx(a,i), i)); + index.insert(make_pair(idx(a,i + alo), i)); } // btoa[i] = a.find(b[i]), if b[i] is unique in both // otherwise, btoa[i] = -1 map index2; - vector btoa(b.size(), -1); - for (int i = 0; (unsigned int)(i) < b.size(); ++i) + vector btoa(bhi - blo, -1); + for (int i = 0; i < bhi - blo; ++i) { - map::iterator j = index.find(idx(b,i)); + map::iterator j = index.find(idx(b,i + blo)); if (j != index.end()) { - map::iterator k = index2.find(idx(b,i)); + map::iterator k = index2.find(idx(b,i + blo)); if (k != index2.end()) { btoa[k->second] = -1; @@ -70,14 +77,14 @@ } else { - index2.insert(make_pair(idx(b,i), i)); + index2.insert(make_pair(idx(b,i + blo), i)); btoa[i] = j->second; } } } // Patience sorting // http://en.wikipedia.org/wiki/Patience_sorting - vector backpointers(b.size(), -1); + vector backpointers(bhi - blo, -1); vector stacks; vector lasts; int k = 0; @@ -118,13 +125,12 @@ lasts.push_back(bpos); } } - res.clear(); if (lasts.empty()) return; k = lasts.back(); while (k != -1) { - res.push_back(make_pair(idx(btoa, k), k)); + res.push_back(make_pair(idx(btoa, k) + alo, k + blo)); k = idx(backpointers, k); } reverse(res.begin(), res.end()); @@ -134,6 +140,8 @@ void recurse_matches(vector const & a, vector const & b, + int alo, + int blo, int ahi, int bhi, vector > & answer, @@ -142,26 +150,14 @@ if (maxrecursion < 0) return; unsigned int oldlength = answer.size(); - int alo = 0, blo = 0; - if (oldlength != 0) - { - alo = answer.back().first + 1; - blo = answer.back().second + 1; - } // extend line matches into section matches vector > linematches; - vector a2, b2; - for(int i = alo; i < ahi; ++i) - a2.push_back(idx(a, i)); - for(int i = blo; i < bhi; ++i) - b2.push_back(idx(b, i)); - - unique_lcs(a2, b2, linematches); + unique_lcs(a, b, alo, blo, ahi, bhi, linematches); for (vector >::iterator i = linematches.begin(); i != linematches.end(); ++i) { - int apos = i->first + alo; - int bpos = i->second + blo; + int apos = i->first; + int bpos = i->second; int lasta = -1, lastb = -1; if (answer.size()) { @@ -182,7 +178,8 @@ apos = newapos; --bpos; } - recurse_matches(a, b, apos, bpos, answer, maxrecursion-1); + recurse_matches(a, b, ((lasta==-1)?0:lasta), ((lastb==-1)?0:lastb), + apos, bpos, answer, maxrecursion-1); answer.push_back(make_pair(apos, bpos)); // extend as far forward as possible while (apos < ahi - 1 && bpos < bhi - 1) @@ -198,8 +195,9 @@ } } if (answer.size() > oldlength) - // find matches between the laswt match and the end - recurse_matches(a, b, ahi, bhi, answer, maxrecursion - 1); + // find matches between the last match and the end + recurse_matches(a, b, answer.back().first, answer.back().second, + ahi, bhi, answer, maxrecursion - 1); } @@ -332,26 +330,26 @@ { I(weave == other.weave); file_state newstate(weave); - for (map, living_status>::const_iterator i - = states->begin(); i != states->end(); ++i) + map, living_status>::const_iterator l, r; + l = states->begin(); + r = other.states->begin(); + while (l != states->end() || r != other.states->end()) { - map, living_status>::const_iterator j - = other.states->find(i->first); - if (j != other.states->end()) - (*newstate.states)[i->first] = i->second.merge(j->second); + if (l == states->end()) + newstate.states->insert(*(r++)); + else if (r == other.states->end()) + newstate.states->insert(*(l++)); + else if (std::less >()(l->first, r->first)) + newstate.states->insert(*(r++)); + else if (std::less >()(r->first, l->first)) + newstate.states->insert(*(l++)); else - (*newstate.states)[i->first] = i->second.merge(living_status()); + { + newstate.states->insert(make_pair(l->first, + l->second.merge(r->second))); + ++l, ++r; + } } - for (map, living_status>::const_iterator i - = other.states->begin(); i != other.states->end(); ++i) - { - map, living_status>::const_iterator j - = states->find(i->first); - if (j != states->end()) - (*newstate.states)[i->first] = i->second.merge(j->second); - else - (*newstate.states)[i->first] = i->second.merge(living_status()); - } return newstate; } @@ -454,7 +452,7 @@ lines.push_back(string()); } vector > matches; - recurse_matches(lines, result, + recurse_matches(lines, result, 0, 0, lines.size(), result.size(), matches, 10); lines.clear(); @@ -466,7 +464,14 @@ for (vector >::iterator i = matches.begin(); i != matches.end(); ++i) { - recurse_matches(lines, result, i->first, i->second, matches2, 10); + int alo=0, blo=0; + if (matches2.size()) + { + alo = matches2.back().first; + blo = matches2.back().second; + } + recurse_matches(lines, result, alo, blo, + i->first, i->second, matches2, 10); if ((unsigned int)(i->first) != lines.size()) matches2.push_back(make_pair(i->first, i->second)); } @@ -495,21 +500,38 @@ lastb = i->second; } reverse(toinsert.begin(), toinsert.end()); - for (vector > > >::iterator i - = toinsert.begin(); i != toinsert.end(); ++i) - weave->insert(weave->begin() + i->first, i->second); + // toinsert is now in last-first order + +// for (vector > > >::iterator i +// = toinsert.begin(); i != toinsert.end(); ++i) +// weave->insert(weave->begin() + i->first, i->second); + if (toinsert.size()) + { + weave->insert(weave->begin()+toinsert.front().first, toinsert.size(), + make_pair(string(), make_pair(string(), 0))); + int tpos = 0; + int wfrom = toinsert.front().first - 1; + int wpos = wfrom + toinsert.size(); + while ((unsigned int)(tpos) < toinsert.size()) + { + if (idx(toinsert, tpos).first == wfrom+1) + idx(*weave, wpos--) = idx(toinsert, tpos++).second; + else + idx(*weave, wpos--) = idx(*weave, wfrom--); + } + } + file_state out(weave); for (vector > >::iterator i = weave->begin(); i != weave->end(); ++i) { + bool live = living.find(i->second) != living.end(); + living_status orig; map, living_status>::const_iterator j = states->find(i->second); if (j != states->end()) - (*out.states)[i->second] = j->second.set_living(revision, - living.find(i->second) != living.end()); - else - (*out.states)[i->second] = living_status().set_living(revision, - living.find(i->second) != living.end()); + orig = j->second; + out.states->insert(make_pair(i->second, orig.set_living(revision, live))); } return out; } @@ -602,42 +624,42 @@ // unique_lcs vector l, r; vector > res; - unique_lcs(l,r, res); + unique_lcs(l, r, 0, 0, 0, 0, res); I(res == vp()); l = vectorize("a"); r = vectorize("a"); - unique_lcs(l,r, res); + unique_lcs(l, r, 0, 0, 1, 1, res); I(res == vp().pb(0, 0)); l = vectorize("a"); r = vectorize("b"); - unique_lcs(l,r, res); + unique_lcs(l, r, 0, 0, 1, 1, res); I(res == vp()); l = vectorize("ab"); r = vectorize("ab"); - unique_lcs(l,r, res); + unique_lcs(l, r, 0, 0, 2, 2, res); I(res == vp().pb(0, 0).pb(1, 1)); l = vectorize("abcde"); r = vectorize("cdeab"); - unique_lcs(l,r, res); + unique_lcs(l, r, 0, 0, 5, 5, res); I(res == vp().pb(2, 0).pb(3, 1).pb(4, 2)); l = vectorize("cdeab"); r = vectorize("abcde"); - unique_lcs(l,r, res); + unique_lcs(l, r, 0, 0, 5, 5, res); I(res == vp().pb(0, 2).pb(1, 3).pb(2, 4)); l = vectorize("abXde"); r = vectorize("abYde"); - unique_lcs(l,r, res); + unique_lcs(l, r, 0, 0, 5, 5, res); I(res == vp().pb(0, 0).pb(1, 1).pb(3, 3).pb(4, 4)); l = vectorize("acbac"); r = vectorize("abc"); - unique_lcs(l,r, res); + unique_lcs(l, r, 0, 0, 5, 3, res); I(res == vp().pb(2, 1)); } @@ -654,13 +676,13 @@ l.push_back("c\n"); r = vectorize("aabcc"); - recurse_matches(l, r, 5, 5, res, 10); + recurse_matches(l, r, 0, 0, 5, 5, res, 10); I(res == vp().pb(0, 1).pb(2, 2).pb(4, 3)); res.clear(); l = vectorize("acbac"); r = vectorize("abc"); - recurse_matches(l, r, 5, 3, res, 10); + recurse_matches(l, r, 0, 0, 5, 3, res, 10); I(res == vp().pb(0, 0).pb(2, 1).pb(4, 2)); } --- pcdv.hh +++ pcdv.hh @@ -49,6 +49,8 @@ show_conflict(vector const & result); // This is a const object type; there are no modifiers. +// There are likely to be many, many copies of each object. Since objects +// don't change, share internal data between copies. struct living_status { boost::shared_ptr > > overrides;