# # # patch "rcs_import.cc" # from [8446a0d1e6c4d5967db20d1e4f2252c23180335f] # to [c2f916be9c2f1abb1e0fb9fd94fe4f7ffe01d565] # # patch "tests/importing_cvs_cycle_splitter4/__driver__.lua" # from [eaca3c39813695e48b739290346ce5cc912d02a0] # to [6b6845c43162b61283d7937b4cb3dcb9c6317ae2] # ============================================================ --- rcs_import.cc 8446a0d1e6c4d5967db20d1e4f2252c23180335f +++ rcs_import.cc c2f916be9c2f1abb1e0fb9fd94fe4f7ffe01d565 @@ -3418,6 +3418,7 @@ split_cycle(cvs_history & cvs, vector > t1events; map > t2events; map > t3events; map > t4events; @@ -3432,6 +3433,8 @@ split_cycle(cvs_history & cvs, vector & blob_events = cvs.blobs[*cc].get_events(); + vector & type1events = safe_insert(t1events, + make_pair(*cc, vector()))->second; vector & type2events = safe_insert(t2events, make_pair(*cc, vector()))->second; vector & type3events = safe_insert(t3events, @@ -3440,8 +3443,7 @@ split_cycle(cvs_history & cvs, vector()))->second; // Loop over every event of every blob again and collect events of - // type 2 and 3, but abort as soon as we hit a type 4 one. Type 1 - // is of no interest here. + // type 2, 3 and 4. Type 1 is of no interest here. for (blob_event_iter ity = blob_events.begin(); ity != blob_events.end(); ++ity) { @@ -3470,7 +3472,7 @@ split_cycle(cvs_history & cvs, vector largest_gap_candidates; - // For now, we simply collect blobs we cannot split by timestamp, but - // which could also be split somehow to resolve the cycle. - vector splittable_blobs; + // If there are no type 1 events, we can split between the type 2 and + // type 3 event without the need for a clear timestamp. + vector, set > > event_splits; + // We collect all blobs (or sets of blobs) which can be split to resolve + // the cycle. This includes candidates for which we couldn't find a + // simple timestamp to split at. + vector > splittable_blob_sets; + unsigned int no_blobs_to_split; + for (no_blobs_to_split = 1; no_blobs_to_split < cycle_members.size(); + ++no_blobs_to_split) + { + // FIXME: indentation! + L(FL("Checking for valid split points with %d blob(s) to split") + % no_blobs_to_split); + for (cm_ity cc = cycle_members.begin(); cc != cycle_members.end(); ++cc) { - L(FL(" blob: %d\n %s\n height:%d, size:%d\n") - % *cc - % date_t::from_unix_epoch(cvs.blobs[*cc].get_avg_time() / 100) - % cvs.blobs[*cc].height - % cvs.blobs[*cc].get_events().size()); + // depending on the number of blobs to split in this iteration, + // we collect split candidates. + set candidates; + cm_ity next = cc; + bool set_is_splittable = true; + for (unsigned int i = 0; i < no_blobs_to_split; ++i) + { + // We never split branch starts, instead we split the underlying + // symbol. Thus simply skip them here. + if (cvs.blobs[*next].get_digest().is_branch_start()) + set_is_splittable = false; - // We never split branch starts, instead we split the underlying - // symbol. Thus simply skip them here. - if (cvs.blobs[*cc].get_digest().is_branch_start()) + // Skip blobs which consist of just one event, or sets of blobs + // which contain such a blob. Those cannot be split any further + // anyway. + if (cvs.blobs[*cc].get_events().size() < 2) + set_is_splittable = false; + + candidates.insert(*next); + next++; + if (next == cycle_members.end()) + next = cycle_members.begin(); + } + + if (!set_is_splittable) { - L(FL(" decision: not splitting because it's a branch start")); + L(FL(" blob: %d:\tcannot split this set") % *cc); continue; } - vector & blob_events = cvs.blobs[*cc].get_events(); - // skip blobs which consist of just one event, those cannot be - // split any further anyway. - if (blob_events.size() < 2) + vector type1events, type2events, type3events, + type4events; + + for (set::iterator i = candidates.begin(); + i != candidates.end(); ++i) { - L(FL(" decision: not splitting because of its size")); - continue; + copy(t1events[*i].begin(), t1events[*i].end(), + back_inserter(type1events)); + copy(t2events[*i].begin(), t2events[*i].end(), + back_inserter(type2events)); + copy(t3events[*i].begin(), t3events[*i].end(), + back_inserter(type3events)); + copy(t4events[*i].begin(), t4events[*i].end(), + back_inserter(type4events)); } + for (vector::iterator i = type4events.begin(); + i != type4events.end(); ) + { + cvs_event_ptr ev = *i; - vector & type2events = t2events[*cc]; - vector & type3events = t3events[*cc]; - vector & type4events = t4events[*cc]; + int deps_from = 0, deps_to = 0; + typedef multimap::const_iterator mm_ity; + pair range; + // check if this type4 events is only a dependency into any of + // our split candidate blobs. + range = in_cycle_dependencies.equal_range(ev); + for (; range.first != range.second; ++range.first) + { + cvs_event_ptr dep_ev = range.first->second; + if (candidates.find(dep_ev->bi) == candidates.end()) + deps_from++; + } - // skip blobs with type 4 events, splitting them doesn't help or - // isn't possible at all. + range = in_cycle_dependents.equal_range(ev); + for (; range.first != range.second; ++range.first) + { + cvs_event_ptr dep_ev = range.first->second; + if (candidates.find(dep_ev->bi) == candidates.end()) + deps_to++; + } + + I(deps_from > 0 || deps_to > 0); + + if (deps_from == 0) + { + // downgrade this to a type3 event + type3events.push_back(*i); + i = type4events.erase(i); + } + else if (deps_to == 0) + { + // downgrade this to a type2 event + type2events.push_back(*i); + i = type4events.erase(i); + } + else // keep it as a type4 event + i++; + } + + // skip blobs or sets with type 4 events, splitting them doesn't help + // or isn't possible at all. if (!type4events.empty()) { - L(FL(" cannot be split due to type4 events.")); + L(FL(" blob: %d:\tsplitting here doesn't help.") % *cc); continue; } - // it's a cycle, so every blob must have at least one incomming and - // one outgoing dependency. + // it's a cycle, so every blob or every set of candidate blobs must + // have at least one incomming and one outgoing dependency. I(!type2events.empty()); I(!type3events.empty()); - // this blob is potentionally splittable... - splittable_blobs.push_back(*cc); - // Calculate the lower and upper bounds for both kind of events. If - // we are going to split this blob, we need to split it between any + // we are going to split these blobs, we need to split it between any // of these two bounds to resolve the cycle. time_i t2_upper_bound = 0, t2_lower_bound = (time_i) -1; @@ -3589,7 +3660,7 @@ split_cycle(cvs_history & cvs, vector!")); lower_bound = t2_upper_bound; upper_bound = t3_lower_bound; - L(FL(" can be split between type2 and type3 events.")); + L(FL(" blob: %d:\tstrange, but can split by timestamp.") % *cc); } else { - // We cannot split this blob by timestamp, because there's no - // reasonable split point. - L(FL(" cannot be split between type2 and type3 events, no reasonable timestamp.")); + // The timestamps of type 2 and type 3 events are overlapping, so + // that we cannot use a timestamp to split the blob(s), because + // its unclear where to put the remaining type 1 events. + // + // However, if there are no type 1 events in any of the candidate + // blobs, we can simply split between type 2 and type 3 events, + // without having to compare timestamps. + if (type1events.empty()) + { + L(FL(" blob: %d:\tcan split between type2 and type3 events.") + % *cc); + vector, set > >:: + iterator i; + + i = event_splits.insert(event_splits.end(), + make_pair(candidates, set())); + + // fill the set with the type 2 events + for (vector::iterator j = type2events.begin(); + j != type2events.end(); ++j) + safe_insert(i->second, *j); + } + else + { + L(FL(" blob: %d:\tcould split, but dunno where.") % *cc); + splittable_blob_sets.push_back(candidates); + } continue; } - // Now we have a lower and an upper time boundary, between which we - // can split this blob to resolve the cycle. We now try to find the - // largest time gap between any type 1 events within this boundary. - // That helps preventing splitting things which should better remain - // together. + // can split this blob or this set of candidate blobs to resolve the + // cycle. We now try to find the largest time gap between any events + // within this boundary. That helps prevent splitting things which + // should better remain together. - // First, make sure the blob's events are sorted by timestamp - cvs.blobs[*cc].sort_events(); + // first, collect all events of all candidates + vector candidate_events; + for (set::iterator i = candidates.begin(); + i != candidates.end(); ++i) + { + vector & blob_events = cvs.blobs[*cc].get_events(); + copy(blob_events.begin(), blob_events.end(), + back_inserter(candidate_events)); + } + // then make sure the events are sorted by timestamp + { + event_ptr_time_cmp cmp; + sort(candidate_events.begin(), candidate_events.end(), cmp); + } + blob_event_iter ity; - ity = blob_events.begin(); + ity = candidate_events.begin(); cvs_event_ptr this_ev, last_ev; - I(ity != blob_events.end()); // we have 2+ events here + I(ity != candidate_events.end()); // we have 2+ events here this_ev = *ity++; // skip the first event - for (; ity != blob_events.end(); ++ity) + for (; ity != candidate_events.end(); ++ity) { last_ev = this_ev; this_ev = *ity; @@ -3648,29 +3755,60 @@ split_cycle(cvs_history & cvs, vector 0); - L(FL("splitting blob %d by time %s") - % largest_gap_blob - % date_t::from_unix_epoch(largest_gap_at)); + L(FL("splitting %d blobs by time %s") + % largest_gap_candidates.size() + % date_t::from_unix_epoch(largest_gap_at / 100)); - I(!cvs.blobs[largest_gap_blob].get_digest().is_branch_start()); - I(!cvs.blobs[largest_gap_blob].get_digest().is_tag()); + split_by_time func(largest_gap_at); + for (set::iterator i = largest_gap_candidates.begin(); + i != largest_gap_candidates.end(); ++i) + { + I(!cvs.blobs[*i].get_digest().is_branch_start()); + I(!cvs.blobs[*i].get_digest().is_tag()); - split_by_time func(largest_gap_at); - split_blob_at(cvs, largest_gap_blob, func); + split_blob_at(cvs, *i, func); + } } + else if (!event_splits.empty()) + { + // simple take the first option we've found and split there. + set & candidates = event_splits.begin()->first; + set & split_events = event_splits.begin()->second; + + split_by_event_set func(split_events); + for (set::iterator i = candidates.begin(); + i != candidates.end(); ++i) + { + I(!cvs.blobs[*i].get_digest().is_branch_start()); + I(!cvs.blobs[*i].get_digest().is_tag()); + + split_blob_at(cvs, *i, func); + } + } + else if (!splittable_blob_sets.empty()) + { + L(FL("could theoretically split the cycle, but where to put " + "the type 1 events?")); + I(false); + } else { L(FL("Unable to split the cycle!")); ============================================================ --- tests/importing_cvs_cycle_splitter4/__driver__.lua eaca3c39813695e48b739290346ce5cc912d02a0 +++ tests/importing_cvs_cycle_splitter4/__driver__.lua 6b6845c43162b61283d7937b4cb3dcb9c6317ae2 @@ -9,10 +9,7 @@ check(get("cvs-repository")) -- fileA: A -> B -> C -- file2: B -> C -> A -- file3: C -> A -> B --- --- Currently, this test fails because it creates a cycle with only type4 --- events (i.e. no single blob can be split to resolve the cycle). -- import into monotone and check presence of files -xfail(mtn("--branch=test", "cvs_import", "--debug", "cvs-repository/test"), 0, false, false) +check(mtn("--branch=test", "cvs_import", "--debug", "cvs-repository/test"), 0, false, false)