[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[rdiff-backup-users] [PATCH] Use --include and --exclude with --remove-older-than
From: |
Josh Nisly |
Subject: |
[rdiff-backup-users] [PATCH] Use --include and --exclude with --remove-older-than |
Date: |
Sun, 26 Oct 2008 21:42:55 -0500 |
User-agent: |
Thunderbird 2.0.0.17 (X11/20080925) |
Attached is a work-in-progress patch to support the --include and
--exclude selection options in concert with --remove-older-than. This
provides the functionality described in
http://wiki.rdiff-backup.org/wiki/index.php/RemoveOlderThanAllowsSubdirectories
and http://wiki.rdiff-backup.org/wiki/index.php/RemoveSpecifiedFiles.
I'm mainly posting this for feedback on the implementation, particularly
the changes in selection.py and metadata.py.
AFAICT, the main pieces left are munging the file_statistics files and
loosening validation. Regarding the loosened validation: currently,
--remove-older-than validates that it won't remove more than one
increment. I think that this check should be loosened when using
--remove-older-than with selection options, but other than simply
removing the check, I don't have a lot of ideas. Thoughts?
Thanks,
JoshN
--- rdiff_backup/Main.py 12 Oct 2008 02:21:29 -0000 1.121
+++ rdiff_backup/Main.py 27 Oct 2008 02:06:49 -0000
@@ -716,10 +716,17 @@
rootrp = require_root_set(rootrp, 0)
rot_require_rbdir_base(rootrp)
+ # Validate that the selection options are valid
+ for select_opt in select_opts:
+ if select_opt[0] != '--include' and \
+ select_opt[0] != '--exclude':
+ Log.FatalError("Only --include and --exclude are "
+ "supported with --remove-older-than.")
+
time = rot_check_time(remove_older_than_string)
if time is None: return
Log("Actual remove older than time: %s" % (time,), 6)
- manage.delete_earlier_than(Globals.rbdir, time)
+ manage.delete_earlier_than(Globals.rbdir, time, select_opts)
def rot_check_time(time_string):
"""Check remove older than time_string, return time in seconds"""
--- rdiff_backup/manage.py 7 Jul 2007 22:43:34 -0000 1.13
+++ rdiff_backup/manage.py 27 Oct 2008 02:06:49 -0000
@@ -22,6 +22,7 @@
from __future__ import generators
from log import Log
import Globals, Time, static, statistics, restore, selection, FilenameMapping
+import metadata
class ManageException(Exception): pass
@@ -80,7 +81,7 @@
result.append("Current mirror: %s" % Time.timetopretty(mirror_time))
return "\n".join(result)
-def delete_earlier_than(baserp, time):
+def delete_earlier_than(baserp, time, select_opts):
"""Deleting increments older than time in directory baserp
time is in seconds. It will then delete any empty directories
@@ -88,9 +89,9 @@
rdiff-backup-data directory should be the root of the tree.
"""
- baserp.conn.manage.delete_earlier_than_local(baserp, time)
+ baserp.conn.manage.delete_earlier_than_local(baserp, time, select_opts)
-def delete_earlier_than_local(baserp, time):
+def delete_earlier_than_local(baserp, time, select_opts):
"""Like delete_earlier_than, but run on local connection for speed"""
assert baserp.conn is Globals.local_connection
def yield_files(rp):
@@ -100,13 +101,37 @@
yield sub_rp
yield rp
- for rp in yield_files(baserp):
- if ((rp.isincfile() and rp.getinctime() < time) or
- (rp.isdir() and not rp.listdir())):
- Log("Deleting increment file %s" % rp.path, 5)
- rp.delete()
-
+ if not select_opts:
+ # Simple remove. Delete all files with timestamp older
+ # than time.
+ for rp in yield_files(baserp):
+ if ((rp.isincfile() and rp.getinctime() < time) or
+ (rp.isdir() and not rp.listdir())):
+ Log("Deleting increment file %s" % rp.path, 5)
+ rp.delete()
+ else:
+ # Remove with selection options. Remove all increments
+ # that match, then modify metadata files to match.
+ select = selection.Select(baserp.append_path('increments'), True)
+ select.ParseArgs(select_opts, [])
+
+ for rp in yield_files(baserp.append_path('increments')):
+ if ((rp.isincfile() and rp.getinctime() < time) or
+ (rp.isdir() and not rp.listdir())):
+
+ if select.Select(rp) == 1: # File matched
+ Log("Deleting increment file %s" % rp.path, 5)
+ rp.delete()
+
+ # Process metadata
+ select = selection.Select(baserp)
+ select.ParseArgs(select_opts, [])
+ metadata.SetManager()
+ def callback(rorp):
+ return select.Select(rorp) == 1
+ metadata.rewrite_meta_files(time, callback)
+
class IncObj:
"""Increment object - represent a completed increment"""
def __init__(self, incrp):
--- rdiff_backup/metadata.py 27 Sep 2008 00:17:24 -0000 1.32
+++ rdiff_backup/metadata.py 27 Oct 2008 02:06:49 -0000
@@ -429,6 +429,61 @@
_extractor = RorpExtractor
_object_to_record = staticmethod(RORP2Record)
+def rewrite_meta_files(beforetime, callback):
+ """ Rewrites the various metadata files, removing historical
+ increments for rorp's where callback returns True.
+
+ This function is more complicated because of the metadata
+ snapshot files. rewrite_meta_files goes from the latest
+ metadata information, and works earlier. Whenever it encounters
+ an entry in a snapshot file, it finds what that information
+ should be, based on later metadata files, and puts that in
+ the new file."""
+
+ meta_base = Globals.rbdir.append_path('mirror_metadata')
+ metatimes = restore.get_inclist(meta_base)
+ metatimes = [file.getinctime() for file in metatimes]
+ metatimes.sort()
+ metatimes.reverse()
+
+ single_manager = Manager()
+
+ prevtime = None
+ for time in metatimes:
+ if time < beforetime:
+ inc_type = ManagerObj.get_meta_inctype(time)
+ reader = single_manager.GetAtTime(time, None)
+ writer = ManagerObj.GetWriter(inc_type, time)
+ if inc_type != 'snapshot':
+ # We're rewriting a diff file. Since the entries in these
+ # files work like the increments (they only exist if
+ # there's a change), we can just write the entries that
+ # don't match the callback.
+ for rorp in reader:
+ if not callback(rorp):
+ writer.write_object(rorp)
+ else:
+ # We're rewriting a snapshot file. Iterate through both
+ # this snapshot and the metadata as it existed at the
+ # previous backup. For each rorp, if it matches the
+ # callback, use the previous version (effectively
+ # removing this backup's increment), otherwise use this
+ # backup's version.
+ assert not prevtime is None
+ prev_reader = ManagerObj.GetAtTime(prevtime, None)
+ iter = rorpiter.Collate2Iters(reader, prev_reader)
+ for this_rorp, prev_rorp in iter:
+ rorp = this_rorp or prev_rorp
+ if callback(rorp):
+ # Use previous version
+ if prev_rorp:
+ writer.write_object(prev_rorp)
+ else:
+ if this_rorp:
+ writer.write_object(this_rorp)
+
+ writer.close()
+ prevtime = time
class CombinedWriter:
"""Used for simultaneously writting metadata, eas, and acls"""
@@ -482,6 +537,12 @@
if self.prefixmap.has_key(incbase):
self.prefixmap[incbase].append(rp)
else: self.prefixmap[incbase] = [rp]
+ def get_meta_inctype(self, time):
+ metas = filter(lambda x: x.getinctime() == time,
+ self.prefixmap['mirror_metadata'])
+ assert len(metas) == 1, metas
+ return metas[0].getinctype()
+
def _iter_helper(self, prefix, flatfileclass, time, restrict_index):
"""Used below to find the right kind of file by time"""
if not self.timerpmap.has_key(time): return None
@@ -690,3 +751,4 @@
import eas_acls, win_acls # put at bottom to avoid python circularity bug
+import restore
--- rdiff_backup/selection.py 4 Sep 2008 23:36:20 -0000 1.47
+++ rdiff_backup/selection.py 27 Oct 2008 02:06:49 -0000
@@ -79,12 +79,13 @@
# This re should not match normal filenames, but usually just globs
glob_re = re.compile("(.*[*?[\\\\]|ignorecase\\:)", re.I | re.S)
- def __init__(self, rootrp):
+ def __init__(self, rootrp, use_incr_name=False):
"""Select initializer. rpath is the root directory"""
assert isinstance(rootrp, rpath.RPath)
self.selection_functions = []
self.rpath = rootrp
self.prefix = self.rpath.path
+ self.use_incr_name = use_incr_name
def set_iter(self, sel_func = None):
"""Initialize more variables, get ready to iterate
@@ -537,6 +538,15 @@
sel_func.name = "%s size %d" % (min_max and "Maximum" or
"Minimum", size)
return sel_func
+ def get_filename(self, rp):
+ if self.use_incr_name and rp.isincfile():
+ return rp.getincbase().path
+ else:
+ if hasattr(rp, 'path'):
+ return rp.path
+ else:
+ return self.prefix + '/'.join(rp.index)
+
def glob_get_sf(self, glob_str, include):
"""Return selection function given by glob string"""
assert include == 0 or include == 1
@@ -614,12 +624,12 @@
"|".join(self.glob_get_prefix_res(glob_str)))
def include_sel_func(rp):
- if glob_comp_re.match(rp.path): return 1
- elif scan_comp_re.match(rp.path): return 2
+ if glob_comp_re.match(self.get_filename(rp)): return 1
+ elif scan_comp_re.match(self.get_filename(rp)): return 2
else: return None
def exclude_sel_func(rp):
- if glob_comp_re.match(rp.path): return 0
+ if glob_comp_re.match(self.get_filename(rp)): return 0
else: return None
# Check to make sure prefix is ok
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- [rdiff-backup-users] [PATCH] Use --include and --exclude with --remove-older-than,
Josh Nisly <=