rdiff-backup-users
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[rdiff-backup-users] [PATCH] Unicode support on Windows


From: Josh Nisly
Subject: [rdiff-backup-users] [PATCH] Unicode support on Windows
Date: Wed, 08 Apr 2009 21:48:19 -0500
User-agent: Thunderbird 2.0.0.21 (X11/20090318)

Attached is a patch to support Unicode on Windows. This fixes support for filenames with non-ASCII characters, and paves the way for long filename support. Also attached is a test script that demonstrates the correct behavior; it can be run on Windows or Unix.

Josh Nisly
#!/usr/bin/python

import os
import shutil
import subprocess
import time

# Path of the rdiff-backup executable that the test drives.  On Windows
# (os.name == 'nt') it is looked up under output\; everywhere else it is
# expected in the current directory.
if os.name != 'nt':
    RDIFF_EXE = './rdiff-backup'
else:
    RDIFF_EXE = 'output\\rdiff-backup.exe'

# Scratch tree used by the test.  The root is a unicode literal and the
# source, destination and restore directories all live directly below it.
TEMP_DIR = u'unicode_test'
SRC_DIR, DEST_DIR, RESTORE_DIR = (
    os.path.join(TEMP_DIR, 'src'),
    os.path.join(TEMP_DIR, 'dest'),
    os.path.join(TEMP_DIR, 'restore'),
)

# A file name made up of non-ASCII code points plus a .txt extension.
UNICODE_NAME = u'\u3046\u3069\u3093\u5c4b.txt'

def write_file(path, text):
    """Create (or truncate) the file at ``path`` and write ``text`` to it.

    The file is opened in ``'w'`` mode.  The handle is always closed, even
    when the write itself raises, so a failed write cannot leak an open
    descriptor (which on Windows would also keep the file locked).
    """
    handle = open(path, 'w')
    try:
        handle.write(text)
    finally:
        handle.close()

def run_backup(new_metadata_format = True):
    """Back up SRC_DIR into DEST_DIR by running RDIFF_EXE as a subprocess.

    The command line is printed before it is executed.  A non-zero exit
    status from the child raises ValueError('Backup failed!').

    new_metadata_format is accepted for the callers' sake but is not read
    anywhere in this function.
    """
    argv = [RDIFF_EXE, SRC_DIR, DEST_DIR]
    print(argv)
    exit_status = subprocess.Popen(argv).wait()
    if exit_status:
        raise ValueError('Backup failed!')
    # Pause for a second so that two consecutive backups never run at the
    # same time (the original author calls this an ugly hack).
    time.sleep(1)

def run_restore(time='now'):
    """Restore DEST_DIR into RESTORE_DIR as of ``time`` using RDIFF_EXE.

    ``time`` is handed unchanged to the executable's ``-r`` option and
    defaults to ``'now'``.  The command line is printed before it runs.

    Raises ValueError if the child exits with a non-zero status, mirroring
    run_backup, so that a failed restore is reported at the point of
    failure instead of surfacing later as a confusing directory-listing
    mismatch.

    Note: the parameter name shadows the ``time`` module inside this
    function; the module is not needed here.
    """
    cmd = [RDIFF_EXE, '-r', time, DEST_DIR, RESTORE_DIR]
    print(cmd)
    if subprocess.Popen(cmd).wait():
        raise ValueError('Restore failed!')


def test_with_file(filename):
    """Back up, change, and restore a single file called ``filename``.

    Steps:
      1. Recreate TEMP_DIR, SRC_DIR and DEST_DIR from scratch.
      2. Create an empty file named ``filename`` in SRC_DIR; off Windows,
         also try to attach an extended attribute and an ACL entry to it.
      3. Run a backup, rewrite the file with new contents, back up again.
      4. Restore the latest state and check that RESTORE_DIR contains
         exactly ``filename``.
      5. Restore the '1B' state and perform the same check.

    Raises AssertionError when a restored directory listing differs from
    ``[filename]``; errors raised by run_backup/run_restore propagate.
    """
    print('Testing %s' % filename.encode('ascii', 'replace'))
    is_unicode_filename = isinstance(filename, unicode)

    # TEMP_DIR comes first: removing it also clears any stale src, dest
    # and restore directories left over from a previous run.
    for path in [TEMP_DIR, SRC_DIR, DEST_DIR]:
        if os.path.exists(path):
            shutil.rmtree(path)
        if not os.path.exists(path):
            os.mkdir(path)

    src_file_path = os.path.join(SRC_DIR, filename)
    write_file(src_file_path, '')
    if os.name != 'nt':
        # The exit status of these commands is deliberately not checked.
        # NOTE(review): the ACL entry names the hard-coded user 'joshn';
        # on machines without that account setfacl will presumably fail
        # and the ACL part of the test is silently skipped - confirm.
        encoded_path = src_file_path.encode('utf-8')
        os.system('setfattr -n user.foo -v bar "%s"' % encoded_path)
        os.system('setfacl -m user:joshn:rwx "%s"' % encoded_path)
    run_backup()

    write_file(src_file_path, 'first change')
    run_backup(is_unicode_filename)

    # Try a restore, and make sure that the directory listing looks right
    run_restore()
    dir_listing = os.listdir(RESTORE_DIR)
    assert dir_listing == [filename], dir_listing

    # Repeat for the older state ('1B' is presumably "one backup ago" in
    # rdiff-backup's time syntax).  The listing used to be computed and
    # then thrown away; verify it just like the first restore.
    shutil.rmtree(RESTORE_DIR)
    run_restore('1B')
    dir_listing = os.listdir(RESTORE_DIR)
    assert dir_listing == [filename], dir_listing

# Run the round-trip test: a plain ASCII name first, then the non-ASCII
# name held in UNICODE_NAME (previously the same literal was repeated here
# and the constant went unused).
test_with_file('simple.txt')
test_with_file(UNICODE_NAME)
if os.name != 'nt':
    # Names containing a tab or a newline are only exercised off Windows,
    # presumably because such names cannot be created there.
    test_with_file('line1\tline2')
    test_with_file('line1\nline2')

--- rdiff_backup/FilenameMapping.py     3 Jan 2009 21:32:40 -0000       1.19
+++ rdiff_backup/FilenameMapping.py     8 Apr 2009 19:15:05 -0000
@@ -158,7 +158,10 @@
                correctly and append()ed to the currect QuotedRPath.
 
                """
-               return map(unquote, self.conn.os.listdir(self.path))
+               path = self.path
+               if type(path) != unicode:
+                       path = unicode(path, 'utf-8')
+               return map(unquote, self.conn.os.listdir(path))
 
        def __str__(self):
                return "QuotedPath: %s\nIndex: %s\nData: %s" % \
--- rdiff_backup/eas_acls.py    2 Mar 2009 18:02:58 -0000       1.43
+++ rdiff_backup/eas_acls.py    8 Apr 2009 23:42:53 -0000
@@ -57,7 +57,8 @@
        def read_from_rp(self, rp):
                """Set the extended attributes from an rpath"""
                try:
-                       attr_list = rp.conn.xattr.listxattr(rp.path, rp.issym())
+                       attr_list = 
rp.conn.xattr.listxattr(rp.path.encode('utf-8'),
+                                                                               
                rp.issym())
                except IOError, exc:
                        if exc[0] in (errno.EOPNOTSUPP, errno.EPERM, 
errno.ETXTBSY):
                                return # if not supported, consider empty
@@ -74,7 +75,8 @@
                                continue
                        try:
                                self.attr_dict[attr] = \
-                                       rp.conn.xattr.getxattr(rp.path, attr, 
rp.issym())
+                                       
rp.conn.xattr.getxattr(rp.path.encode('utf-8'),
+                                                                               
        attr, rp.issym())
                        except IOError, exc:
                                # File probably modified while reading, just 
continue
                                if exc[0] == errno.ENODATA: continue
@@ -86,9 +88,11 @@
        def clear_rp(self, rp):
                """Delete all the extended attributes in rpath"""
                try:
-                       for name in rp.conn.xattr.listxattr(rp.path, 
rp.issym()):
+                       for name in 
rp.conn.xattr.listxattr(rp.path.encode('utf-8'),
+                                                                               
                rp.issym()):
                                try:
-                                       rp.conn.xattr.removexattr(rp.path, 
name, rp.issym())
+                                       
rp.conn.xattr.removexattr(rp.path.encode('utf-8'),
+                                                                               
        name, rp.issym())
                                except IOError, exc:
                                        # SELinux attributes cannot be removed, 
and we don't want
                                        # to bail out or be too noisy at low 
log levels.
@@ -111,7 +115,8 @@
                self.clear_rp(rp)
                for (name, value) in self.attr_dict.iteritems():
                        try:
-                               rp.conn.xattr.setxattr(rp.path, name, value, 0, 
rp.issym())
+                               rp.conn.xattr.setxattr(rp.path.encode('utf-8'), 
name,
+                                                                               
value, 0, rp.issym())
                        except IOError, exc:
                                # Mac and Linux attributes have different 
namespaces, so
                                # fail gracefully if can't call setxattr
@@ -149,13 +154,14 @@
 
 def EA2Record(ea):
        """Convert ExtendedAttributes object to text record"""
-       str_list = ['# file: %s' % C.acl_quote(ea.get_indexpath())]
+       str_list = ['# file: %s' % 
C.acl_quote(ea.get_indexpath().encode('utf-8'))]
        for (name, val) in ea.attr_dict.iteritems():
                if not val: str_list.append(name)
                else:
                        encoded_val = base64.encodestring(val).replace('\n', '')
                        try:
-                               str_list.append('%s=0s%s' % (C.acl_quote(name), 
encoded_val))
+                               str_list.append('%s=0s%s' % 
(C.acl_quote(name.encode('utf-8')),
+                                                                               
        encoded_val))
                        except UnicodeEncodeError:
                                log.Log("Warning: unable to store Unicode 
extended attribute %s"
                                                        % repr(name), 3)
@@ -169,7 +175,8 @@
                raise metadata.ParsingError("Bad record beginning: " + 
first[:8])
        filename = first[8:]
        if filename == '.': index = ()
-       else: index = tuple(C.acl_unquote(filename).split('/'))
+       else: index = tuple(unicode(C.acl_unquote(filename.encode('utf-8')),
+                                                                               
        'utf-8').split('/'))
        ea = ExtendedAttributes(index)
 
        for line in lines:
@@ -194,7 +201,7 @@
        def filename_to_index(self, filename):
                """Convert possibly quoted filename to index tuple"""
                if filename == '.': return ()
-               else: return tuple(C.acl_unquote(filename).split('/'))
+               else: return 
tuple(C.acl_unquote(filename.encode('utf-8')).split('/'))
 
 class ExtendedAttributesFile(metadata.FlatFile):
        """Store/retrieve EAs from extended_attributes file"""
@@ -379,7 +386,7 @@
        else: acl = posix1e.ACL()
 
        try:
-               acl.applyto(rp.path)
+               acl.applyto(rp.path.encode('utf-8'))
        except IOError, exc:
                if exc[0] == errno.EOPNOTSUPP:
                        log.Log("Warning: unable to set ACL on %s: %s" % 
@@ -391,12 +398,12 @@
                if default_entry_list:
                        def_acl = list_to_acl(default_entry_list, map_names)
                else: def_acl = posix1e.ACL()
-               def_acl.applyto(rp.path, posix1e.ACL_TYPE_DEFAULT)
+               def_acl.applyto(rp.path.encode('utf-8'), 
posix1e.ACL_TYPE_DEFAULT)
 
 def get_acl_lists_from_rp(rp):
        """Returns (acl_list, def_acl_list) from an rpath.  Call locally"""
        assert rp.conn is Globals.local_connection
-       try: acl = posix1e.ACL(file=rp.path)
+       try: acl = posix1e.ACL(file=rp.path.encode('utf-8'))
        except IOError, exc:
                if exc[0] == errno.EOPNOTSUPP:
                        acl = None
@@ -406,7 +413,7 @@
                        acl = None
                else: raise
        if rp.isdir():
-               try: def_acl = posix1e.ACL(filedef=rp.path)
+               try: def_acl = posix1e.ACL(filedef=rp.path.encode('utf-8'))
                except IOError, exc:
                        if exc[0] == errno.EOPNOTSUPP:
                                def_acl = None
@@ -533,7 +540,8 @@
 
 def ACL2Record(acl):
        """Convert an AccessControlLists object into a text record"""
-       return '# file: %s\n%s\n' % (C.acl_quote(acl.get_indexpath()), str(acl))
+       return '# file: %s\n%s\n' % \
+               (C.acl_quote(acl.get_indexpath().encode('utf-8')), str(acl))
 
 def Record2ACL(record):
        """Convert text record to an AccessControlLists object"""
@@ -543,7 +551,8 @@
                raise metadata.ParsingError("Bad record beginning: "+ 
first_line)
        filename = first_line[8:]
        if filename == '.': index = ()
-       else: index = tuple(C.acl_unquote(filename).split('/'))
+       else: index = tuple(unicode(C.acl_unquote(filename.encode('utf-8')),
+                                               'utf-8').split('/'))
        return AccessControlLists(index, record[newline_pos:])
 
 class ACLExtractor(EAExtractor):
--- rdiff_backup/log.py 14 Aug 2007 02:58:20 -0000      1.25
+++ rdiff_backup/log.py 8 Apr 2009 21:51:57 -0000
@@ -125,7 +125,11 @@
                """Write the message to the log file, if possible"""
                if self.log_file_open:
                        if self.log_file_local:
-                               self.logfp.write(self.format(message, 
self.verbosity))
+                               str = self.format(message, self.verbosity)
+                               if type(str) != unicode:
+                                       str = unicode(str, 'utf-8')
+                               str = str.encode('utf-8')
+                               self.logfp.write(str)
                                self.logfp.flush()
                        else: self.log_file_conn.log.Log.log_to_file(message)
 
@@ -133,7 +137,10 @@
                """Write message to stdout/stderr"""
                if verbosity <= 2 or Globals.server: termfp = sys.stderr
                else: termfp = sys.stdout
-               termfp.write(self.format(message, self.term_verbosity))
+               str = self.format(message, self.term_verbosity)
+               if type(str) != unicode:
+                       str = unicode(str, 'utf-8')
+               termfp.write(str.encode('ascii', 'replace'))
 
        def conn(self, direction, result, req_num):
                """Log some data on the connection
@@ -165,10 +172,17 @@
        def exception_to_string(self, arglist = []):
                """Return string version of current exception plus what's in 
arglist"""
                type, value, tb = sys.exc_info()
-               s = ("Exception '%s' raised of class '%s':\n%s" %
-                        (value, type, "".join(traceback.format_tb(tb))))
+               s = (u"Exception '%s' raised of class '%s':\n%s" %
+                        (value, type, u"".join(traceback.format_tb(tb))))
+               s = s.encode('ascii', 'replace')
                if arglist:
-                       s += "__Arguments:\n" + "\n".join(map(str, arglist))
+                       s += "__Arguments:"
+                       for arg in arglist:
+                               s += "\n"
+                               try:
+                                       s += str(arg)
+                               except UnicodeError:
+                                       s += unicode(arg).encode('ascii', 
'replace')
                return s
 
        def exception(self, only_terminal = 0, verbosity = 5):
@@ -259,7 +273,8 @@
                """Return log string to put in error log"""
                assert (error_type == "ListError" or error_type == 
"UpdateError" or
                                error_type == "SpecialFileError"), "Unknown 
type "+error_type
-               return "%s %s %s" % (error_type, cls.get_indexpath(rp), 
str(exc))
+               str = u"%s %s %s" % (error_type, cls.get_indexpath(rp), 
unicode(exc))
+               return str.encode('utf-8')
 
        def close(cls):
                """Close the error log file"""
--- rdiff_backup/metadata.py    27 Sep 2008 00:17:24 -0000      1.32
+++ rdiff_backup/metadata.py    8 Apr 2009 17:58:42 -0000
@@ -55,7 +55,7 @@
 """
 
 from __future__ import generators
-import re, gzip, os, binascii
+import re, gzip, os, binascii, codecs
 import log, Globals, rpath, Time, robust, increment, static, rorpiter
 
 class ParsingError(Exception):
@@ -376,16 +376,18 @@
                        compress = 1
                if mode == 'r':
                        self.rp = rp_base
-                       self.fileobj = self.rp.open("rb", compress)
+                       self.fileobj = rpath.UnicodeFile(self.rp.open("rb", 
compress))
                else:
                        assert mode == 'w'
                        if compress and check_path and not 
rp_base.isinccompressed():
                                def callback(rp): self.rp = rp
-                               self.fileobj = rpath.MaybeGzip(rp_base, 
callback)
+                               self.fileobj = 
rpath.UnicodeFile(rpath.MaybeGzip(rp_base,
+                                                                               
                callback))
                        else:
                                self.rp = rp_base
                                assert not self.rp.lstat(), self.rp
-                               self.fileobj = self.rp.open("wb", compress = 
compress)
+                               self.fileobj = 
rpath.UnicodeFile(self.rp.open("wb", 
+                                                                               
                compress = compress))
 
        def write_record(self, record):
                """Write a (text) record into the file"""
--- rdiff_backup/rpath.py       8 Mar 2009 17:20:16 -0000       1.138
+++ rdiff_backup/rpath.py       8 Apr 2009 23:36:36 -0000
@@ -35,7 +35,7 @@
 
 """
 
-import os, stat, re, sys, shutil, gzip, socket, time, errno
+import os, stat, re, sys, shutil, gzip, socket, time, errno, codecs
 import Globals, Time, static, log, user_group, C
 
 try:
@@ -284,6 +284,8 @@
        """
        if os.name != 'nt':
                try:
+                       if type(filename) == unicode:
+                               filename = filename.encode('utf-8')
                        return C.make_file_dict(filename)
                except OSError, error:
                        # Unicode filenames should be process by the Python 
version 
@@ -333,7 +335,7 @@
        data['nlink'] = statblock[stat.ST_NLINK]
 
        if os.name == 'nt':
-               attribs = win32file.GetFileAttributes(filename)
+               attribs = win32file.GetFileAttributesW(filename)
                if attribs & winnt.FILE_ATTRIBUTE_REPARSE_POINT:
                        data['type'] = 'sym'
                        data['linkname'] = None
@@ -995,7 +997,10 @@
 
        def listdir(self):
                """Return list of string paths returned by os.listdir"""
-               return self.conn.os.listdir(self.path)
+               path = self.path
+               if type(path) != unicode:
+                       path = unicode(path, 'utf-8')
+               return self.conn.os.listdir(path)
 
        def symlink(self, linktext):
                """Make symlink at self.path pointing to linktext"""
@@ -1406,6 +1411,23 @@
                write_win_acl(self, acl)
                self.data['win_acl'] = acl
 
+class UnicodeFile:
+       """ Wraps a RPath and reads/writes unicode. """
+
+       def __init__(self, fileobj):
+               self.fileobj = fileobj
+
+       def read(self, length = -1):
+               return unicode(self.fileobj.read(length), 'utf-8')
+
+       def write(self, buf):
+               if type(buf) != unicode:
+                       buf = unicode(buf, 'utf-8')
+               return self.fileobj.write(buf.encode('utf-8'))
+
+       def close(self):
+               return self.fileobj.close()
+
 class RPathFileHook:
        """Look like a file, but add closing hook"""
        def __init__(self, file, closing_thunk):
@@ -1429,6 +1451,18 @@
        messages.  Use this class instead to clean those up.
 
        """
+       def __init__(self, filename=None, mode=None):
+               """ This is needed because we need to write an
+               encoded filename to the file, but use normal
+               unicode with the filename."""
+               if mode and 'b' not in mode:
+                       mode += 'b'
+               if type(filename) != unicode:
+                       filename = unicode(filename, 'utf-8')
+               fileobj = open(filename, mode or 'rb')
+               gzip.GzipFile.__init__(self, filename.encode('utf-8'),
+                                                       mode=mode, 
fileobj=fileobj)
+
        def __del__(self): pass
        def __getattr__(self, name):
                if name == 'fileno': return self.fileobj.fileno
--- rdiff_backup/statistics.py  3 Jan 2009 21:35:59 -0000       1.23
+++ rdiff_backup/statistics.py  8 Apr 2009 17:58:42 -0000
@@ -20,7 +20,7 @@
 """Generate and process aggregated backup information"""
 
 import re, os, time
-import Globals, Time, increment, log, static, metadata
+import Globals, Time, increment, log, static, metadata, rpath
 
 class StatsException(Exception): pass
 
@@ -219,13 +219,13 @@
 
        def write_stats_to_rp(self, rp):
                """Write statistics string to given rpath"""
-               fp = rp.open("wb")
+               fp = rpath.UnicodeFile(rp.open("wb"))
                fp.write(self.get_stats_string())
                assert not fp.close()
 
        def read_stats_from_rp(self, rp):
                """Set statistics from rpath, return self for convenience"""
-               fp = rp.open("r")
+               fp = rpath.UnicodeFile(rp.open("r"))
                self.set_stats_from_string(fp.read())
                fp.close()
                return self
@@ -364,7 +364,8 @@
                suffix = Globals.compression and 'data.gz' or 'data'
                cls._rp = increment.get_inc(rpbase, suffix, Time.curtime)
                assert not cls._rp.lstat()
-               cls._fileobj = cls._rp.open("wb", compress = 
Globals.compression)
+               cls._fileobj = rpath.UnicodeFile(cls._rp.open("wb", 
+                                                       compress = 
Globals.compression))
 
                cls._line_sep = Globals.null_separator and '\0' or '\n'
                cls.write_docstring()
--- rdiff_backup/win_acls.py    24 Nov 2008 22:07:32 -0000      1.4
+++ rdiff_backup/win_acls.py    8 Apr 2009 18:21:51 -0000
@@ -181,7 +181,7 @@
 
        def __str__(self):
                return '# file: %s\n%s\n' % \
-                               (C.acl_quote(self.get_indexpath()), 
unicode(self.__acl))
+                                       (self.get_indexpath(), 
unicode(self.__acl))
 
        def from_string(self, acl_str):
                lines = acl_str.splitlines()
@@ -189,7 +189,7 @@
                        raise metadata.ParsingError("Bad record beginning: " + 
lines[0][:8])
                filename = lines[0][8:]
                if filename == '.': self.index = ()
-               else: self.index = tuple(C.acl_unquote(filename).split('/'))
+               else: self.index = tuple(filename.split('/'))
                self.__acl = lines[1]
 
 def Record2WACL(record):

reply via email to

[Prev in Thread] Current Thread [Next in Thread]