[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[rdiff-backup-users] [PATCH] Unicode support on Windows
From: |
Josh Nisly |
Subject: |
[rdiff-backup-users] [PATCH] Unicode support on Windows |
Date: |
Wed, 08 Apr 2009 21:48:19 -0500 |
User-agent: |
Thunderbird 2.0.0.21 (X11/20090318) |
Attached is a patch to support Unicode on Windows. This fixes support
for filenames with non-ASCII characters, and paves the way for long
filename support. Also attached is a test script that demonstrates the
correct behavior; it can be run on Windows or Unix.
Josh Nisly
#!/usr/bin/python
import os
import shutil
import subprocess
import time
# Executable under test: 'output\rdiff-backup.exe' on Windows,
# './rdiff-backup' on every other platform. Both are relative paths.
if os.name == 'nt':
    RDIFF_EXE = 'output\\rdiff-backup.exe'
else:
    RDIFF_EXE = './rdiff-backup'

# Scratch tree used by the test. TEMP_DIR is a unicode literal and the
# three working directories are created directly beneath it.
TEMP_DIR = u'unicode_test'
SRC_DIR = os.path.join(TEMP_DIR, 'src')
DEST_DIR = os.path.join(TEMP_DIR, 'dest')
RESTORE_DIR = os.path.join(TEMP_DIR, 'restore')

# Sample file name consisting of non-ASCII characters plus '.txt'.
UNICODE_NAME = u'\u3046\u3069\u3093\u5c4b.txt'
def write_file(path, text):
    """Create (or truncate) the file at `path` and write `text` into it.

    The file is opened in text mode 'w'. The handle is closed even when
    the write raises, so a failed write does not leak it.
    """
    fp = open(path, 'w')
    try:
        fp.write(text)
    finally:
        fp.close()
def run_backup(new_metadata_format=True):
    """Run one backup of SRC_DIR into DEST_DIR using RDIFF_EXE.

    `new_metadata_format` is accepted so existing callers keep working,
    but it is not consulted: the command line is the same either way.

    Raises ValueError if the backup process exits with a non-zero status.
    """
    cmd = [RDIFF_EXE, SRC_DIR, DEST_DIR]
    print(cmd)
    # subprocess.call waits for the child and hands back its exit status.
    if subprocess.call(cmd):
        raise ValueError('Backup failed!')
    # Ugly hack: sleep for 1 second so that the next backup
    # doesn't run at the same time
    time.sleep(1)
def run_restore(time='now'):
    """Restore DEST_DIR into RESTORE_DIR using RDIFF_EXE.

    `time` is passed unchanged as the argument of the `-r` option. Inside
    this function the parameter hides the `time` module, which is not
    needed here.

    Raises ValueError if the restore process exits with a non-zero
    status, the same way run_backup reports a failed backup, so a broken
    restore is reported as such instead of surfacing later as a missing
    directory.
    """
    cmd = [RDIFF_EXE, '-r', time, DEST_DIR, RESTORE_DIR]
    print(cmd)
    # subprocess.call waits for the child and hands back its exit status.
    if subprocess.call(cmd):
        raise ValueError('Restore failed!')
def test_with_file(filename):
    """Back up, change, back up again and restore a file named `filename`.

    Steps:
      1. Recreate TEMP_DIR, SRC_DIR and DEST_DIR from scratch.
      2. Create an empty file `filename` in SRC_DIR (off Windows, also try
         to attach an extended attribute and an ACL entry to it).
      3. Back up, rewrite the file with new content, back up again.
      4. Restore at 'now' and at '1B'; after each restore RESTORE_DIR must
         contain exactly one entry, equal to `filename`.

    Raises AssertionError when a restored listing differs from
    [filename]; errors from the helpers propagate unchanged.
    """
    print('Testing %s' % filename.encode('ascii', 'replace'))
    is_unicode_filename = isinstance(filename, unicode)

    # TEMP_DIR is handled first: removing it also removes whatever an
    # earlier run left in the directories below it (RESTORE_DIR included).
    for path in (TEMP_DIR, SRC_DIR, DEST_DIR):
        if os.path.exists(path):
            shutil.rmtree(path)
        os.mkdir(path)

    src_file_path = os.path.join(SRC_DIR, filename)
    write_file(src_file_path, '')

    if os.name != 'nt':
        # The exit status of both commands is ignored, so a missing tool
        # or an unsupported filesystem does not stop the test.
        encoded_path = src_file_path.encode('utf-8')
        os.system('setfattr -n user.foo -v bar "%s"' % encoded_path)
        # NOTE(review): the ACL entry names the user 'joshn'; presumably
        # this only takes effect where such an account exists - confirm.
        os.system('setfacl -m user:joshn:rwx "%s"' % encoded_path)

    run_backup()
    write_file(src_file_path, 'first change')
    run_backup(is_unicode_filename)

    # Try a restore, and make sure that the directory listing looks right
    run_restore()
    dir_listing = os.listdir(RESTORE_DIR)
    assert dir_listing == [filename], dir_listing
    shutil.rmtree(RESTORE_DIR)

    # Restore again at '1B' (presumably "one backup ago" in rdiff-backup's
    # time syntax - confirm) and check that listing too; previously it
    # was read but never compared.
    run_restore('1B')
    dir_listing = os.listdir(RESTORE_DIR)
    assert dir_listing == [filename], dir_listing
# Run the scenario for a plain ASCII name and for the non-ASCII sample name.
test_with_file('simple.txt')
test_with_file(UNICODE_NAME)
if os.name != 'nt':
    # Names containing a tab or a newline are only exercised off Windows.
    test_with_file('line1\tline2')
    test_with_file('line1\nline2')
--- rdiff_backup/FilenameMapping.py 3 Jan 2009 21:32:40 -0000 1.19
+++ rdiff_backup/FilenameMapping.py 8 Apr 2009 19:15:05 -0000
@@ -158,7 +158,10 @@
correctly and append()ed to the currect QuotedRPath.
"""
- return map(unquote, self.conn.os.listdir(self.path))
+ path = self.path
+ if type(path) != unicode:
+ path = unicode(path, 'utf-8')
+ return map(unquote, self.conn.os.listdir(path))
def __str__(self):
return "QuotedPath: %s\nIndex: %s\nData: %s" % \
--- rdiff_backup/eas_acls.py 2 Mar 2009 18:02:58 -0000 1.43
+++ rdiff_backup/eas_acls.py 8 Apr 2009 23:42:53 -0000
@@ -57,7 +57,8 @@
def read_from_rp(self, rp):
"""Set the extended attributes from an rpath"""
try:
- attr_list = rp.conn.xattr.listxattr(rp.path, rp.issym())
+ attr_list =
rp.conn.xattr.listxattr(rp.path.encode('utf-8'),
+
rp.issym())
except IOError, exc:
if exc[0] in (errno.EOPNOTSUPP, errno.EPERM,
errno.ETXTBSY):
return # if not supported, consider empty
@@ -74,7 +75,8 @@
continue
try:
self.attr_dict[attr] = \
- rp.conn.xattr.getxattr(rp.path, attr,
rp.issym())
+
rp.conn.xattr.getxattr(rp.path.encode('utf-8'),
+
attr, rp.issym())
except IOError, exc:
# File probably modified while reading, just
continue
if exc[0] == errno.ENODATA: continue
@@ -86,9 +88,11 @@
def clear_rp(self, rp):
"""Delete all the extended attributes in rpath"""
try:
- for name in rp.conn.xattr.listxattr(rp.path,
rp.issym()):
+ for name in
rp.conn.xattr.listxattr(rp.path.encode('utf-8'),
+
rp.issym()):
try:
- rp.conn.xattr.removexattr(rp.path,
name, rp.issym())
+
rp.conn.xattr.removexattr(rp.path.encode('utf-8'),
+
name, rp.issym())
except IOError, exc:
# SELinux attributes cannot be removed,
and we don't want
# to bail out or be too noisy at low
log levels.
@@ -111,7 +115,8 @@
self.clear_rp(rp)
for (name, value) in self.attr_dict.iteritems():
try:
- rp.conn.xattr.setxattr(rp.path, name, value, 0,
rp.issym())
+ rp.conn.xattr.setxattr(rp.path.encode('utf-8'),
name,
+
value, 0, rp.issym())
except IOError, exc:
# Mac and Linux attributes have different
namespaces, so
# fail gracefully if can't call setxattr
@@ -149,13 +154,14 @@
def EA2Record(ea):
"""Convert ExtendedAttributes object to text record"""
- str_list = ['# file: %s' % C.acl_quote(ea.get_indexpath())]
+ str_list = ['# file: %s' %
C.acl_quote(ea.get_indexpath().encode('utf-8'))]
for (name, val) in ea.attr_dict.iteritems():
if not val: str_list.append(name)
else:
encoded_val = base64.encodestring(val).replace('\n', '')
try:
- str_list.append('%s=0s%s' % (C.acl_quote(name),
encoded_val))
+ str_list.append('%s=0s%s' %
(C.acl_quote(name.encode('utf-8')),
+
encoded_val))
except UnicodeEncodeError:
log.Log("Warning: unable to store Unicode
extended attribute %s"
% repr(name), 3)
@@ -169,7 +175,8 @@
raise metadata.ParsingError("Bad record beginning: " +
first[:8])
filename = first[8:]
if filename == '.': index = ()
- else: index = tuple(C.acl_unquote(filename).split('/'))
+ else: index = tuple(unicode(C.acl_unquote(filename.encode('utf-8')),
+
'utf-8').split('/'))
ea = ExtendedAttributes(index)
for line in lines:
@@ -194,7 +201,7 @@
def filename_to_index(self, filename):
"""Convert possibly quoted filename to index tuple"""
if filename == '.': return ()
- else: return tuple(C.acl_unquote(filename).split('/'))
+ else: return
tuple(C.acl_unquote(filename.encode('utf-8')).split('/'))
class ExtendedAttributesFile(metadata.FlatFile):
"""Store/retrieve EAs from extended_attributes file"""
@@ -379,7 +386,7 @@
else: acl = posix1e.ACL()
try:
- acl.applyto(rp.path)
+ acl.applyto(rp.path.encode('utf-8'))
except IOError, exc:
if exc[0] == errno.EOPNOTSUPP:
log.Log("Warning: unable to set ACL on %s: %s" %
@@ -391,12 +398,12 @@
if default_entry_list:
def_acl = list_to_acl(default_entry_list, map_names)
else: def_acl = posix1e.ACL()
- def_acl.applyto(rp.path, posix1e.ACL_TYPE_DEFAULT)
+ def_acl.applyto(rp.path.encode('utf-8'),
posix1e.ACL_TYPE_DEFAULT)
def get_acl_lists_from_rp(rp):
"""Returns (acl_list, def_acl_list) from an rpath. Call locally"""
assert rp.conn is Globals.local_connection
- try: acl = posix1e.ACL(file=rp.path)
+ try: acl = posix1e.ACL(file=rp.path.encode('utf-8'))
except IOError, exc:
if exc[0] == errno.EOPNOTSUPP:
acl = None
@@ -406,7 +413,7 @@
acl = None
else: raise
if rp.isdir():
- try: def_acl = posix1e.ACL(filedef=rp.path)
+ try: def_acl = posix1e.ACL(filedef=rp.path.encode('utf-8'))
except IOError, exc:
if exc[0] == errno.EOPNOTSUPP:
def_acl = None
@@ -533,7 +540,8 @@
def ACL2Record(acl):
"""Convert an AccessControlLists object into a text record"""
- return '# file: %s\n%s\n' % (C.acl_quote(acl.get_indexpath()), str(acl))
+ return '# file: %s\n%s\n' % \
+ (C.acl_quote(acl.get_indexpath().encode('utf-8')), str(acl))
def Record2ACL(record):
"""Convert text record to an AccessControlLists object"""
@@ -543,7 +551,8 @@
raise metadata.ParsingError("Bad record beginning: "+
first_line)
filename = first_line[8:]
if filename == '.': index = ()
- else: index = tuple(C.acl_unquote(filename).split('/'))
+ else: index = tuple(unicode(C.acl_unquote(filename.encode('utf-8')),
+ 'utf-8').split('/'))
return AccessControlLists(index, record[newline_pos:])
class ACLExtractor(EAExtractor):
--- rdiff_backup/log.py 14 Aug 2007 02:58:20 -0000 1.25
+++ rdiff_backup/log.py 8 Apr 2009 21:51:57 -0000
@@ -125,7 +125,11 @@
"""Write the message to the log file, if possible"""
if self.log_file_open:
if self.log_file_local:
- self.logfp.write(self.format(message,
self.verbosity))
+ str = self.format(message, self.verbosity)
+ if type(str) != unicode:
+ str = unicode(str, 'utf-8')
+ str = str.encode('utf-8')
+ self.logfp.write(str)
self.logfp.flush()
else: self.log_file_conn.log.Log.log_to_file(message)
@@ -133,7 +137,10 @@
"""Write message to stdout/stderr"""
if verbosity <= 2 or Globals.server: termfp = sys.stderr
else: termfp = sys.stdout
- termfp.write(self.format(message, self.term_verbosity))
+ str = self.format(message, self.term_verbosity)
+ if type(str) != unicode:
+ str = unicode(str, 'utf-8')
+ termfp.write(str.encode('ascii', 'replace'))
def conn(self, direction, result, req_num):
"""Log some data on the connection
@@ -165,10 +172,17 @@
def exception_to_string(self, arglist = []):
"""Return string version of current exception plus what's in
arglist"""
type, value, tb = sys.exc_info()
- s = ("Exception '%s' raised of class '%s':\n%s" %
- (value, type, "".join(traceback.format_tb(tb))))
+ s = (u"Exception '%s' raised of class '%s':\n%s" %
+ (value, type, u"".join(traceback.format_tb(tb))))
+ s = s.encode('ascii', 'replace')
if arglist:
- s += "__Arguments:\n" + "\n".join(map(str, arglist))
+ s += "__Arguments:"
+ for arg in arglist:
+ s += "\n"
+ try:
+ s += str(arg)
+ except UnicodeError:
+ s += unicode(arg).encode('ascii',
'replace')
return s
def exception(self, only_terminal = 0, verbosity = 5):
@@ -259,7 +273,8 @@
"""Return log string to put in error log"""
assert (error_type == "ListError" or error_type ==
"UpdateError" or
error_type == "SpecialFileError"), "Unknown
type "+error_type
- return "%s %s %s" % (error_type, cls.get_indexpath(rp),
str(exc))
+ str = u"%s %s %s" % (error_type, cls.get_indexpath(rp),
unicode(exc))
+ return str.encode('utf-8')
def close(cls):
"""Close the error log file"""
--- rdiff_backup/metadata.py 27 Sep 2008 00:17:24 -0000 1.32
+++ rdiff_backup/metadata.py 8 Apr 2009 17:58:42 -0000
@@ -55,7 +55,7 @@
"""
from __future__ import generators
-import re, gzip, os, binascii
+import re, gzip, os, binascii, codecs
import log, Globals, rpath, Time, robust, increment, static, rorpiter
class ParsingError(Exception):
@@ -376,16 +376,18 @@
compress = 1
if mode == 'r':
self.rp = rp_base
- self.fileobj = self.rp.open("rb", compress)
+ self.fileobj = rpath.UnicodeFile(self.rp.open("rb",
compress))
else:
assert mode == 'w'
if compress and check_path and not
rp_base.isinccompressed():
def callback(rp): self.rp = rp
- self.fileobj = rpath.MaybeGzip(rp_base,
callback)
+ self.fileobj =
rpath.UnicodeFile(rpath.MaybeGzip(rp_base,
+
callback))
else:
self.rp = rp_base
assert not self.rp.lstat(), self.rp
- self.fileobj = self.rp.open("wb", compress =
compress)
+ self.fileobj =
rpath.UnicodeFile(self.rp.open("wb",
+
compress = compress))
def write_record(self, record):
"""Write a (text) record into the file"""
--- rdiff_backup/rpath.py 8 Mar 2009 17:20:16 -0000 1.138
+++ rdiff_backup/rpath.py 8 Apr 2009 23:36:36 -0000
@@ -35,7 +35,7 @@
"""
-import os, stat, re, sys, shutil, gzip, socket, time, errno
+import os, stat, re, sys, shutil, gzip, socket, time, errno, codecs
import Globals, Time, static, log, user_group, C
try:
@@ -284,6 +284,8 @@
"""
if os.name != 'nt':
try:
+ if type(filename) == unicode:
+ filename = filename.encode('utf-8')
return C.make_file_dict(filename)
except OSError, error:
# Unicode filenames should be process by the Python
version
@@ -333,7 +335,7 @@
data['nlink'] = statblock[stat.ST_NLINK]
if os.name == 'nt':
- attribs = win32file.GetFileAttributes(filename)
+ attribs = win32file.GetFileAttributesW(filename)
if attribs & winnt.FILE_ATTRIBUTE_REPARSE_POINT:
data['type'] = 'sym'
data['linkname'] = None
@@ -995,7 +997,10 @@
def listdir(self):
"""Return list of string paths returned by os.listdir"""
- return self.conn.os.listdir(self.path)
+ path = self.path
+ if type(path) != unicode:
+ path = unicode(path, 'utf-8')
+ return self.conn.os.listdir(path)
def symlink(self, linktext):
"""Make symlink at self.path pointing to linktext"""
@@ -1406,6 +1411,23 @@
write_win_acl(self, acl)
self.data['win_acl'] = acl
+class UnicodeFile:
+ """ Wraps a RPath and reads/writes unicode. """
+
+ def __init__(self, fileobj):
+ self.fileobj = fileobj
+
+ def read(self, length = -1):
+ return unicode(self.fileobj.read(length), 'utf-8')
+
+ def write(self, buf):
+ if type(buf) != unicode:
+ buf = unicode(buf, 'utf-8')
+ return self.fileobj.write(buf.encode('utf-8'))
+
+ def close(self):
+ return self.fileobj.close()
+
class RPathFileHook:
"""Look like a file, but add closing hook"""
def __init__(self, file, closing_thunk):
@@ -1429,6 +1451,18 @@
messages. Use this class instead to clean those up.
"""
+ def __init__(self, filename=None, mode=None):
+ """ This is needed because we need to write an
+ encoded filename to the file, but use normal
+ unicode with the filename."""
+ if mode and 'b' not in mode:
+ mode += 'b'
+ if type(filename) != unicode:
+ filename = unicode(filename, 'utf-8')
+ fileobj = open(filename, mode or 'rb')
+ gzip.GzipFile.__init__(self, filename.encode('utf-8'),
+ mode=mode,
fileobj=fileobj)
+
def __del__(self): pass
def __getattr__(self, name):
if name == 'fileno': return self.fileobj.fileno
--- rdiff_backup/statistics.py 3 Jan 2009 21:35:59 -0000 1.23
+++ rdiff_backup/statistics.py 8 Apr 2009 17:58:42 -0000
@@ -20,7 +20,7 @@
"""Generate and process aggregated backup information"""
import re, os, time
-import Globals, Time, increment, log, static, metadata
+import Globals, Time, increment, log, static, metadata, rpath
class StatsException(Exception): pass
@@ -219,13 +219,13 @@
def write_stats_to_rp(self, rp):
"""Write statistics string to given rpath"""
- fp = rp.open("wb")
+ fp = rpath.UnicodeFile(rp.open("wb"))
fp.write(self.get_stats_string())
assert not fp.close()
def read_stats_from_rp(self, rp):
"""Set statistics from rpath, return self for convenience"""
- fp = rp.open("r")
+ fp = rpath.UnicodeFile(rp.open("r"))
self.set_stats_from_string(fp.read())
fp.close()
return self
@@ -364,7 +364,8 @@
suffix = Globals.compression and 'data.gz' or 'data'
cls._rp = increment.get_inc(rpbase, suffix, Time.curtime)
assert not cls._rp.lstat()
- cls._fileobj = cls._rp.open("wb", compress =
Globals.compression)
+ cls._fileobj = rpath.UnicodeFile(cls._rp.open("wb",
+ compress =
Globals.compression))
cls._line_sep = Globals.null_separator and '\0' or '\n'
cls.write_docstring()
--- rdiff_backup/win_acls.py 24 Nov 2008 22:07:32 -0000 1.4
+++ rdiff_backup/win_acls.py 8 Apr 2009 18:21:51 -0000
@@ -181,7 +181,7 @@
def __str__(self):
return '# file: %s\n%s\n' % \
- (C.acl_quote(self.get_indexpath()),
unicode(self.__acl))
+ (self.get_indexpath(),
unicode(self.__acl))
def from_string(self, acl_str):
lines = acl_str.splitlines()
@@ -189,7 +189,7 @@
raise metadata.ParsingError("Bad record beginning: " +
lines[0][:8])
filename = lines[0][8:]
if filename == '.': self.index = ()
- else: self.index = tuple(C.acl_unquote(filename).split('/'))
+ else: self.index = tuple(filename.split('/'))
self.__acl = lines[1]
def Record2WACL(record):
- [rdiff-backup-users] [PATCH] Unicode support on Windows,
Josh Nisly <=