>From 85eedb171016efa769c85ec1f9ee9188a7f79978 Mon Sep 17 00:00:00 2001 From: Richard Jones Date: Tue, 8 Dec 2009 13:09:31 +0000 Subject: [PATCH] Disk image exclusive and shared locks. Allow qemu to acquire shared and exclusive locks on disk images. This is done by extending the -drive option with an additional, optional parameter: -drive [...],lock=none -drive [...],lock=exclusive -drive [...],lock=shared lock=none is the default, and it means that we don't try to acquire any sort of lock. lock=exclusive tries to acquire an exclusive lock on the disk image. lock=shared tries to acquire a shared lock on the disk image. Multiple instances of qemu may all hold this sort of lock. If acquisition of a lock fails, opening the image fails. For disk types that have backing disks (eg. qcow2), the backing disk is opened with a shared lock if the front disk has any sort of lock. Otherwise the backing disk is not locked. Users must be cautious with the "commit" command which doesn't try to acquire an exclusive lock on the backing disk, but in any case it's almost never safe to use the "commit" command on any shared backing disk. Also implemented is a "lock" command in the monitor, allowing you to acquire (but not revoke) a lock after qemu has started. The implementation of locks only works for raw POSIX and Win32 files. However many of the other block types are implemented in terms of these drivers, so they "inherit" locking too. Other drivers are read-only, so don't require locking. Below we note only the cases where locking is *not* implemented: cloop - directly open()s the file, no locking implemented cow - same as cloop curl - protocol probably doesn't support locking nbd - same as curl Signed-off-by: Richard W.M. Jones --- block.c | 19 +++++++++++++++++-- block.h | 4 ++++ block/raw-posix.c | 25 +++++++++++++++++++++++++ block/raw-win32.c | 31 +++++++++++++++++++++++++++++++ block_int.h | 1 + monitor.c | 32 ++++++++++++++++++++++++++++++++ qemu-config.c | 4 ++++ qemu-monitor.hx | 19 +++++++++++++++++++ qemu-options.hx | 7 +++++++ qerror.c | 4 ++++ qerror.h | 3 +++ vl.c | 16 ++++++++++++++++ 12 files changed, 163 insertions(+), 2 deletions(-) diff --git a/block.c b/block.c index 3f3496e..0e69ba8 100644 --- a/block.c +++ b/block.c @@ -449,7 +449,7 @@ int bdrv_open2(BlockDriverState *bs, const char *filename, int flags, try_rw = !bs->read_only || bs->is_temporary; if (!(flags & BDRV_O_FILE)) open_flags = (try_rw ? BDRV_O_RDWR : 0) | - (flags & (BDRV_O_CACHE_MASK|BDRV_O_NATIVE_AIO)); + (flags & (BDRV_O_CACHE_MASK|BDRV_O_NATIVE_AIO|BDRV_O_LOCK_MASK)); else open_flags = flags & ~(BDRV_O_FILE | BDRV_O_SNAPSHOT); if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv)) @@ -480,14 +480,19 @@ int bdrv_open2(BlockDriverState *bs, const char *filename, int flags, if (bs->backing_file[0] != '\0') { /* if there is a backing file, use it */ BlockDriver *back_drv = NULL; + int back_drv_open_flags = open_flags; bs->backing_hd = bdrv_new(""); /* pass on read_only property to the backing_hd */ bs->backing_hd->read_only = bs->read_only; + /* if front disk is locked, lock backing disk shared */ + back_drv_open_flags &= ~BDRV_O_LOCK_MASK; + if (open_flags & BDRV_O_LOCK_MASK) + back_drv_open_flags |= BDRV_O_LOCK_SHARED; path_combine(backing_filename, sizeof(backing_filename), filename, bs->backing_file); if (bs->backing_format[0] != '\0') back_drv = bdrv_find_format(bs->backing_format); - ret = bdrv_open2(bs->backing_hd, backing_filename, open_flags, + ret = bdrv_open2(bs->backing_hd, backing_filename, back_drv_open_flags, back_drv); if (ret < 0) { bdrv_close(bs); @@ -1388,6 +1393,16 @@ int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi) return drv->bdrv_get_info(bs, bdi); } +int bdrv_acquire_lock(BlockDriverState *bs, int lock_flags) +{ + BlockDriver *drv = bs->drv; + if (!drv) + return -ENOMEDIUM; + if (!drv->bdrv_acquire_lock) + return -ENOTSUP; + return drv->bdrv_acquire_lock(bs, lock_flags); +} + int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf, int64_t pos, int size) { diff --git a/block.h b/block.h index fa51ddf..bd15bbe 100644 --- a/block.h +++ b/block.h @@ -39,8 +39,11 @@ typedef struct QEMUSnapshotInfo { #define BDRV_O_NOCACHE 0x0020 /* do not use the host page cache */ #define BDRV_O_CACHE_WB 0x0040 /* use write-back caching */ #define BDRV_O_NATIVE_AIO 0x0080 /* use native AIO instead of the thread pool */ +#define BDRV_O_LOCK_SHARED 0x0100 /* fail unless we can lock shared */ +#define BDRV_O_LOCK_EXCLUSIVE 0x0200 /* fail unless we can lock exclusive */ #define BDRV_O_CACHE_MASK (BDRV_O_NOCACHE | BDRV_O_CACHE_WB) +#define BDRV_O_LOCK_MASK (BDRV_O_LOCK_SHARED | BDRV_O_LOCK_EXCLUSIVE) #define BDRV_SECTOR_BITS 9 #define BDRV_SECTOR_SIZE (1 << BDRV_SECTOR_BITS) @@ -170,6 +173,7 @@ const char *bdrv_get_device_name(BlockDriverState *bs); int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num, const uint8_t *buf, int nb_sectors); int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi); +int bdrv_acquire_lock(BlockDriverState *bs, int lock_flags); const char *bdrv_get_encrypted_filename(BlockDriverState *bs); void bdrv_get_backing_filename(BlockDriverState *bs, diff --git a/block/raw-posix.c b/block/raw-posix.c index 5a6a22b..4c3326c 100644 --- a/block/raw-posix.c +++ b/block/raw-posix.c @@ -128,6 +128,24 @@ static int64_t raw_getlength(BlockDriverState *bs); static int cdrom_reopen(BlockDriverState *bs); #endif +static int raw_acquire_lock(BlockDriverState *bs, int lock_flags) +{ + BDRVRawState *s = bs->opaque; + struct flock lk; + + if (lock_flags & BDRV_O_LOCK_SHARED) + lk.l_type = F_RDLCK; + else /* bdrv_flags & BDRV_O_LOCK_EXCLUSIVE */ + lk.l_type = F_WRLCK; + lk.l_whence = SEEK_SET; + lk.l_start = 0; + lk.l_len = 0; /* means lock the whole file */ + + if (fcntl (s->fd, F_SETLK, &lk) == -1) + return -errno; + return 0; +} + static int raw_open_common(BlockDriverState *bs, const char *filename, int bdrv_flags, int open_flags) { @@ -163,6 +181,11 @@ static int raw_open_common(BlockDriverState *bs, const char *filename, s->fd = fd; s->aligned_buf = NULL; + if (bdrv_flags & BDRV_O_LOCK_MASK) { + if (raw_acquire_lock(bs, bdrv_flags & BDRV_O_LOCK_MASK) < 0) + goto out_close; + } + if ((bdrv_flags & BDRV_O_NOCACHE)) { s->aligned_buf = qemu_blockalign(bs, ALIGNED_BUFFER_SIZE); if (s->aligned_buf == NULL) { @@ -768,6 +791,8 @@ static BlockDriver bdrv_raw = { .bdrv_truncate = raw_truncate, .bdrv_getlength = raw_getlength, + .bdrv_acquire_lock = raw_acquire_lock, + .create_options = raw_create_options, }; diff --git a/block/raw-win32.c b/block/raw-win32.c index 72acad5..6f89f03 100644 --- a/block/raw-win32.c +++ b/block/raw-win32.c @@ -73,6 +73,27 @@ static int set_sparse(int fd) NULL, 0, NULL, 0, &returned, NULL); } +static int raw_acquire_lock(BlockDriverState *bs, int lock_flags) +{ + BDRVRawState *s = bs->opaque; + DWORD flags; + OVERLAPPED ov; + + flags = LOCKFILE_FAIL_IMMEDIATELY; + if (lock_flags & BDRV_O_LOCK_EXCLUSIVE) + flags |= LOCKFILE_EXCLUSIVE_LOCK; + + memset(&ov, 0, sizeof(ov)); + ov.Offset = 0; + ov.OffsetHigh = 0; + + if (!LockFileEx(s->hfile, flags, 0, 1, 0, &ov)) + /* For compatibility with the POSIX lock failure ... */ + return -EAGAIN; + + return 0; +} + static int raw_open(BlockDriverState *bs, const char *filename, int flags) { BDRVRawState *s = bs->opaque; @@ -106,6 +127,15 @@ static int raw_open(BlockDriverState *bs, const char *filename, int flags) return -EACCES; return -1; } + + if (flags & BDRV_O_LOCK_MASK) { + int err; + + err = raw_acquire_lock(s, flags & BDRV_O_LOCK_MASK); + if (err < 0) + return err; + } + return 0; } @@ -253,6 +283,7 @@ static BlockDriver bdrv_raw = { .bdrv_write = raw_write, .bdrv_truncate = raw_truncate, .bdrv_getlength = raw_getlength, + .bdrv_acquire_lock = raw_acquire_lock, .create_options = raw_create_options, }; diff --git a/block_int.h b/block_int.h index 9a3b2e0..729d540 100644 --- a/block_int.h +++ b/block_int.h @@ -92,6 +92,7 @@ struct BlockDriver { int (*bdrv_snapshot_list)(BlockDriverState *bs, QEMUSnapshotInfo **psn_info); int (*bdrv_get_info)(BlockDriverState *bs, BlockDriverInfo *bdi); + int (*bdrv_acquire_lock)(BlockDriverState *bs, int lock_flags); int (*bdrv_save_vmstate)(BlockDriverState *bs, const uint8_t *buf, int64_t pos, int size); diff --git a/monitor.c b/monitor.c index d97d529..d00d5d4 100644 --- a/monitor.c +++ b/monitor.c @@ -890,6 +890,38 @@ static void do_block_set_passwd(Monitor *mon, const QDict *qdict, } } +static void do_block_lock(Monitor *mon, const QDict *qdict, + QObject **ret_data) +{ + BlockDriverState *bs; + const char *p; + int lock_flags; + + bs = bdrv_find(qdict_get_str(qdict, "device")); + if (!bs) { + qemu_error_new(QERR_DEVICE_NOT_FOUND, qdict_get_str(qdict, "device")); + return; + } + + p = qdict_get_str(qdict, "mode"); + if (!p) { + qemu_error_new(QERR_MISSING_PARAMETER, "mode"); + return; + } + if (!strcmp(p, "exclusive")) + lock_flags = BDRV_O_LOCK_EXCLUSIVE; + else if (!strcmp (p, "shared")) + lock_flags = BDRV_O_LOCK_SHARED; + else { + qemu_error_new(QERR_INVALID_PARAMETER, p); + return; + } + + if (bdrv_acquire_lock(bs, lock_flags) < 0) { + qemu_error_new(QERR_LOCK_ACQUIRE_FAILED); + } +} + static void do_change_block(Monitor *mon, const char *device, const char *filename, const char *fmt) { diff --git a/qemu-config.c b/qemu-config.c index c3203c8..df0d3fb 100644 --- a/qemu-config.c +++ b/qemu-config.c @@ -78,6 +78,10 @@ QemuOptsList qemu_drive_opts = { },{ .name = "readonly", .type = QEMU_OPT_BOOL, + },{ + .name = "lock", + .type = QEMU_OPT_STRING, + .help = "lock disk image (exclusive, shared, none)", }, { /* end if list */ } }, diff --git a/qemu-monitor.hx b/qemu-monitor.hx index c788c73..9474905 100644 --- a/qemu-monitor.hx +++ b/qemu-monitor.hx @@ -1063,6 +1063,25 @@ STEXI Set the encrypted device @var{device} password to @var{password} ETEXI + { + .name = "lock", + .args_type = "device:B,mode:s", + .params = "device [exclusive|shared]", + .help = "acquire a lock on an existing device", + .user_print = monitor_user_noop, + .mhandler.cmd_new = do_block_lock, + }, + +STEXI address@hidden lock @var{device} @var{mode} +Acquire a lock on @var{device}. The @var{mode} string should be address@hidden to acquire an exclusive lock, or @var{shared} to +acquire a shared lock. + +This does not attempt to lock the backing disk for formats like +qcow2 that can have backing storage. +ETEXI + STEXI @end table ETEXI diff --git a/qemu-options.hx b/qemu-options.hx index b8cc375..efc5f19 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -104,6 +104,7 @@ DEF("drive", HAS_ARG, QEMU_OPTION_drive, " [,cyls=c,heads=h,secs=s[,trans=t]][,snapshot=on|off]\n" " [,cache=writethrough|writeback|none][,format=f][,serial=s]\n" " [,addr=A][,id=name][,aio=threads|native]\n" + " [,lock=exclusive|shared|none]\n" " use 'file' as a drive image\n") DEF("set", HAS_ARG, QEMU_OPTION_set, "-set group.id.arg=value\n" @@ -149,6 +150,12 @@ an untrusted format header. This option specifies the serial number to assign to the device. @item address@hidden Specify the controller's PCI address (if=virtio only). address@hidden address@hidden +Acquire a lock on the disk image (@var{file}). +Available modes are: exclusive, shared, none. +The default is "none", meaning we don't try to acquire a lock. To +avoid multiple virtual machines trying to write to a disk at the +same time (which can cause disk corruption), use lock=exclusive. @end table By default, writethrough caching is used for all block device. This means that diff --git a/qerror.c b/qerror.c index 5f8fc5d..8b74a58 100644 --- a/qerror.c +++ b/qerror.c @@ -120,6 +120,10 @@ static const QErrorStringTable qerror_table[] = { .error_fmt = QERR_VNC_SERVER_FAILED, .desc = "Could not start VNC server on %(target)", }, + { + .error_fmt = QERR_LOCK_ACQUIRE_FAILED, + .desc = "Could not lock device with requested mode", + }, {} }; diff --git a/qerror.h b/qerror.h index 9e220d6..540bf8d 100644 --- a/qerror.h +++ b/qerror.h @@ -100,4 +100,7 @@ QError *qobject_to_qerror(const QObject *obj); #define QERR_VNC_SERVER_FAILED \ "{ 'class': 'VNCServerFailed', 'data': { 'target': %s } }" +#define QERR_LOCK_ACQUIRE_FAILED \ + "{ 'class': 'LockAcquireFailed', 'data': {} }" + #endif /* QERROR_H */ diff --git a/vl.c b/vl.c index c0d98f5..b114518 100644 --- a/vl.c +++ b/vl.c @@ -2110,6 +2110,7 @@ DriveInfo *drive_init(QemuOpts *opts, void *opaque, const char *devaddr; DriveInfo *dinfo; int snapshot = 0; + int lock_flags = 0; *fatal_error = 1; @@ -2300,6 +2301,19 @@ DriveInfo *drive_init(QemuOpts *opts, void *opaque, } } + if ((buf = qemu_opt_get(opts, "lock")) != NULL) { + if (!strcmp(buf, "none")) + /* nothing */; + else if (!strcmp(buf, "shared")) + lock_flags |= BDRV_O_LOCK_SHARED; + else if (!strcmp(buf, "exclusive")) + lock_flags |= BDRV_O_LOCK_EXCLUSIVE; + else { + fprintf(stderr, "qemu: invalid lock option\n"); + return NULL; + } + } + /* compute bus and unit according index */ if (index != -1) { @@ -2444,6 +2458,8 @@ DriveInfo *drive_init(QemuOpts *opts, void *opaque, (void)bdrv_set_read_only(dinfo->bdrv, 1); } + bdrv_flags |= lock_flags; + if (bdrv_open2(dinfo->bdrv, file, bdrv_flags, drv) < 0) { fprintf(stderr, "qemu: could not open disk image %s: %s\n", file, strerror(errno)); -- 1.6.5.2