qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [PATCH V8 08/14] Introduce file lock for the block laye


From: Stefan Berger
Subject: Re: [Qemu-devel] [PATCH V8 08/14] Introduce file lock for the block layer
Date: Thu, 01 Sep 2011 21:53:40 -0400
User-agent: Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.18) Gecko/20110621 Fedora/3.1.11-1.fc14 Lightning/1.0b3pre Thunderbird/3.1.11

On 09/01/2011 01:32 PM, Michael S. Tsirkin wrote:
On Wed, Aug 31, 2011 at 10:35:59AM -0400, Stefan Berger wrote:
This patch introduces file locking via fcntl() for the block layer so that
concurrent access to files shared by 2 Qemu instances, for example via NFS,
can be serialized. This feature is useful primarily during initial phases of
VM migration where the target machine's TIS driver validates the block
storage (and in a later patch checks for missing AES keys) and terminates
Qemu if the storage is found to be faulty. This then allows migration to
be gracefully terminated and Qemu continues running on the source machine.

Support for win32 is based on win32 API and has been lightly tested with a
standalone test program locking shared storage from two different machines.

To enable locking a file multiple times, a counter is used. Actual locking
happens the very first time and unlocking happens when the counter is zero.

v7:
  - fixed compilation error in win32 part

Signed-off-by: Stefan Berger<address@hidden>
Generally, what all other devices do is perform validation
as the last step in migration when device state
is restored. On failure, management can decide what to do:
retry migration or restart on source.

Why is TPM special and needs to be treated differently?



Some background on the TPM: Typically a TPM is a chip with built-in NVRAM where it can write its persistent state into. We are simulating this NVRAM through a file (QCoW2).

What's special about the TPM is that Qemu stores the libtpms-internal state onto that QCoW2 file whereas normally it's the OS that stores its data onto files accessed through BlockDriverState. This in turn is related to the fact that the TPM does not only have the internal state of the TPM TIS frontend but also that of the libtpms backend where for example keys and the owner's password are stored and have to be written into persistent storage.

More detail: Typically one starts out with an empty QCoW2 file created via qemu-img. Once Qemu starts and initializes the libtpms-based TPM, it tries to read existing state from that QCoW2 file. Since there is no state stored in the QCoW2, the TPM will start initializing itself to an initial 'blank' state. Then once the user has booted into the OS he can use the TPM. Assuming that user now creates an Endorsement Key (EK) using a command sent to the TPM, then this EK becomes part of the persistent state of the TPM and the persistent state is written into the QCoW2 file. The next time the VM is (cold) started that EK has to be there. Assuming now the user takes ownership of the TPM, then his password(s) also becomes part of the persistent state of the TPM and has to be written into the QCoW2 file. Again, the fact that ownership was taken leads to the requirement that the passwords are expected to be there upon another (cold) restart of the VM. All this is covered by storing the TPM's persistent state into that file.

I hope this explains what is special about the TPM.

  Stefan
---

---
  block.c           |   41 +++++++++++++++++++++++++++++++++++
  block.h           |    8 ++++++
  block/raw-posix.c |   63 
++++++++++++++++++++++++++++++++++++++++++++++++++++++
  block/raw-win32.c |   52 ++++++++++++++++++++++++++++++++++++++++++++
  block_int.h       |    4 +++
  5 files changed, 168 insertions(+)

Index: qemu-git/block.c
===================================================================
--- qemu-git.orig/block.c
+++ qemu-git/block.c
@@ -521,6 +521,8 @@ static int bdrv_open_common(BlockDriverS
          goto free_and_fail;
      }

+    drv->num_locks = 0;
+
      bs->keep_read_only = bs->read_only = !(open_flags&  BDRV_O_RDWR);

      ret = refresh_total_sectors(bs, bs->total_sectors);
@@ -1316,6 +1318,45 @@ void bdrv_get_geometry(BlockDriverState
      *nb_sectors_ptr = length;
  }

+/* file locking */
+static int bdrv_lock_common(BlockDriverState *bs, BDRVLockType lock_type)
+{
+    BlockDriver *drv = bs->drv;
+
+    if (!drv) {
+        return -ENOMEDIUM;
+    }
+
+    if (bs->file) {
+        drv = bs->file->drv;
+        if (drv->bdrv_lock) {
+            return drv->bdrv_lock(bs->file, lock_type);
+        }
+    }
+
+    if (drv->bdrv_lock) {
+        return drv->bdrv_lock(bs, lock_type);
+    }
+
+    return -ENOTSUP;
+}
+
+
+int bdrv_lock(BlockDriverState *bs)
+{
+    if (bdrv_is_read_only(bs)) {
+        return bdrv_lock_common(bs, BDRV_F_RDLCK);
+    }
+
+    return bdrv_lock_common(bs, BDRV_F_WRLCK);
+}
+
+void bdrv_unlock(BlockDriverState *bs)
+{
+    bdrv_lock_common(bs, BDRV_F_UNLCK);
+}
+
+
  struct partition {
          uint8_t boot_ind;           /* 0x80 - active */
          uint8_t head;               /* starting head */
Index: qemu-git/block.h
===================================================================
--- qemu-git.orig/block.h
+++ qemu-git/block.h
@@ -43,6 +43,12 @@ typedef struct QEMUSnapshotInfo {
  #define BDRV_SECTOR_MASK   ~(BDRV_SECTOR_SIZE - 1)

  typedef enum {
+    BDRV_F_UNLCK,
+    BDRV_F_RDLCK,
+    BDRV_F_WRLCK,
+} BDRVLockType;
+
+typedef enum {
      BLOCK_ERR_REPORT, BLOCK_ERR_IGNORE, BLOCK_ERR_STOP_ENOSPC,
      BLOCK_ERR_STOP_ANY
  } BlockErrorAction;
@@ -100,6 +106,8 @@ int bdrv_commit(BlockDriverState *bs);
  void bdrv_commit_all(void);
  int bdrv_change_backing_file(BlockDriverState *bs,
      const char *backing_file, const char *backing_fmt);
+int bdrv_lock(BlockDriverState *bs);
+void bdrv_unlock(BlockDriverState *bs);
  void bdrv_register(BlockDriver *bdrv);


Index: qemu-git/block/raw-posix.c
===================================================================
--- qemu-git.orig/block/raw-posix.c
+++ qemu-git/block/raw-posix.c
@@ -803,6 +803,67 @@ static int64_t raw_get_allocated_file_si
      return (int64_t)st.st_blocks * 512;
  }

+static int raw_lock(BlockDriverState *bs, BDRVLockType lock_type)
+{
+    BlockDriver *drv = bs->drv;
+    BDRVRawState *s = bs->opaque;
+    struct flock flock = {
+        .l_whence = SEEK_SET,
+        .l_start = 0,
+        .l_len = 0,
+    };
+    int n;
+
+    switch (lock_type) {
+    case BDRV_F_RDLCK:
+    case BDRV_F_WRLCK:
+        if (drv->num_locks) {
+            drv->num_locks++;
+            return 0;
+        }
+        flock.l_type = (lock_type == BDRV_F_RDLCK) ? F_RDLCK : F_WRLCK;
+        break;
+
+    case BDRV_F_UNLCK:
+        if (--drv->num_locks>  0) {
+            return 0;
+        }
+
+        assert(drv->num_locks == 0);
+
+        flock.l_type = F_UNLCK;
+        break;
+
+    default:
+        return -EINVAL;
+    }
+
+    while (1) {
+        n = fcntl(s->fd, F_SETLKW,&flock);
+        if (n<  0) {
+            if (errno == EINTR) {
+                continue;
+            }
+            if (errno == EAGAIN) {
+                usleep(10000);
+                continue;
+            }
+        }
+        break;
+    }
+
+    if (n == 0&&
+        ((lock_type == BDRV_F_RDLCK) || (lock_type == BDRV_F_WRLCK))) {
+        drv->num_locks = 1;
+    }
+
+    if (n) {
+        return -errno;
+    }
+
+    return 0;
+}
+
  static int raw_create(const char *filename, QEMUOptionParameter *options)
  {
      int fd;
@@ -901,6 +962,8 @@ static BlockDriver bdrv_file = {
      .bdrv_get_allocated_file_size
                          = raw_get_allocated_file_size,

+    .bdrv_lock = raw_lock,
+
      .create_options = raw_create_options,
  };

Index: qemu-git/block_int.h
===================================================================
--- qemu-git.orig/block_int.h
+++ qemu-git/block_int.h
@@ -146,6 +146,10 @@ struct BlockDriver {
       */
      int (*bdrv_has_zero_init)(BlockDriverState *bs);

+    /* File locking */
+    int num_locks;
+    int (*bdrv_lock)(BlockDriverState *bs, BDRVLockType lock_type);
+
      QLIST_ENTRY(BlockDriver) list;
  };

Index: qemu-git/block/raw-win32.c
===================================================================
--- qemu-git.orig/block/raw-win32.c
+++ qemu-git/block/raw-win32.c
@@ -242,6 +242,57 @@ static int64_t raw_get_allocated_file_si
      return st.st_size;
  }

+static int raw_lock(BlockDriverState *bs, int lock_type)
+{
+    BlockDriver *drv = bs->drv;
+    BDRVRawState *s = bs->opaque;
+    OVERLAPPED ov;
+    BOOL res;
+    DWORD num_bytes;
+
+    switch (lock_type) {
+    case BDRV_F_RDLCK:
+    case BDRV_F_WRLCK:
+        if (drv->num_locks) {
+            drv->num_locks++;
+            return 0;
+        }
+
+        memset(&ov, 0, sizeof(ov));
+
+        res = LockFileEx(s->hfile, LOCKFILE_EXCLUSIVE_LOCK, 0, ~0, ~0,&ov);
+
+        if (res == FALSE) {
+            res = GetOverlappedResult(s->hfile,&ov,&num_bytes, TRUE);
+        }
+
+        if (res == TRUE) {
+            drv->num_locks = 1;
+        }
+
+        break;
+
+    case BDRV_F_UNLCK:
+        if (--drv->num_locks>  0) {
+            return 0;
+        }
+
+        assert(drv->num_locks>= 0);
+
+        res = UnlockFile(s->hfile, 0, 0, ~0, ~0);
+        break;
+
+    default:
+        return -EINVAL;
+    }
+
+    if (res == FALSE) {
+        return -EIO;
+    }
+
+    return 0;
+}
+
  static int raw_create(const char *filename, QEMUOptionParameter *options)
  {
      int fd;
@@ -289,6 +340,7 @@ static BlockDriver bdrv_file = {
      .bdrv_get_allocated_file_size
                          = raw_get_allocated_file_size,

+    .bdrv_lock         = raw_lock,
      .create_options = raw_create_options,
  };






reply via email to

[Prev in Thread] Current Thread [Next in Thread]