qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Qemu-devel] [PATCH] Snapshot block device support


From: Rusty Russell
Subject: [Qemu-devel] [PATCH] Snapshot block device support
Date: Wed, 02 Jul 2003 16:50:41 +1000

Hi Fabrice,

        I haven't got the IDE emulation to work for me yet (is it
supposed to work yet?), but this patch adds a "-snapshot" option, plus
"C-a s" to commit the disks.  With -snapshot, blocks which change are
written to a temporary backing store and a bitmap of changed blocks is
kept; "C-a s" then copies the changed blocks back to the disk image.

Diff + new test file below.
Rusty.
--
  Anyone who quotes me in their sig is an idiot. -- Rusty Russell.

tests/test_block.c:
================
#define _GNU_SOURCE /* For lseek64 */
#include "../block.c"

#define NUM_SECTORS 2560

/* Return 1 if each of the first len bytes at mem equals c (converted to
 * unsigned char), 0 as soon as one differs.  A zero len yields 1. */
static int memisset(const void *mem, int c, size_t len)
{
        const unsigned char *p = mem;
        const unsigned char want = (unsigned char)c;

        while (len-- > 0) {
                if (*p++ != want)
                        return 0;
        }
        return 1;
}

/*
 * Check that the block device reads back exactly what we expect.
 *
 * contents[] holds, for each of the NUM_SECTORS sectors, the byte value
 * that the whole 512-byte sector should be filled with.  Every sector is
 * first read on its own, then every ten-sector window is read with a
 * single bdrv_read() call.
 *
 * Returns 1 if everything matched, 0 on a read error or a mismatch.
 */
static int read_test(BlockDriverState *bs, unsigned char contents[])
{
        int i;
        unsigned char sector[512];
        unsigned char ten_sectors[5120];

        /* Single read test. */
        for (i = 0; i < NUM_SECTORS; i++) {
                if (bdrv_read(bs, i, sector, 1) != 0)
                        return 0;
                if (!memisset(sector, contents[i], 512))
                        return 0;
        }

        /* Multiple read test.  The bound is inclusive so that the final
         * window, the one ending on the last sector, is exercised too. */
        for (i = 0; i <= NUM_SECTORS - 10; i++) {
                int j;

                if (bdrv_read(bs, i, ten_sectors, 10) != 0)
                        return 0;

                for (j = 0; j < 10; j++) {
                        if (!memisset(ten_sectors + j*512, contents[i+j], 512))
                                return 0;
                }
        }
        return 1;
}

/*
 * Usage: test_block <datafile> [undo]
 *
 * Creates <datafile> with NUM_SECTORS sectors, sector i being filled with
 * the byte (unsigned char)i, opens it through bdrv_open() and checks that
 * reads and writes through the block layer behave as expected.
 *
 * If a second argument is given (its value is ignored), the device is
 * opened with the snapshot flag set; the test then also checks that the
 * file on disk is untouched until bdrv_commit() is called, and holds the
 * new contents afterwards.
 *
 * Any failed check calls abort().  Returns 0 on success, 1 on bad usage.
 */
int main(int argc, char *argv[])
{
        int fd, i;
        unsigned char sector[512];
        unsigned char ten_sectors[5120];
        unsigned char contents[NUM_SECTORS];
        BlockDriverState *bs;

        /* Without this, argv[1] would be NULL and get passed to open(). */
        if (argc < 2) {
                fprintf(stderr, "Usage: %s <datafile> [undo]\n",
                        argc > 0 ? argv[0] : "test_block");
                return 1;
        }

        /* Build the initial image: sector i is filled with byte i. */
        fd = open(argv[1], O_RDWR|O_CREAT|O_TRUNC, 0600);
        if (fd < 0)
                abort();
        for (i = 0; i < NUM_SECTORS; i++) {
                memset(sector, i, sizeof(sector));
                if (write(fd, sector, sizeof(sector)) != sizeof(sector))
                        abort();
                contents[i] = i;
        }
        close(fd);

        /* argv[argc] is NULL, so argv[2] is safe to test when argc == 2. */
        bs = bdrv_open(argv[1], argv[2] ? 1 : 0);
        if (!bs)
                abort();

        /* Test data is as we expect. */
        if (!read_test(bs, contents))
                abort();

        /* Single write test. */
        memset(sector, 2, sizeof(sector));
        if (bdrv_write(bs, 0, sector, 1) != 0)
                abort();
        contents[0] = 2;
        if (!read_test(bs, contents))
                abort();

        /* Clear the buffer first so a read that does nothing is caught. */
        memset(sector, 0, sizeof(sector));
        if (bdrv_read(bs, 0, sector, 1) != 0)
                abort();
        if (!memisset(sector, 2, sizeof(sector)))
                abort();

        /* Random test: runs of 1..10 sectors, randomly read or written. */
        for (i = 0; i < 10000; i++) {
                int j;
                int num_sectors = (random() % 10) + 1;
                /* The "+ 1" lets a run end on the very last sector. */
                int sect_start = random() % (NUM_SECTORS - num_sectors + 1);

                if (random() % 2) {
                        if (bdrv_read(bs,sect_start,ten_sectors,num_sectors))
                                abort();
                        for (j = 0; j < num_sectors; j++)
                                if (!memisset(ten_sectors + j*512,
                                              contents[sect_start + j],
                                              512))
                                        abort();
                } else {
                        for (j = 0; j < num_sectors; j++) {
                                contents[sect_start + j] = random();
                                memset(ten_sectors + j*512,
                                       contents[sect_start + j],
                                       512);
                        }
                        if (bdrv_write(bs,sect_start,ten_sectors,num_sectors))
                                abort();
                }
        }

        if (!read_test(bs, contents))
                abort();

        if (argv[2]) {
                /* Test that it hasn't touched initial file. */
                fd = open(argv[1], O_RDONLY);
                if (fd < 0)
                        abort();
                for (i = 0; i < NUM_SECTORS; i++) {
                        if (read(fd, sector, sizeof(sector)) != sizeof(sector))
                                abort();
                        if (!memisset(sector, i, sizeof(sector)))
                                abort();
                }
                close(fd);

                /* Test that commit works. */
                bdrv_commit(bs);
                fd = open(argv[1], O_RDONLY);
                if (fd < 0)
                        abort();
                for (i = 0; i < NUM_SECTORS; i++) {
                        if (read(fd, sector, sizeof(sector)) != sizeof(sector))
                                abort();
                        if (!memisset(sector, contents[i], sizeof(sector)))
                                abort();
                }
                close(fd);
        }

        /* Release the device now that every check has passed. */
        bdrv_close(bs);

        printf("All tests on %s%s passed!\n", argv[1],
               argv[2] ? " (with undo)" : "");
        return 0;
}
================
Index: block.c
===================================================================
RCS file: /cvsroot/qemu/qemu/block.c,v
retrieving revision 1.2
diff -u -r1.2 block.c
--- block.c     30 Jun 2003 23:17:31 -0000      1.2
+++ block.c     2 Jul 2003 06:45:08 -0000
@@ -45,9 +45,14 @@
     int fd;
     int64_t total_sectors;
     int read_only;
+    const char *filename;
+
+    /* If snapshot set, this is nonnull and these are used. */
+    unsigned char *changes_map;
+    int changes_fd;
 };
 
-BlockDriverState *bdrv_open(const char *filename)
+BlockDriverState *bdrv_open(const char *filename, int snapshot)
 {
     BlockDriverState *bs;
     int fd;
@@ -56,12 +61,12 @@
     bs = malloc(sizeof(BlockDriverState));
     if(!bs)
         return NULL;
+
     bs->read_only = 0;
     fd = open(filename, O_RDWR);
     if (fd < 0) {
         fd = open(filename, O_RDONLY);
         if (fd < 0) {
-            close(fd);
             free(bs);
             return NULL;
         }
@@ -70,44 +75,176 @@
     size = lseek64(fd, 0, SEEK_END);
     bs->total_sectors = size / 512;
     bs->fd = fd;
+    bs->filename = filename;
+
+    if (snapshot) {
+       /* Lazy paging in of /dev/zero for changes bitmap. */
+       int dev_zero;
+       char template[] = "/tmp/vl.XXXXXX";
+
+       dev_zero = open("/dev/zero", O_RDONLY);
+       if (dev_zero < 0) {
+           close(fd);
+           free(bs);
+           return NULL;
+       }
+
+       bs->changes_map = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_PRIVATE,
+                              dev_zero, 0);
+       close(dev_zero);
+       if (bs->changes_map == MAP_FAILED) {
+           close(fd);
+           free(bs);
+           return NULL;
+       }
+
+       /* Now, create a (sparse) temporary file for backing blocks. */
+       bs->changes_fd = mkstemp(template);
+       if (bs->changes_fd < 0) {
+           munmap(bs->changes_map, bs->total_sectors * 512);
+           close(fd);
+           free(bs);
+           return NULL;
+       }
+       /* Delete it. */
+       unlink(template);
+    } else {
+       bs->changes_map = NULL;
+    }
+
     return bs;
 }
 
 void bdrv_close(BlockDriverState *bs)
 {
     close(bs->fd);
+    if (bs->changes_map) {
+       munmap(bs->changes_map, bs->total_sectors * 512);
+       close(bs->changes_fd);
+    }
     free(bs);
 }
 
+static inline void set_bit(unsigned char *bitmap, int64_t bitnum)
+{
+    bitmap[bitnum / 8] |= (1 << (bitnum%8));
+}
+
+static inline int is_bit_set(const unsigned char *bitmap, int64_t bitnum)
+{
+    return !!(bitmap[bitnum / 8] & (1 << (bitnum%8)));
+}
+
+void bdrv_commit(BlockDriverState *bs)
+{
+    int64_t i;
+    unsigned char *changes_map;
+
+    if (!bs->changes_map) {
+       fprintf(stderr, "Already committing to %s\n", bs->filename);
+       return;
+    }
+
+    if (bs->read_only) {
+       fprintf(stderr, "Can't commit to %s: read-only\n", bs->filename);
+       return;
+    }
+
+    changes_map = bs->changes_map;
+    for (i = 0; i < bs->total_sectors; i++) {
+       if (is_bit_set(changes_map, i)) {
+           unsigned char sector[512];
+           if (bdrv_read(bs, i, sector, 1) != 0) {
+               fprintf(stderr, "Error reading sector %lli: aborting commit\n",
+                       (long long)i);
+               return;
+           }
+
+           /* Make bdrv_write write to real file for a moment. */
+           bs->changes_map = NULL;
+           if (bdrv_write(bs, i, sector, 1) != 0) {
+               fprintf(stderr, "Error writing sector %lli: aborting commit\n",
+                       (long long)i);
+               bs->changes_map = changes_map;
+               return;
+           }
+           bs->changes_map = changes_map;
+       }
+    }
+    fprintf(stderr, "Committed snapshot to %s\n", bs->filename);
+}
+    
+/* Return true if first block has been changed (ie. current version is
+ * in backing store).  Set the number of continuous blocks for which
+ * that is true. */
+static int is_changed(const unsigned char *bitmap,
+                     int64_t sector_num, int nb_sectors,
+                     int *num_same)
+{
+    int changed;
+
+    if (!bitmap || nb_sectors == 0) {
+       *num_same = nb_sectors;
+       return 0;
+    }
+
+    changed = is_bit_set(bitmap, sector_num);
+    for (*num_same = 1; *num_same < nb_sectors; (*num_same)++) {
+       if (is_bit_set(bitmap, sector_num + *num_same) != changed)
+           break;
+    }
+
+    return changed;
+}
+    
 /* return -1 if error */
 int bdrv_read(BlockDriverState *bs, int64_t sector_num, 
               uint8_t *buf, int nb_sectors)
 {
-    int ret;
+    int ret, num_same, fd;
 
-    lseek64(bs->fd, sector_num * 512, SEEK_SET);
-    ret = read(bs->fd, buf, nb_sectors * 512);
-    if (ret != nb_sectors * 512)
+    fd = bs->fd;
+    if (is_changed(bs->changes_map, sector_num, nb_sectors, &num_same))
+       fd = bs->changes_fd;
+
+    lseek64(fd, sector_num * 512, SEEK_SET);
+    ret = read(fd, buf, num_same * 512);
+    if (ret != num_same * 512) {
+       fprintf(stderr, "Block: Failed to read %i sectors at %lli\n",
+               num_same, (long long)sector_num);
         return -1;
-    else
-        return 0;
+    }
+
+    /* Recurse to do rest of blocks. */
+    if (num_same < nb_sectors)
+       return bdrv_read(bs, sector_num + num_same, buf + 512 * num_same,
+                        nb_sectors - num_same);
+    return 0;
 }
 
 /* return -1 if error */
 int bdrv_write(BlockDriverState *bs, int64_t sector_num, 
                const uint8_t *buf, int nb_sectors)
 {
-    int ret;
+    int ret, fd, i;
 
-    if (bs->read_only)
-        return -1;
+    fd = bs->fd;
+
+    if (bs->changes_map)
+       fd = bs->changes_fd;
+    else if (bs->read_only)
+       return -1;
 
-    lseek64(bs->fd, sector_num * 512, SEEK_SET);
-    ret = write(bs->fd, buf, nb_sectors * 512);
+    lseek64(fd, sector_num * 512, SEEK_SET);
+    ret = write(fd, buf, nb_sectors * 512);
     if (ret != nb_sectors * 512)
         return -1;
-    else
-        return 0;
+
+    if (bs->changes_map)
+       for (i = 0; i < nb_sectors; i++)
+           set_bit(bs->changes_map, sector_num + i);
+
+    return 0;
 }
 
 void bdrv_get_geometry(BlockDriverState *bs, int64_t *nb_sectors_ptr)
Index: vl.c
===================================================================
RCS file: /cvsroot/qemu/qemu/vl.c,v
retrieving revision 1.10
diff -u -r1.10 vl.c
--- vl.c        1 Jul 2003 16:27:45 -0000       1.10
+++ vl.c        2 Jul 2003 06:45:09 -0000
@@ -52,6 +52,7 @@
 
 #define DEBUG_LOGFILE "/tmp/vl.log"
 #define DEFAULT_NETWORK_SCRIPT "/etc/vl-ifup"
+#define MAX_DISKS 2
 
 //#define DEBUG_UNUSED_IOPORT
 //#define DEBUG_IRQ_LATENCY
@@ -63,6 +64,8 @@
 #define INITRD_LOAD_ADDR   0x00400000
 #define KERNEL_PARAMS_ADDR 0x00090000
 
+BlockDriverState *bs_table[MAX_DISKS];
+
 /* from plex86 (BSD license) */
 struct  __attribute__ ((packed)) linux_params {
   // For 0x00..0x3f, see 'struct screen_info' in linux/include/linux/tty.h.
@@ -1265,6 +1268,7 @@
     printf("\n"
            "C-a h    print this help\n"
            "C-a x    exit emulatior\n"
+          "C-a s    save disk data back to file (if -snapshot)\n"
            "C-a b    send break (magic sysrq)\n"
            "C-a C-a  send C-a\n"
            );
@@ -1282,6 +1286,14 @@
         case 'x':
             exit(0);
             break;
+       case 's': {
+           int i;
+
+           for (i = 0; i < MAX_DISKS; i++)
+               if (bs_table[i])
+                   bdrv_commit(bs_table[i]);
+           break;
+           }
         case 'b':
             /* send break */
             s->rbr = 0;
@@ -1976,8 +1988,6 @@
 /* set to 1 set disable mult support */
 #define MAX_MULT_SECTORS 8
 
-#define MAX_DISKS 2
-
 struct IDEState;
 
 typedef void EndTransferFunc(struct IDEState *);
@@ -2009,7 +2019,6 @@
     uint8_t io_buffer[MAX_MULT_SECTORS*512 + 4];
 } IDEState;
 
-BlockDriverState *bs_table[MAX_DISKS];
 IDEState ide_state[MAX_DISKS];
 
 static void padstr(char *str, const char *src, int len)
@@ -2577,6 +2586,7 @@
            "-initrd file   use 'file' as initial ram disk\n"
            "-hda file      use 'file' as hard disk 0 image\n"
            "-hdb file      use 'file' as hard disk 1 image\n"
+          "-snapshot      write to temporary files instead of disk files\n"
            "-m megs        set virtual RAM size to megs MB\n"
            "-n script      set network init script [default=%s]\n"
            "\n"
@@ -2595,12 +2605,13 @@
     { "initrd", 1, NULL, 0, },
     { "hda", 1, NULL, 0, },
     { "hdb", 1, NULL, 0, },
+    { "snapshot", 0, NULL, 0, },
     { NULL, 0, NULL, 0 },
 };
 
 int main(int argc, char **argv)
 {
-    int c, ret, initrd_size, i, use_gdbstub, gdbstub_port, long_index;
+    int c, ret, initrd_size, i, use_gdbstub, gdbstub_port, snapshot, 
long_index;
     struct linux_params *params;
     struct sigaction act;
     struct itimerval itv;
@@ -2617,6 +2628,7 @@
     pstrcpy(network_script, sizeof(network_script), DEFAULT_NETWORK_SCRIPT);
     use_gdbstub = 0;
     gdbstub_port = DEFAULT_GDBSTUB_PORT;
+    snapshot = 0;
     for(;;) {
         c = getopt_long_only(argc, argv, "hm:dn:sp:", long_options, 
&long_index);
         if (c == -1)
@@ -2633,6 +2645,9 @@
             case 2:
                 hd_filename[1] = optarg;
                 break;
+           case 3:
+               snapshot = 1;
+               break;
             }
             break;
         case 'h':
@@ -2679,7 +2694,7 @@
     /* open the virtual block devices */
     for(i = 0; i < MAX_DISKS; i++) {
         if (hd_filename[i]) {
-            bs_table[i] = bdrv_open(hd_filename[i]);
+            bs_table[i] = bdrv_open(hd_filename[i], snapshot);
             if (!bs_table[i]) {
                 fprintf(stderr, "vl: could not open hard disk image '%s\n",
                         hd_filename[i]);
Index: vl.h
===================================================================
RCS file: /cvsroot/qemu/qemu/vl.h,v
retrieving revision 1.1
diff -u -r1.1 vl.h
--- vl.h        30 Jun 2003 10:03:06 -0000      1.1
+++ vl.h        2 Jul 2003 06:45:09 -0000
@@ -27,13 +27,13 @@
 /* block.c */
 typedef struct BlockDriverState BlockDriverState;
 
-BlockDriverState *bdrv_open(const char *filename);
+BlockDriverState *bdrv_open(const char *filename, int snapshot);
 void bdrv_close(BlockDriverState *bs);
 int bdrv_read(BlockDriverState *bs, int64_t sector_num, 
               uint8_t *buf, int nb_sectors);
 int bdrv_write(BlockDriverState *bs, int64_t sector_num, 
                const uint8_t *buf, int nb_sectors);
 void bdrv_get_geometry(BlockDriverState *bs, int64_t *nb_sectors_ptr);
-
+void bdrv_commit(BlockDriverState *bs);
 
 #endif /* VL_H */
Index: tests/Makefile
===================================================================
RCS file: /cvsroot/qemu/qemu/tests/Makefile,v
retrieving revision 1.21
diff -u -r1.21 Makefile
--- tests/Makefile      15 Jun 2003 20:42:31 -0000      1.21
+++ tests/Makefile      2 Jul 2003 06:45:10 -0000
@@ -6,7 +6,7 @@
 ifeq ($(ARCH),i386)
 TESTS=testclone testsig testthread sha1-i386 test-i386 runcom
 endif
-TESTS+=sha1 test_path
+TESTS+=sha1 test_path test_block
 
 QEMU=../qemu
 
@@ -28,6 +28,12 @@
 test_path: test_path.c
        $(CC) $(CFLAGS) $(LDFLAGS) -o $@ $<
        ./$@ || { rm $@; exit 1; }
+
+test_block: test_block.c
+       $(CC) $(CFLAGS) $(LDFLAGS) -o $@ $<
+       ./$@ test_block_data || { rm -f $@ test_block_data; exit 1; }
+       ./$@ test_block_data undo || { rm -f $@ test_block_data; exit 1; }
+       @rm -f test_block_data
 
 # i386 emulation test (test various opcodes) */
 test-i386: test-i386.c test-i386-code16.S test-i386-vm86.S \




reply via email to

[Prev in Thread] Current Thread [Next in Thread]