qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Qemu-devel] multi-partition block driver


From: Aleksandar Kanchev
Subject: [Qemu-devel] multi-partition block driver
Date: Mon, 03 Mar 2008 18:22:56 +0100
User-agent: Thunderbird 2.0.0.12 (X11/20080226)

Hello,

Recently I was looking for a way to boot a Xen Linux guest in qemu in order to upgrade it from a CD-ROM. I posted my problem on the forum, and since there had been no answer until a couple of hours ago, I wrote a qemu block driver myself.
Following the FAQ about booting from partitions I came to a qemu-devel mailing list's thread about using partition images, where Jim Brown posted an experimental patch which supports just one partition.
My patch is based on the same idea, but it supports up to four partitions (which is actually the maximum). The partition table can be generated automatically or left unchanged. An image file can be used for the MBR, either to provide boot code or just to keep changes made to the MBR. A partition is defined by a disk image, a partition type (fs id) and a status (active / non-active). Those values are only used if partition table generation is requested. Any qemu-supported image format can be used as a disk image. I think it's also possible to use this in some sick recursive way, but I didn't test that.
The block driver uses a mini config file as already suggested on the mailing list thread. The format of the config file is described in the patch itself. Example: "qemu -hda multi:mymultipart.img", where 'mymultipart.img' is the config file.
I already did some testing and everything seems to be working fine. I'll try it tomorrow with the Xen guests which I have to upgrade. I'll be glad if more people give this driver a try and review the code. I find this a nice feature which could be included in qemu and which could change the FAQ's short answer to 'yes' :-).

best regards,
Aleksandar Kanchev

INSTALL: 'tar -xzf qemu-0.9.1.tar.gz; patch -p0 < qemu-block-multipart.patch'
diff -Nurp qemu-0.9.1/block.c qemu-0.9.1-multipart/block.c
--- qemu-0.9.1/block.c  2008-01-06 20:38:42.000000000 +0100
+++ qemu-0.9.1-multipart/block.c        2008-03-03 09:56:42.000000000 +0100
@@ -273,8 +273,8 @@ static BlockDriver *find_image_format(co
 #endif
 
     drv = find_protocol(filename);
-    /* no need to test disk image formats for vvfat */
-    if (drv == &bdrv_vvfat)
+    /* no need to test disk image formats for vvfat or multipart */
+    if (drv == &bdrv_vvfat || drv == &bdrv_multipart)
         return drv;
 
     ret = bdrv_file_open(&bs, filename, BDRV_O_RDONLY);
@@ -1294,6 +1294,7 @@ void bdrv_init(void)
     bdrv_register(&bdrv_vvfat);
     bdrv_register(&bdrv_qcow2);
     bdrv_register(&bdrv_parallels);
+    bdrv_register(&bdrv_multipart);
 }
 
 void *qemu_aio_get(BlockDriverState *bs, BlockDriverCompletionFunc *cb,
diff -Nurp qemu-0.9.1/block.h qemu-0.9.1-multipart/block.h
--- qemu-0.9.1/block.h  2008-01-06 20:38:42.000000000 +0100
+++ qemu-0.9.1-multipart/block.h        2008-03-03 09:57:04.000000000 +0100
@@ -16,6 +16,7 @@ extern BlockDriver bdrv_vpc;
 extern BlockDriver bdrv_vvfat;
 extern BlockDriver bdrv_qcow2;
 extern BlockDriver bdrv_parallels;
+extern BlockDriver bdrv_multipart;
 
 typedef struct BlockDriverInfo {
     /* in bytes, 0 if irrelevant */
diff -Nurp qemu-0.9.1/block-multipart.c qemu-0.9.1-multipart/block-multipart.c
--- qemu-0.9.1/block-multipart.c        1970-01-01 01:00:00.000000000 +0100
+++ qemu-0.9.1-multipart/block-multipart.c      2008-03-03 17:04:04.000000000 +0100
@@ -0,0 +1,596 @@
+/*
+ * QEMU Block driver for multi-partition images (virtually merges disk images)
+ *
+ * Copyright (c) 2008 Aleksandar Kanchev
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+/*
+ * The block driver expects a mini config file with the following format:
+ *
+ * The first line tells the driver if it should generate a
+ * partition table. If it is set to the word "dynamic", a partition table is
+ * generated, anything else (including an empty line) tells the driver not to
+ * modify the MBR.
+ *
+ * The second line specifies a disk_image with the drive's first track. It could
+ * be a simple raw 512b file containing the MBR. It could be even smaller,
+ * containing only the MBR's code. The first track is always kept in memory and
+ * changes are mirrored to the disk image if it's available. If the line is left
+ * empty, all changes will be lost when the VM is powered down.
+ *
+ * Lines from 3 to 6 contain the four partitions of the drive. The partition
+ * format is:
+ *   <disk_image>[,<partition type>[,<partition status>]]
+ * example:
+ *   /dev/hda1,83,80
+ *   /dev/hda1,83,a
+ *   /dev/hda1,83
+ * The first two lines are equivalent, where 'a' is 0x80 or "active" and 0x83
+ * is the linux partition type. The partition type and status are only
+ * relevant if the first line is set to "dynamic".
+ *
+ * The rest of the config file is ignored.
+ */
+#include "qemu-common.h"
+#include "block_int.h"
+
+#include <string.h>
+#include <stdarg.h>
+
+
+/* Driver format name; also used as the prefix of diagnostics printed by info(). */
+#define        FORMAT_NAME                     "multipart"
+/* Protocol prefix ("multi:") stripped from the filename before the config is opened. */
+#define        PROTOCOL_NAME           "multi"
+
+/* Maximum number of characters pgetline() stores for one config line. */
+#define        CONFIG_LINE_LEN         1024
+
+/* Sector size is 1 << BLOCK_SHIFT = 512 bytes. */
+#define        BLOCK_SHIFT                     9
+#define        BLOCK_SIZE                      (1 << BLOCK_SHIFT)
+/* Geometry used for the in-memory first track and for the LBA -> CHS conversion. */
+#define TRACK_SECTORS          63
+#define        HEADS                           255
+
+
+/*
+ * Packed 3-byte CHS address as stored in a partition table entry.
+ * As filled in by lbatochs(): 'sector' carries the sector number in its low
+ * six bits and bits 8-9 of the cylinder in its top two bits; 'cylinder'
+ * carries the low eight bits of the cylinder.
+ */
+typedef struct {
+       uint8_t head;
+       uint8_t sector;
+       uint8_t cylinder;
+} __attribute__((packed)) chs_t;
+
+/*
+ * One packed partition table entry of the MBR.
+ * lba_start and lba_length are stored little-endian (written through
+ * cpu_to_le32() in partitions_init()).
+ */
+typedef struct {
+       uint8_t status; // 0x80 = bootable, 0x00 = non-bootable, other = malformed
+       chs_t chs_start;
+       uint8_t type; // 0x07 = HPFS/NTFS, 0x82 = Linux swap, 0x83 = Linux ...
+       chs_t chs_end;
+       uint32_t lba_start;
+       uint32_t lba_length;
+} __attribute__((packed)) partition_t;
+
+/*
+ * Packed layout of the first sector of the drive (440 + 4 + 2 + 4 * 16 + 2 =
+ * 512 bytes). partitions_init() sets 'magic' to 0x55, 0xAA when the partition
+ * table is generated.
+ */
+typedef struct {
+       uint8_t code[440];
+       uint8_t disk_signature[4];
+       uint8_t zero[2];
+       partition_t partition[4];
+       uint8_t magic[2];
+} __attribute__((packed)) mbr_t;
+
+/*
+ * The first track of the virtual drive (TRACK_SECTORS sectors), accessible
+ * either as raw bytes or through the MBR layout of its first sector.
+ */
+typedef union {
+       uint8_t buf[TRACK_SECTORS * BLOCK_SIZE];
+       mbr_t mbr;
+} first_track_t;
+
+/*
+ * Per-drive state of the multipart driver.
+ *  first_track    - in-memory copy of the first track, served on reads
+ *  bs_first_track - writable image that first-track writes are mirrored to,
+ *                   or NULL when there is none (or it is read-only)
+ *  bs_partition   - opened partition images, filled from index 0; the first
+ *                   NULL entry ends the list
+ */
+typedef struct BdrvMultipartState {
+       first_track_t first_track;
+       BlockDriverState *bs_first_track;
+       BlockDriverState *bs_partition[4];
+} BdrvMultipartState;
+
+/*
+ * One partition line of the config file.
+ * status and type are wider than a byte so that a negative value can mean
+ * "not usable": partitions_init() copies them into the partition table only
+ * when they are >= 0.
+ */
+typedef struct {
+       int16_t status;
+       int16_t type;
+       char *filename;
+} config_partition_t;
+
+/*
+ * Parsed config file.
+ *  dynamic     - non-zero when the partition table is to be generated
+ *  ft_filename - image holding the first track, or NULL when none was given
+ *  partition   - up to four partitions, filled from index 0
+ * Strings are heap-allocated and released by config_delete().
+ */
+typedef struct {
+       uint8_t dynamic;
+       char *ft_filename;
+       config_partition_t partition[4];
+} config_t;
+
+
+/*
+ * Print a printf-style diagnostic on stderr, prefixed with "qemu <format
+ * name>: " and followed by a newline.
+ */
+static void info(const char *format, ...)
+{
+       va_list args;
+
+       va_start(args, format);
+       fprintf(stderr, "qemu %s: ", FORMAT_NAME);
+       vfprintf(stderr, format, args);
+       fputc('\n', stderr);
+       va_end(args);
+}
+
+/*
+ * Parse a one- or two-digit hexadecimal byte value.
+ *
+ * An optional "0x"/"0X" prefix is skipped. At most two digits are consumed;
+ * anything after them is ignored.
+ *
+ * Returns the value (0..0xff), or -1 when there are no digits or when one of
+ * the consumed characters is not a hexadecimal digit. Previously an invalid
+ * character was silently counted as digit 0.
+ */
+static int16_t get_hex(const char *str)
+{
+       int16_t value = 0;
+       int i;
+
+       /* cast: passing a negative char to tolower() is undefined */
+       if (str[0] == '0' && tolower((unsigned char) str[1]) == 'x')
+               str += 2;
+
+       if (*str == '\0')
+               return -1;
+
+       for (i = 0; i < 2 && str[i] != '\0'; i++) {
+               int ch = tolower((unsigned char) str[i]);
+               int digit;
+
+               if (ch >= '0' && ch <= '9')
+                       digit = ch - '0';
+               else if (ch >= 'a' && ch <= 'f')
+                       digit = 10 + ch - 'a';
+               else
+                       return -1;
+
+               value = (int16_t) ((value << 4) | digit);
+       }
+
+       return value;
+}
+
+/*
+ * Duplicate a NUL-terminated string into memory obtained from
+ * qemu_mallocz(). Returns the copy, or NULL when the allocation fails.
+ * The caller owns the result.
+ */
+static char *pstrdup(const char *src)
+{
+       size_t n = strlen(src);
+       char *copy = qemu_mallocz(n + 1);
+
+       if (copy == NULL)
+               return NULL;
+
+       /* the buffer is zero-filled, so the terminator is already in place */
+       memcpy(copy, src, n);
+       return copy;
+}
+
+/*
+ * Read one line from fd, one byte at a time, into a static buffer.
+ *
+ * A line ends at '\n', at '\r', at a "\r\n" pair, at end of file, or once
+ * CONFIG_LINE_LEN characters have been stored (the rest of an over-long line
+ * is returned by the next call). The terminator is not stored.
+ *
+ * Returns a pointer to the static buffer (overwritten by the next call), or
+ * NULL when nothing could be read at all.
+ *
+ * A "\r\n" pair used to be reported as a line followed by an extra empty
+ * line, which shifted every following config line. The '\n' after a '\r' is
+ * now consumed; a byte that turns out not to be '\n' is pushed back with
+ * lseek(), which assumes fd is seekable (it comes from open() on the config
+ * file).
+ */
+static char *pgetline(int fd)
+{
+       static char buf[CONFIG_LINE_LEN + 1];
+       size_t len = 0;
+       char ch;
+       int r;
+
+       while ((r = read(fd, &ch, 1)) > 0) {
+               if (ch == '\n')
+                       break;
+
+               if (ch == '\r') {
+                       if (read(fd, &ch, 1) > 0 && ch != '\n')
+                               lseek(fd, -1, SEEK_CUR);
+                       break;
+               }
+
+               buf[len++] = ch;
+               if (len >= CONFIG_LINE_LEN)
+                       break;
+       }
+
+       buf[len] = '\0';
+
+       if (r <= 0 && len == 0)
+               return NULL;
+
+       return buf;
+}
+
+/*
+ * Release a config_t and every string it owns. A NULL cfg is ignored.
+ *
+ * All four partition slots are visited: stopping at the first NULL filename
+ * would leak any filename stored behind it.
+ */
+static void config_delete(config_t *cfg)
+{
+       int i;
+
+       if (cfg == NULL)
+               return;
+
+       if (cfg->ft_filename != NULL)
+               qemu_free(cfg->ft_filename);
+
+       for (i = 0; i < 4; i++) {
+               if (cfg->partition[i].filename != NULL)
+                       qemu_free(cfg->partition[i].filename);
+       }
+
+       qemu_free(cfg);
+}
+
+/*
+ * Parse the multipart config file named by filename (an optional
+ * PROTOCOL_NAME ":" prefix is stripped first).
+ *
+ * Line 1: the partition table is generated when the line starts with 'd' or
+ *         'D' (only the first character is examined).
+ * Line 2: first track image; an empty line means none.
+ * Lines 3-6: <disk_image>[,<partition type>[,<partition status>]]; parsing
+ *         stops at the first empty partition line.
+ *
+ * On success returns 0 and stores the new config in *pcfg; the caller
+ * releases it with config_delete(). On failure returns a negative value and
+ * leaves *pcfg untouched.
+ *
+ * Fixes over the previous version:
+ *  - the config object is no longer leaked when the file cannot be opened;
+ *  - a failed string duplication is reported as -ENOMEM instead of silently
+ *    dropping the first track image or truncating the partition list;
+ *  - type and status start out as -1, so a field omitted from the line is
+ *    left alone by partitions_init() (which only copies values >= 0) instead
+ *    of overwriting the table entry with 0.
+ */
+static int config_parse(const char *filename, config_t **pcfg)
+{
+       int nline;
+       int fd;
+       int ret = -1;
+       char *line;
+       char *p, *q;
+       config_t *cfg;
+
+       cfg = qemu_mallocz(sizeof(config_t));
+       if (!cfg)
+               return -ENOMEM;
+
+       strstart(filename, PROTOCOL_NAME ":", &filename);
+
+       fd = open(filename, O_RDONLY);
+       if (fd < 0) {
+               config_delete(cfg);
+               return -1;
+       }
+
+       for (nline = 0; nline < 6; nline++) {
+               line = pgetline(fd);
+               if (!line) {
+                       if (nline < 3) {
+                               info("config is too short");
+                               goto error;
+                       }
+                       break;
+               }
+
+               /* dynamic */
+               if (nline == 0) {
+                       if (tolower((unsigned char) *line) == 'd')
+                               cfg->dynamic = 1;
+
+               /* first track */
+               } else if (nline == 1) {
+                       if (*line != '\0') {
+                               cfg->ft_filename = pstrdup(line);
+                               if (!cfg->ft_filename) {
+                                       ret = -ENOMEM;
+                                       goto error;
+                               }
+                       }
+
+               /* partitions */
+               } else {
+                       config_partition_t *partition = &(cfg->partition[nline - 2]);
+
+                       /* -1 = not given in the config */
+                       partition->status = -1;
+                       partition->type = -1;
+
+                       /* fields are peeled off from the right */
+                       p = strrchr(line, ',');
+                       if (p != NULL) {
+                               partition->type = get_hex(p + 1);
+
+                               *p = '\0';
+                               q = strrchr(line, ',');
+                               if (q != NULL) {
+                                       /* two fields: the last one was the status */
+                                       partition->status = partition->type;
+                                       if (p[1] == 'a' && p[2] == '\0')
+                                               partition->status = 0x80;
+
+                                       partition->type = get_hex(q + 1);
+                                       *q = '\0';
+                               }
+                       }
+
+                       if (*line == '\0') {
+                               if (nline < 3) {
+                                       info("at least one partition is required");
+                                       goto error;
+                               }
+                               break;
+                       }
+
+                       partition->filename = pstrdup(line);
+                       if (!partition->filename) {
+                               ret = -ENOMEM;
+                               goto error;
+                       }
+               }
+       }
+
+       close(fd);
+       *pcfg = cfg;
+
+       return 0;
+
+error:
+       close(fd);
+       config_delete(cfg);
+
+       return ret;
+}
+
+/*
+ * Initialise the in-memory first track and account for it in
+ * bs->total_sectors.
+ *
+ * The track is zero-filled. When filename is not NULL the image is opened
+ * with the given flags and at most one track of it is read into memory. A
+ * writable image is kept open in bs_first_track so later writes can be
+ * mirrored to it; a read-only image is closed again.
+ *
+ * Returns 0 on success or a negative value on failure.
+ *
+ * Fixes over the previous version: the BlockDriverState is released when
+ * bdrv_open() fails, and a negative bdrv_getlength() result is treated as an
+ * error instead of being used as a read length.
+ */
+static inline int first_track_init(BlockDriverState *bs, const char *filename, int flags)
+{
+       BdrvMultipartState *bs_mp = bs->opaque;
+       BlockDriverState *bs_ft;
+       int64_t len;
+       int ret;
+
+       memset(bs_mp->first_track.buf, 0, sizeof(bs_mp->first_track));
+       bs->total_sectors = sizeof(bs_mp->first_track) >> BLOCK_SHIFT;
+
+       if (filename == NULL)
+               return 0;
+
+       bs_ft = bdrv_new("");
+       if (!bs_ft)
+               return -ENOMEM;
+
+       ret = bdrv_open(bs_ft, filename, flags);
+       if (ret < 0) {
+               info("unable to open mbr image %s", filename);
+               goto fail;
+       }
+
+       len = bdrv_getlength(bs_ft);
+       if (len < 0) {
+               info("unable to read mbr image %s", filename);
+               ret = -EIO;
+               goto fail;
+       }
+       if (len > (int64_t) sizeof(bs_mp->first_track))
+               len = sizeof(bs_mp->first_track);
+
+       ret = bdrv_pread(bs_ft, 0, bs_mp->first_track.buf, len);
+       if (ret < 0) {
+               info("unable to read mbr image %s", filename);
+               goto fail;
+       }
+
+       if (bs_ft->read_only)
+               bdrv_delete(bs_ft);
+       else
+               bs_mp->bs_first_track = bs_ft;
+
+       return 0;
+
+fail:
+       bdrv_delete(bs_ft);
+       return ret;
+}
+
+/*
+ * Convert a linear sector number into the packed CHS form of a partition
+ * table entry, using a HEADS x TRACK_SECTORS geometry.
+ *
+ * The cylinder is clamped to 1023; its bits 8-9 go into the top two bits of
+ * the sector byte and its low eight bits into the cylinder byte.
+ */
+static void lbatochs(uint32_t lba, chs_t *pchs)
+{
+       uint32_t cyl = lba / (HEADS * TRACK_SECTORS);
+       uint32_t in_cyl = lba % (HEADS * TRACK_SECTORS);
+       uint32_t head = in_cyl / TRACK_SECTORS;
+       uint32_t sect = in_cyl % TRACK_SECTORS + 1;     /* sectors count from 1 */
+
+       if (head > 254)
+               head = 254;
+       if (sect > 63)
+               sect = 63;
+       if (cyl > 1023)
+               cyl = 1023;
+
+       pchs->head = (uint8_t) head;
+       pchs->sector = (uint8_t) (sect | ((cyl >> 2) & 0xc0));
+       pchs->cylinder = (uint8_t) (cyl & 0xff);
+}
+
+/*
+ * Open every partition image listed in cfg, append it to the virtual drive
+ * and, when cfg->dynamic is set, fill in the matching partition table entry
+ * of the in-memory MBR.
+ *
+ * Partitions are laid out back to back, starting at the current
+ * bs->total_sectors (the end of the first track), and bs->total_sectors
+ * grows by the size of each partition.
+ *
+ * Returns a non-negative value on success. On failure a negative value is
+ * returned and every partition opened so far is closed again.
+ *
+ * Fixes over the previous version:
+ *  - a BlockDriverState whose bdrv_open() failed is released with
+ *    bdrv_delete() rather than qemu_free();
+ *  - a zero length partition is reported as an error (-EINVAL) instead of
+ *    ending the list with a success return;
+ *  - the CHS start is computed from the host-order sector number, not from
+ *    the little-endian value stored in the table;
+ *  - the partition pointers are reset to NULL after cleanup.
+ */
+static inline int partitions_init(BlockDriverState *bs, int flags, config_t *cfg)
+{
+       int i, ret = 0;
+       mbr_t *mbr;
+       BdrvMultipartState *bs_mp = bs->opaque;
+       BlockDriverState *bs_part;
+
+       mbr = &(bs_mp->first_track.mbr);
+       if (cfg->dynamic) {
+               mbr->magic[0] = 0x55;
+               mbr->magic[1] = 0xAA;
+       }
+
+       for (i = 0; i < 4 && cfg->partition[i].filename != NULL; i++) {
+               config_partition_t *cpart = &(cfg->partition[i]);
+
+               bs_part = bdrv_new("");
+               if (!bs_part) {
+                       ret = -ENOMEM;
+                       break;
+               }
+
+               ret = bdrv_open(bs_part, cpart->filename, flags);
+               if (ret < 0) {
+                       info("unable to open partition image %s", cpart->filename);
+                       bdrv_delete(bs_part);
+                       break;
+               }
+
+               if (bs_part->total_sectors == 0) {
+                       info("zero length partition %s", cpart->filename);
+                       bdrv_delete(bs_part);
+                       ret = -EINVAL;
+                       break;
+               }
+
+               if (cfg->dynamic) {
+                       partition_t *partition = &(mbr->partition[i]);
+
+                       partition->lba_start = cpu_to_le32(bs->total_sectors);
+                       lbatochs(bs->total_sectors, &(partition->chs_start));
+
+                       partition->lba_length = cpu_to_le32(bs_part->total_sectors);
+                       lbatochs(bs->total_sectors + bs_part->total_sectors - 1,
+                                               &(partition->chs_end));
+
+                       /* negative = not usable, keep what the MBR already holds */
+                       if (cpart->status >= 0)
+                               partition->status = cpart->status;
+
+                       if (cpart->type >= 0)
+                               partition->type = cpart->type;
+               }
+
+               bs_mp->bs_partition[i] = bs_part;
+               bs->total_sectors += bs_part->total_sectors;
+       }
+
+       if (ret < 0) {
+               for (i = 0; i < 4 && bs_mp->bs_partition[i] != NULL; i++) {
+                       bdrv_delete(bs_mp->bs_partition[i]);
+                       bs_mp->bs_partition[i] = NULL;
+               }
+       }
+
+       return ret;
+}
+
+/*
+ * Write nb_sectors sectors from buf to the first track image bs, starting
+ * at sector_num.
+ *
+ * The write is clipped to the current length of the image: the part that
+ * would extend past its end is dropped, and nothing is written when the
+ * start offset is not inside the image.
+ *
+ * Returns the result of bdrv_pwrite(), or 0 when nothing was written.
+ *
+ * The byte count is computed in 64 bits; shifting nb_sectors as a plain int
+ * could overflow for large requests.
+ */
+static int ft_image_write(BlockDriverState *bs, int64_t sector_num,
+                                       const uint8_t *buf, int nb_sectors)
+{
+       int ret = 0;
+       int64_t write_offset, write_length;
+       int64_t bdrv_length;
+
+       write_offset = sector_num << BLOCK_SHIFT;
+       bdrv_length = bdrv_getlength(bs);
+
+       if (write_offset < bdrv_length) {
+               write_length = (int64_t) nb_sectors << BLOCK_SHIFT;
+               if ((write_offset + write_length) > bdrv_length)
+                       write_length = bdrv_length - write_offset;
+
+               ret = bdrv_pwrite(bs, write_offset, buf, write_length);
+       }
+
+       return ret;
+}
+
+
+/*
+ * Open callback of the multipart driver.
+ *
+ * Parses the config file named by filename, loads the first track, opens the
+ * partition images and, when the partition table was generated and a
+ * writable first track image exists, writes the new MBR sector back to that
+ * image.
+ *
+ * Returns 0 on success or a negative value on failure; on failure every
+ * image opened here is closed again.
+ *
+ * Fixes over the previous version: success is always reported as 0 (the
+ * byte count returned by the MBR write used to leak out as the return
+ * value), and pointers to deleted images are reset to NULL.
+ */
+static int multipart_open(BlockDriverState *bs, const char *filename, int flags)
+{
+       int i, ret;
+       config_t *cfg;
+       BdrvMultipartState *bs_mp = bs->opaque;
+
+       ret = config_parse(filename, &cfg);
+       if (ret < 0)
+               return ret;
+
+       ret = first_track_init(bs, cfg->ft_filename, flags);
+       if (ret < 0) {
+               config_delete(cfg);
+               return ret;
+       }
+
+       ret = partitions_init(bs, flags, cfg);
+       if (ret >= 0 && cfg->dynamic && bs_mp->bs_first_track) {
+               ret = ft_image_write(bs_mp->bs_first_track, 0, bs_mp->first_track.buf, 1);
+               if (ret < 0) {
+                       info("writing MBR to image %s failed", cfg->ft_filename);
+
+                       /* clean up the partitions */
+                       for (i = 0; i < 4 && bs_mp->bs_partition[i] != NULL; i++) {
+                               bdrv_delete(bs_mp->bs_partition[i]);
+                               bs_mp->bs_partition[i] = NULL;
+                       }
+               }
+       }
+
+       config_delete(cfg);
+
+       if (ret < 0) {
+               if (bs_mp->bs_first_track != NULL) {
+                       bdrv_delete(bs_mp->bs_first_track);
+                       bs_mp->bs_first_track = NULL;
+               }
+               return ret;
+       }
+
+       return 0;
+}
+
+/*
+ * Read nb_sectors sectors starting at sector_num from the virtual drive.
+ *
+ * Sectors inside the first track are copied from memory. The remainder is
+ * read from the partition images, which follow the first track back to
+ * back; a request may span several of them.
+ *
+ * Returns 0 on success or a negative value on failure.
+ *
+ * Fixes over the previous version: a negative sector number or count is
+ * rejected, and a request that reaches past the last partition fails with
+ * -EIO instead of returning success with part of buf left unfilled.
+ */
+static int multipart_read(BlockDriverState *bs, int64_t sector_num,
+                     uint8_t *buf, int nb_sectors)
+{
+       int i, ret = 0;
+       int read_sectors;
+       int64_t ft_nsectors;
+       BdrvMultipartState *bs_mp = bs->opaque;
+       BlockDriverState *bs_part;
+
+       if (sector_num < 0 || nb_sectors < 0)
+               return -EIO;
+
+       ft_nsectors = sizeof(bs_mp->first_track) >> BLOCK_SHIFT;
+       if (sector_num < ft_nsectors) {
+               read_sectors = nb_sectors;
+               if ((sector_num + read_sectors) > ft_nsectors)
+                       read_sectors = ft_nsectors - sector_num;
+
+               memcpy(buf, bs_mp->first_track.buf + (sector_num << BLOCK_SHIFT),
+                                       read_sectors << BLOCK_SHIFT);
+
+               sector_num += read_sectors;
+               nb_sectors -= read_sectors;
+               buf += read_sectors << BLOCK_SHIFT;
+       }
+
+       /* from here on sector_num is relative to the start of partition i */
+       sector_num -= ft_nsectors;
+       for (i = 0; i < 4 && bs_mp->bs_partition[i] != NULL && nb_sectors > 0; i++) {
+               bs_part = bs_mp->bs_partition[i];
+
+               if (sector_num < bs_part->total_sectors) {
+                       read_sectors = nb_sectors;
+                       if ((sector_num + read_sectors) > bs_part->total_sectors)
+                               read_sectors = bs_part->total_sectors - sector_num;
+
+                       ret = bdrv_read(bs_part, sector_num, buf, read_sectors);
+                       if (ret < 0)
+                               break;
+
+                       sector_num += read_sectors;
+                       nb_sectors -= read_sectors;
+                       buf += read_sectors << BLOCK_SHIFT;
+               }
+
+               sector_num -= bs_part->total_sectors;
+       }
+
+       /* sectors left over lie beyond the last partition */
+       if (ret >= 0 && nb_sectors > 0)
+               ret = -EIO;
+
+       return ret;
+}
+
+/*
+ * Write nb_sectors sectors starting at sector_num to the virtual drive.
+ *
+ * Sectors inside the first track are stored in memory and, when a writable
+ * first track image exists, mirrored to it. The remainder goes to the
+ * partition images, which follow the first track back to back; a request
+ * may span several of them.
+ *
+ * Returns 0 on success or a negative value on failure.
+ *
+ * Fixes over the previous version:
+ *  - only the sectors that belong to the first track are mirrored to the
+ *    first track image (the unclamped count used to be passed on, so
+ *    partition data could end up in an image longer than one track);
+ *  - a negative sector number or count is rejected, and a request that
+ *    reaches past the last partition fails with -EIO instead of silently
+ *    dropping data;
+ *  - success is always reported as 0, not as the byte count of the mirror
+ *    write.
+ */
+static int multipart_write(BlockDriverState *bs, int64_t sector_num,
+                      const uint8_t *buf, int nb_sectors)
+{
+       int i, ret = 0;
+       int write_sectors;
+       int64_t ft_nsectors;
+       BdrvMultipartState *bs_mp = bs->opaque;
+       BlockDriverState *bs_part;
+
+       if (sector_num < 0 || nb_sectors < 0)
+               return -EIO;
+
+       ft_nsectors = sizeof(bs_mp->first_track) >> BLOCK_SHIFT;
+       if (sector_num < ft_nsectors) {
+               write_sectors = nb_sectors;
+               if ((sector_num + write_sectors) > ft_nsectors)
+                       write_sectors = ft_nsectors - sector_num;
+
+               if (bs_mp->bs_first_track != NULL) {
+                       ret = ft_image_write(bs_mp->bs_first_track, sector_num,
+                                               buf, write_sectors);
+                       if (ret < 0)
+                               return ret;
+                       ret = 0;
+               }
+
+               memcpy(bs_mp->first_track.buf + (sector_num << BLOCK_SHIFT), buf,
+                                       write_sectors << BLOCK_SHIFT);
+
+               sector_num += write_sectors;
+               nb_sectors -= write_sectors;
+               buf += write_sectors << BLOCK_SHIFT;
+       }
+
+       /* from here on sector_num is relative to the start of partition i */
+       sector_num -= ft_nsectors;
+       for (i = 0; i < 4 && bs_mp->bs_partition[i] != NULL && nb_sectors > 0; i++) {
+               bs_part = bs_mp->bs_partition[i];
+
+               if (sector_num < bs_part->total_sectors) {
+                       write_sectors = nb_sectors;
+                       if ((sector_num + write_sectors) > bs_part->total_sectors)
+                               write_sectors = bs_part->total_sectors - sector_num;
+
+                       ret = bdrv_write(bs_part, sector_num, buf, write_sectors);
+                       if (ret < 0)
+                               break;
+
+                       sector_num += write_sectors;
+                       nb_sectors -= write_sectors;
+                       buf += write_sectors << BLOCK_SHIFT;
+               }
+
+               sector_num -= bs_part->total_sectors;
+       }
+
+       /* sectors left over lie beyond the last partition */
+       if (ret >= 0 && nb_sectors > 0)
+               ret = -EIO;
+
+       return ret;
+}
+
+/*
+ * Close callback: release the first track image (if one is kept open) and
+ * every partition image up to the first unused slot.
+ */
+static void multipart_close(BlockDriverState *bs)
+{
+       BdrvMultipartState *state = bs->opaque;
+       int idx = 0;
+
+       if (state->bs_first_track != NULL)
+               bdrv_delete(state->bs_first_track);
+
+       while (idx < 4 && state->bs_partition[idx] != NULL) {
+               bdrv_delete(state->bs_partition[idx]);
+               idx++;
+       }
+}
+
+/*
+ * Driver description registered with the block layer: selected through the
+ * PROTOCOL_NAME ":" filename prefix and backed by the synchronous
+ * open/read/write/close callbacks defined in this file.
+ */
+BlockDriver bdrv_multipart = {
+       .format_name = FORMAT_NAME,
+       .protocol_name = PROTOCOL_NAME,
+       .instance_size = sizeof(BdrvMultipartState),
+       .bdrv_open = multipart_open,
+       .bdrv_close = multipart_close,
+       .bdrv_read = multipart_read,
+       .bdrv_write = multipart_write
+};
+
diff -Nurp qemu-0.9.1/Makefile qemu-0.9.1-multipart/Makefile
--- qemu-0.9.1/Makefile 2008-01-06 20:38:41.000000000 +0100
+++ qemu-0.9.1-multipart/Makefile       2008-03-03 09:55:55.000000000 +0100
@@ -40,7 +40,7 @@ recurse-all: $(patsubst %,subdir-%, $(TA
 BLOCK_OBJS=cutils.o
 BLOCK_OBJS+=block-cow.o block-qcow.o aes.o block-vmdk.o block-cloop.o
 BLOCK_OBJS+=block-dmg.o block-bochs.o block-vpc.o block-vvfat.o
-BLOCK_OBJS+=block-qcow2.o block-parallels.o
+BLOCK_OBJS+=block-qcow2.o block-parallels.o block-multipart.o
 
 ######################################################################
 # libqemu_common.a: Target indepedent part of system emulation. The

reply via email to

[Prev in Thread] Current Thread [Next in Thread]