qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [PATCH 2/4] block: add block topology options


From: Anthony Liguori
Subject: Re: [Qemu-devel] [PATCH 2/4] block: add block topology options
Date: Wed, 03 Feb 2010 13:00:47 -0600
User-agent: Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.1.5) Gecko/20091209 Fedora/3.0-4.fc12 Lightning/1.0pre Thunderbird/3.0

On 01/29/2010 01:04 PM, Christoph Hellwig wrote:
Add three new suboptions for the drive option to export block topology
information to the guest.  This is needed to get optimal I/O alignment
for RAID arrays or SSDs.

The options are:

  - physical_block_size to specify the physical block size of the device,
    this is going to increase from 512 bytes to 4096 kilobytes for many
    modern storage devices
  - min_io_size to specify the minimal I/O size without performance impact,
    this is typically set to the RAID chunk size for arrays.
  - opt_io_size to specify the optimal sustained I/O size, this is
    typically the RAID stripe width for arrays.

This makes sense.

I decided to not auto-probe these values from blkid which might easily
be possible as I don't know how to deal with these issues on migration.

Note that we specificly only set the physical_block_size, and not the
logial one which is the unit all I/O is described in.  The reason for
that is that IDE does not support increasing the logical block size and
at last for now I want to stick to one meachnisms in queue and allow
for easy switching of transports for a given backing image which would
not be possible if scsi and virtio use real 4k sectors, while ide only
uses the physical block exponent.

Signed-off-by: Christoph Hellwig<address@hidden>

Index: qemu/block.c
===================================================================
--- qemu.orig/block.c   2010-01-29 11:07:50.083004364 +0100
+++ qemu/block.c        2010-01-29 11:08:32.940004255 +0100
@@ -1028,6 +1028,51 @@ int bdrv_enable_write_cache(BlockDriverS
      return bs->enable_write_cache;
  }

+unsigned int bdrv_get_physical_block_size(BlockDriverState *bs)
+{
+    return bs->physical_block_size;
+}
+
+unsigned int bdrv_get_physical_block_exp(BlockDriverState *bs)
+{
+    unsigned int exp = 0, size;
+
+    for (size = bs->physical_block_size; size>  512; size>>= 1) {
+        exp++;
+    }
+
+    return exp;
+}
+
+void bdrv_set_physical_block_size(BlockDriverState *bs,
+        unsigned int physical_block_size)
+{
+    bs->physical_block_size = physical_block_size;
+}
+

But I don't think this is the wrong place to do it. The BlockDriverState reflects that backing device, not the emulated device itself. In this case, you're trying to set a property of the emulated device.

I think these need to be qdev properties of the respective devices. From a UI perspective, you can still expose -drive options for the end user to consume, but this data should be associated with the devices themselves.

Regards,

Anthony Liguori

+unsigned int bdrv_get_min_io_size(BlockDriverState *bs)
+{
+    return bs->min_io_size;
+}
+
+void bdrv_set_min_io_size(BlockDriverState *bs,
+        unsigned int min_io_size)
+{
+    bs->min_io_size = min_io_size;
+}
+
+unsigned int bdrv_get_opt_io_size(BlockDriverState *bs)
+{
+    return bs->opt_io_size;
+}
+
+void bdrv_set_opt_io_size(BlockDriverState *bs,
+        unsigned int opt_io_size)
+{
+    bs->opt_io_size = opt_io_size;
+}
+
  /* XXX: no longer used */
  void bdrv_set_change_cb(BlockDriverState *bs,
                          void (*change_cb)(void *opaque), void *opaque)
Index: qemu/block.h
===================================================================
--- qemu.orig/block.h   2010-01-29 11:07:50.089004011 +0100
+++ qemu/block.h        2010-01-29 11:08:32.940004255 +0100
@@ -152,6 +152,16 @@ int bdrv_is_inserted(BlockDriverState *b
  int bdrv_media_changed(BlockDriverState *bs);
  int bdrv_is_locked(BlockDriverState *bs);
  void bdrv_set_locked(BlockDriverState *bs, int locked);
+unsigned int bdrv_get_physical_block_size(BlockDriverState *bs);
+unsigned int bdrv_get_physical_block_exp(BlockDriverState *bs);
+void bdrv_set_physical_block_size(BlockDriverState *bs,
+        unsigned int physical_block_size);
+unsigned int bdrv_get_min_io_size(BlockDriverState *bs);
+void bdrv_set_min_io_size(BlockDriverState *bs,
+        unsigned int min_io_size);
+unsigned int bdrv_get_opt_io_size(BlockDriverState *bs);
+void bdrv_set_opt_io_size(BlockDriverState *bs,
+        unsigned int opt_io_size);
  int bdrv_eject(BlockDriverState *bs, int eject_flag);
  void bdrv_set_change_cb(BlockDriverState *bs,
                          void (*change_cb)(void *opaque), void *opaque);
Index: qemu/block_int.h
===================================================================
--- qemu.orig/block_int.h       2010-01-29 11:07:50.096004065 +0100
+++ qemu/block_int.h    2010-01-29 11:08:32.941003474 +0100
@@ -173,6 +173,14 @@ struct BlockDriverState {
         drivers. They are not used by the block driver */
      int cyls, heads, secs, translation;
      int type;
+
+    /*
+     * Topology information, all optional.
+     */
+    unsigned int physical_block_size;
+    unsigned int min_io_size;
+    unsigned int opt_io_size;
+
      char device_name[32];
      unsigned long *dirty_bitmap;
      BlockDriverState *next;
Index: qemu/qemu-config.c
===================================================================
--- qemu.orig/qemu-config.c     2010-01-29 11:07:50.133004032 +0100
+++ qemu/qemu-config.c  2010-01-29 11:08:32.944025367 +0100
@@ -78,6 +78,15 @@ QemuOptsList qemu_drive_opts = {
          },{
              .name = "readonly",
              .type = QEMU_OPT_BOOL,
+        },{
+            .name = "physical_block_size",
+            .type = QEMU_OPT_NUMBER,
+        },{
+            .name = "min_io_size",
+            .type = QEMU_OPT_NUMBER,
+        },{
+            .name = "opt_io_size",
+            .type = QEMU_OPT_NUMBER,
          },
          { /* end if list */ }
      },
Index: qemu/vl.c
===================================================================
--- qemu.orig/vl.c      2010-01-29 11:07:50.141004284 +0100
+++ qemu/vl.c   2010-01-29 11:08:32.947003820 +0100
@@ -1904,6 +1904,9 @@ DriveInfo *drive_init(QemuOpts *opts, vo
      int index;
      int cache;
      int aio = 0;
+    unsigned long physical_block_size = 512;
+    unsigned long min_io_size = 0;
+    unsigned long opt_io_size = 0;
      int ro = 0;
      int bdrv_flags;
      int on_read_error, on_write_error;
@@ -2053,6 +2056,32 @@ DriveInfo *drive_init(QemuOpts *opts, vo
      }
  #endif

+    if ((buf = qemu_opt_get(opts, "physical_block_size")) != NULL) {
+        physical_block_size = strtoul(buf, NULL, 10);
+        if (physical_block_size<  512) {
+            fprintf(stderr, "sector size must be larger than 512 bytes\n");
+            return NULL;
+        }
+    }
+
+    if ((buf = qemu_opt_get(opts, "min_io_size")) != NULL) {
+        min_io_size = strtoul(buf, NULL, 10);
+        if (!min_io_size || (min_io_size % physical_block_size)) {
+            fprintf(stderr,
+                    "min_io_size must be a multiple of the sector size\n");
+            return NULL;
+        }
+    }
+
+    if ((buf = qemu_opt_get(opts, "opt_io_size")) != NULL) {
+        opt_io_size = strtoul(buf, NULL, 10);
+        if (!opt_io_size || (opt_io_size % min_io_size)) {
+            fprintf(stderr,
+                    "opt_io_size must be a multiple of min_io_size\n");
+            return NULL;
+        }
+    }
+
      if ((buf = qemu_opt_get(opts, "format")) != NULL) {
         if (strcmp(buf, "?") == 0) {
              fprintf(stderr, "qemu: Supported formats:");
@@ -2257,6 +2286,12 @@ DriveInfo *drive_init(QemuOpts *opts, vo
          return NULL;
      }

+    bdrv_set_physical_block_size(dinfo->bdrv, physical_block_size);
+    if (min_io_size)
+        bdrv_set_min_io_size(dinfo->bdrv, min_io_size);
+    if (opt_io_size)
+        bdrv_set_opt_io_size(dinfo->bdrv, opt_io_size);
+
      if (bdrv_key_required(dinfo->bdrv))
          autostart = 0;
      *fatal_error = 0;
Index: qemu/qemu-options.hx
===================================================================
--- qemu.orig/qemu-options.hx   2010-01-29 11:07:50.149004395 +0100
+++ qemu/qemu-options.hx        2010-01-29 19:36:06.118256061 +0100
@@ -104,6 +104,7 @@ DEF("drive", HAS_ARG, QEMU_OPTION_drive,
      "       [,cyls=c,heads=h,secs=s[,trans=t]][,snapshot=on|off]\n"
      "       [,cache=writethrough|writeback|none][,format=f][,serial=s]\n"
      "       [,addr=A][,id=name][,aio=threads|native][,readonly=on|off]\n"
+    "       
[,physical_block=size=size][,min_io_size=size][,opt_io_size=size]\n"
      "                use 'file' as a drive image\n")
  DEF("set", HAS_ARG, QEMU_OPTION_set,
      "-set group.id.arg=value\n"
@@ -149,6 +150,15 @@ an untrusted format header.
  This option specifies the serial number to assign to the device.
  @item address@hidden
  Specify the controller's PCI address (if=virtio only).
address@hidden address@hidden
+Report a physical block size larger than the logical block size of 512 bytes.
address@hidden address@hidden
+Reported a minimum I/O size or optimum I/O granularity.  This is the smallest
+I/O size the device can perform without a performance penalty.  For RAID
+devices this should be set to the stripe chunk size.
address@hidden address@hidden
+Report an optimal I/O size, which is the device's preferred unit for
+sustained I/O.  This should be set to the stripe width for RAID devices.
  @end table

  By default, writethrough caching is used for all block device.  This means 
that








reply via email to

[Prev in Thread] Current Thread [Next in Thread]