qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Qemu-devel] [PATCH 2/3] Perform emulated IDE flushes asynchronously.


From: Ian Jackson
Subject: [Qemu-devel] [PATCH 2/3] Perform emulated IDE flushes asynchronously.
Date: Mon, 1 Sep 2008 18:18:28 +0100

We arrange for the WIN_FLUSH_CACHE and WIN_FLUSH_CACHE_EXT
commands to use a new bdrv_aio_flush facility.

If there is an error, the ATA-7 spec says that we are supposed to know
which is the first block whose flush failed and leave that in the
block offset registers.  However, since we are using f(data)sync, that's
not possible for us.  Sadly, there is no way for us to report the error
that won't encourage the guest to try to understand what went wrong
and then repeat the flush, expecting the remaining blocks to be
written (as specified by ATA-7).

So if the asynchronous flush fails, we kill the disk by detaching
->bs.  This makes it vanish: we don't generate any more interrupts,
leave status set to busy, and ignore future commands (and discard any
in-flight IO).  Alan Cox reports that this will probably induce the
best available behaviour in guests (retry for a while and then give
up).  Fine-grained error reporting is available if the guest turns off
the write cache.

Signed-off-by: Ian Jackson <address@hidden>
Modified-by: Ian Jackson <address@hidden>
Signed-off-by: Kouya Shimura <address@hidden>

Cherry picked from qemu-xen
        d1e5cc49395831cb9c23e00c37898cf943c1d4be
Conflicts:
        hw/ide.c
Signed-off-by: Ian Jackson <address@hidden>
---
 block-qcow.c      |    8 +++++++
 block-qcow2.c     |    8 +++++++
 block-raw-posix.c |   16 +++++++++++++++
 block.c           |   39 ++++++++++++++++++++++++++++++++++++++
 block.h           |    2 +
 block_int.h       |    2 +
 hw/ide.c          |   54 +++++++++++++++++++++++++++++++++++++++++++++-------
 7 files changed, 121 insertions(+), 8 deletions(-)

diff --git a/block-qcow.c b/block-qcow.c
index b5bfce6..f0043a0 100644
--- a/block-qcow.c
+++ b/block-qcow.c
@@ -720,6 +720,13 @@ static void qcow_aio_cancel(BlockDriverAIOCB *blockacb)
     qemu_aio_release(acb);
 }
 
+static BlockDriverAIOCB *qcow_aio_flush(BlockDriverState *bs,
+        BlockDriverCompletionFunc *cb, void *opaque)
+{
+    BDRVQcowState *s = bs->opaque;
+    return bdrv_aio_flush(s->hd, cb, opaque);
+}
+
 static void qcow_close(BlockDriverState *bs)
 {
     BDRVQcowState *s = bs->opaque;
@@ -898,6 +905,7 @@ BlockDriver bdrv_qcow = {
     .bdrv_aio_read = qcow_aio_read,
     .bdrv_aio_write = qcow_aio_write,
     .bdrv_aio_cancel = qcow_aio_cancel,
+    .bdrv_aio_flush = qcow_aio_flush,
     .aiocb_size = sizeof(QCowAIOCB),
     .bdrv_write_compressed = qcow_write_compressed,
     .bdrv_get_info = qcow_get_info,
diff --git a/block-qcow2.c b/block-qcow2.c
index 07c64ce..71df9a9 100644
--- a/block-qcow2.c
+++ b/block-qcow2.c
@@ -1377,6 +1377,13 @@ static void qcow_aio_cancel(BlockDriverAIOCB *blockacb)
     qemu_aio_release(acb);
 }
 
+static BlockDriverAIOCB *qcow_aio_flush(BlockDriverState *bs,
+        BlockDriverCompletionFunc *cb, void *opaque)
+{
+    BDRVQcowState *s = bs->opaque;
+    return bdrv_aio_flush(s->hd, cb, opaque);
+}
+
 static void qcow_close(BlockDriverState *bs)
 {
     BDRVQcowState *s = bs->opaque;
@@ -2613,6 +2620,7 @@ BlockDriver bdrv_qcow2 = {
     .bdrv_aio_read = qcow_aio_read,
     .bdrv_aio_write = qcow_aio_write,
     .bdrv_aio_cancel = qcow_aio_cancel,
+    .bdrv_aio_flush = qcow_aio_flush,
     .aiocb_size = sizeof(QCowAIOCB),
     .bdrv_write_compressed = qcow_write_compressed,
 
diff --git a/block-raw-posix.c b/block-raw-posix.c
index 5fdd6e9..43ae293 100644
--- a/block-raw-posix.c
+++ b/block-raw-posix.c
@@ -1062,6 +1062,21 @@ static int fd_open(BlockDriverState *bs)
 {
     return 0;
 }
+
+static BlockDriverAIOCB *raw_aio_flush(BlockDriverState *bs,
+        BlockDriverCompletionFunc *cb, void *opaque)
+{
+    RawAIOCB *acb;
+
+    acb = raw_aio_setup(bs, 0, NULL, 0, cb, opaque);
+    if (!acb)
+        return NULL;
+    if (aio_fsync(O_SYNC, &acb->aiocb) < 0) {
+        qemu_aio_release(acb);
+        return NULL;
+    }
+    return &acb->common;
+}
 #endif
 
 #if defined(__linux__)
@@ -1214,6 +1229,7 @@ BlockDriver bdrv_host_device = {
     .bdrv_aio_read = raw_aio_read,
     .bdrv_aio_write = raw_aio_write,
     .bdrv_aio_cancel = raw_aio_cancel,
+    .bdrv_aio_flush = raw_aio_flush,
     .aiocb_size = sizeof(RawAIOCB),
 #endif
     .bdrv_pread = raw_pread,
diff --git a/block.c b/block.c
index e5aa401..127430c 100644
--- a/block.c
+++ b/block.c
@@ -50,6 +50,8 @@ static BlockDriverAIOCB *bdrv_aio_read_em(BlockDriverState 
*bs,
 static BlockDriverAIOCB *bdrv_aio_write_em(BlockDriverState *bs,
         int64_t sector_num, const uint8_t *buf, int nb_sectors,
         BlockDriverCompletionFunc *cb, void *opaque);
+static BlockDriverAIOCB *bdrv_aio_flush_em(BlockDriverState *bs,
+        BlockDriverCompletionFunc *cb, void *opaque);
 static void bdrv_aio_cancel_em(BlockDriverAIOCB *acb);
 static int bdrv_read_em(BlockDriverState *bs, int64_t sector_num,
                         uint8_t *buf, int nb_sectors);
@@ -136,6 +138,8 @@ static void bdrv_register(BlockDriver *bdrv)
         bdrv->bdrv_read = bdrv_read_em;
         bdrv->bdrv_write = bdrv_write_em;
     }
+    if (!bdrv->bdrv_aio_flush)
+        bdrv->bdrv_aio_flush = bdrv_aio_flush_em;
     bdrv->next = first_drv;
     first_drv = bdrv;
 }
@@ -1189,6 +1193,17 @@ void bdrv_aio_cancel(BlockDriverAIOCB *acb)
     drv->bdrv_aio_cancel(acb);
 }
 
+BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs, 
+                                 BlockDriverCompletionFunc *cb, void *opaque)
+{
+    BlockDriver *drv = bs->drv;
+
+    if (!drv)
+        return NULL;
+
+    return drv->bdrv_aio_flush(bs, cb, opaque);
+}
+
 
 /**************************************************************/
 /* async block device emulation */
@@ -1214,6 +1229,15 @@ static BlockDriverAIOCB 
*bdrv_aio_write_em(BlockDriverState *bs,
     return NULL;
 }
 
+static BlockDriverAIOCB *bdrv_aio_flush_em(BlockDriverState *bs,
+        BlockDriverCompletionFunc *cb, void *opaque)
+{
+    int ret;
+    ret = bdrv_flush(bs);
+    cb(opaque, ret);
+    return NULL;
+}
+
 static void bdrv_aio_cancel_em(BlockDriverAIOCB *acb)
 {
 }
@@ -1257,6 +1281,21 @@ static BlockDriverAIOCB 
*bdrv_aio_write_em(BlockDriverState *bs,
     return &acb->common;
 }
 
+static BlockDriverAIOCB *bdrv_aio_flush_em(BlockDriverState *bs,
+        BlockDriverCompletionFunc *cb, void *opaque)
+{
+    BlockDriverAIOCBSync *acb;
+    int ret;
+
+    acb = qemu_aio_get(bs, cb, opaque);
+    if (!acb->bh)
+        acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
+    ret = bdrv_flush(bs);
+    acb->ret = ret;
+    qemu_bh_schedule(acb->bh);
+    return &acb->common;
+}
+
 static void bdrv_aio_cancel_em(BlockDriverAIOCB *blockacb)
 {
     BlockDriverAIOCBSync *acb = (BlockDriverAIOCBSync *)blockacb;
diff --git a/block.h b/block.h
index 510818b..05a3de8 100644
--- a/block.h
+++ b/block.h
@@ -87,6 +87,8 @@ BlockDriverAIOCB *bdrv_aio_read(BlockDriverState *bs, int64_t 
sector_num,
 BlockDriverAIOCB *bdrv_aio_write(BlockDriverState *bs, int64_t sector_num,
                                  const uint8_t *buf, int nb_sectors,
                                  BlockDriverCompletionFunc *cb, void *opaque);
+BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs, 
+                                 BlockDriverCompletionFunc *cb, void *opaque);
 void bdrv_aio_cancel(BlockDriverAIOCB *acb);
 
 void qemu_aio_init(void);
diff --git a/block_int.h b/block_int.h
index 796ce29..bda0eab 100644
--- a/block_int.h
+++ b/block_int.h
@@ -55,6 +55,8 @@ struct BlockDriver {
         int64_t sector_num, const uint8_t *buf, int nb_sectors,
         BlockDriverCompletionFunc *cb, void *opaque);
     void (*bdrv_aio_cancel)(BlockDriverAIOCB *acb);
+    BlockDriverAIOCB *(*bdrv_aio_flush)(BlockDriverState *bs,
+        BlockDriverCompletionFunc *cb, void *opaque);
     int aiocb_size;
 
     const char *protocol_name;
diff --git a/hw/ide.c b/hw/ide.c
index 6b73f41..a3b9e6e 100644
--- a/hw/ide.c
+++ b/hw/ide.c
@@ -748,6 +748,7 @@ static inline void ide_dma_submit_check(IDEState *s,
 static inline void ide_set_irq(IDEState *s)
 {
     BMDMAState *bm = s->bmdma;
+    if (!s->bs) return; /* ouch! (see ide_flush_cb) */
     if (!(s->cmd & IDE_CMD_DISABLE_IRQ)) {
         if (bm) {
             bm->status |= BM_STATUS_INT;
@@ -927,6 +928,8 @@ static void ide_read_dma_cb(void *opaque, int ret)
        return;
     }
 
+    if (!s->bs) return; /* ouch! (see ide_flush_cb) */
+
     n = s->io_buffer_size >> 9;
     sector_num = ide_get_sector(s);
     if (n > 0) {
@@ -1038,6 +1041,8 @@ static void ide_write_dma_cb(void *opaque, int ret)
        return;
     }
 
+    if (!s->bs) return; /* ouch! (see ide_flush_cb) */
+
     n = s->io_buffer_size >> 9;
     sector_num = ide_get_sector(s);
     if (n > 0) {
@@ -1084,6 +1089,39 @@ static void ide_sector_write_dma(IDEState *s)
     ide_dma_start(s, ide_write_dma_cb);
 }
 
+static void ide_device_utterly_broken(IDEState *s) {
+    s->status |= BUSY_STAT;
+    s->bs = NULL;
+    /* This prevents all future commands from working.  All of the
+     * asynchronous callbacks (and ide_set_irq, as a safety measure)
+     * check to see whether this has happened and bail if so.
+     */
+}
+
+static void ide_flush_cb(void *opaque, int ret)
+{
+    IDEState *s = opaque;
+
+    if (!s->bs) return; /* ouch! (see ide_flush_cb) */
+
+    if (ret) {
+        /* We are completely doomed.  The IDE spec does not permit us
+        * to return an error from a flush except via a protocol which
+        * requires us to say where the error is and which
+        * contemplates the guest repeating the flush in order to
+        * flush the remaining data.  We can't support that
+        * because f(data)sync (which is what the block drivers use
+        * eventually) doesn't report the necessary information or
+        * give us the necessary control.  So we make the disk vanish.
+        */
+       ide_device_utterly_broken(s);
+       return;
+    }
+    else
+        s->status = READY_STAT | SEEK_STAT;
+    ide_set_irq(s);
+}
+
 static void ide_atapi_cmd_ok(IDEState *s)
 {
     s->error = 0;
@@ -1310,6 +1348,8 @@ static void ide_atapi_cmd_read_dma_cb(void *opaque, int 
ret)
     IDEState *s = bm->ide_if;
     int data_offset, n;
 
+    if (!s->bs) return; /* ouch! (see ide_flush_cb) */
+
     if (ret < 0) {
         ide_atapi_io_error(s, ret);
         goto eot;
@@ -1973,6 +2013,8 @@ static void cdrom_change_cb(void *opaque)
     IDEState *s = opaque;
     uint64_t nb_sectors;
 
+    if (!s->bs) return; /* ouch! (see ide_flush_cb) */
+
     /* XXX: send interrupt too */
     bdrv_get_geometry(s->bs, &nb_sectors);
     s->nb_sectors = nb_sectors;
@@ -2081,8 +2123,8 @@ static void ide_ioport_write(void *opaque, uint32_t addr, 
uint32_t val)
         printf("ide: CMD=%02x\n", val);
 #endif
         s = ide_if->cur_drive;
-        /* ignore commands to non existant slave */
-        if (s != ide_if && !s->bs)
+       /* ignore commands to nonexistent device */
+        if (!s->bs)
             break;
 
         /* Only DEVICE RESET is allowed while BSY or/and DRQ are set */
@@ -2277,12 +2319,8 @@ static void ide_ioport_write(void *opaque, uint32_t 
addr, uint32_t val)
             break;
         case WIN_FLUSH_CACHE:
         case WIN_FLUSH_CACHE_EXT:
-            if (s->bs) {
-                ret = bdrv_flush(s->bs);
-               if (ret) goto abort_cmd;
-           }
-           s->status = READY_STAT | SEEK_STAT;
-            ide_set_irq(s);
+           s->status = BUSY_STAT;
+           bdrv_aio_flush(s->bs, ide_flush_cb, s);
             break;
         case WIN_STANDBY:
         case WIN_STANDBY2:
-- 
1.4.4.4





reply via email to

[Prev in Thread] Current Thread [Next in Thread]