[Qemu-devel] [PATCH v2 01/15] ide: add limit to .prepare_buf()


From: John Snow
Subject: [Qemu-devel] [PATCH v2 01/15] ide: add limit to .prepare_buf()
Date: Wed, 1 Jul 2015 12:19:24 -0400

prepare_buf should not always grab as many descriptors
as it can; sometimes it should self-limit.

For example, an NCQ transfer of 1 sector with a PRDT that
describes 4GiB of data should not copy 4GiB of data; it
should transfer just the first 512 bytes.

PIO is not affected, because the dma_buf_rw DMA helpers
already have a byte limit built into them, but DMA/NCQ
will exhaust the entire list regardless of the requested size.
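
Schematically, the change threads a byte limit through sglist
construction and clamps every PRDT entry against it. A simplified
sketch of the idea (populate_clamped is a hypothetical helper for
illustration only; the real hunks below also handle the resume
offset and the 2GiB overflow check):

    /* Sketch: add PRDT entries to the sglist, clamped so that the
     * list never describes more than `limit` bytes. */
    static void populate_clamped(QEMUSGList *sglist, const AHCI_SG *tbl,
                                 int prdtl, int64_t limit)
    {
        int i;

        for (i = 0; i < prdtl && sglist->size < limit; i++) {
            qemu_sglist_add(sglist, le64_to_cpu(tbl[i].addr),
                            MIN(prdt_tbl_entry_size(&tbl[i]),
                                limit - sglist->size));
        }
    }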

AHCI 1.3 specifies in section 6.1.6 "Command List Underflow" that
NCQ is not required to detect underflow conditions. Non-NCQ
pathways signal underflow through the PRDBC field: because we
already write the actual transferred byte count to the PRDBC,
the underflow is signaled there automatically.
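
For illustration, the PRDBC update amounts to something like the
following (a sketch only: commit_prdbc is a hypothetical name; the
AHCICmdHdr `status` field holds the PRDBC, as in the existing
ahci.c code):

    /* Sketch: record the bytes actually transferred. A total smaller
     * than what the PRDT describes is itself the underflow signal. */
    static void commit_prdbc(AHCICmdHdr *cmd, uint32_t tx_bytes)
    {
        tx_bytes += le32_to_cpu(cmd->status);  /* status == PRDBC */
        cmd->status = cpu_to_le32(tx_bytes);
    }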

Our NCQ pathways aren't required to detect underflow, but since our DMA
backend uses the size of the PRDT to determine the size of the transfer,
if our PRDT is bigger than the transaction (the underflow condition),
it doesn't cost us anything to detect it and truncate the PRDT.

This is a recoverable error and is not signaled to the guest in
either the NCQ or normal DMA case.

For BMDMA, the existing pathways should see no guest-visible difference,
but any bytes described in the overage will no longer be transferred
before the underflow is indicated to the guest.
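
The caller-side pattern (visible in the core.c hunk below) is to
request exactly the bytes of the transfer and to treat a short
return as "PRDs too short", roughly:

    /* Sketch of the ide_dma_cb branch: if the PRDs describe less
     * than one sector, stop without raising an interrupt. */
    if (s->bus->dma->ops->prepare_buf(s->bus->dma, s->io_buffer_size) < 512) {
        s->status = READY_STAT | SEEK_STAT;  /* reset the Active bit */
        goto eot;                            /* end-of-transfer cleanup */
    }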

Signed-off-by: John Snow <address@hidden>
---
 hw/ide/ahci.c     | 27 ++++++++++++++-------------
 hw/ide/core.c     |  8 ++++----
 hw/ide/internal.h |  2 +-
 hw/ide/macio.c    |  2 +-
 hw/ide/pci.c      | 21 ++++++++++++++++-----
 5 files changed, 36 insertions(+), 24 deletions(-)

diff --git a/hw/ide/ahci.c b/hw/ide/ahci.c
index b73a2e4..de1759a 100644
--- a/hw/ide/ahci.c
+++ b/hw/ide/ahci.c
@@ -49,7 +49,7 @@ static int handle_cmd(AHCIState *s,int port,int slot);
 static void ahci_reset_port(AHCIState *s, int port);
 static void ahci_write_fis_d2h(AHCIDevice *ad, uint8_t *cmd_fis);
 static void ahci_init_d2h(AHCIDevice *ad);
-static int ahci_dma_prepare_buf(IDEDMA *dma, int is_write);
+static int ahci_dma_prepare_buf(IDEDMA *dma, int32_t limit);
 static void ahci_commit_buf(IDEDMA *dma, uint32_t tx_bytes);
 static bool ahci_map_clb_address(AHCIDevice *ad);
 static bool ahci_map_fis_address(AHCIDevice *ad);
@@ -827,11 +827,12 @@ static void ahci_write_fis_d2h(AHCIDevice *ad, uint8_t *cmd_fis)
 
 static int prdt_tbl_entry_size(const AHCI_SG *tbl)
 {
+    /* flags_size is zero-based */
     return (le32_to_cpu(tbl->flags_size) & AHCI_PRDT_SIZE_MASK) + 1;
 }
 
 static int ahci_populate_sglist(AHCIDevice *ad, QEMUSGList *sglist,
-                                int32_t offset)
+                                int64_t limit, int32_t offset)
 {
     AHCICmdHdr *cmd = ad->cur_cmd;
     uint16_t opts = le16_to_cpu(cmd->opts);
@@ -881,9 +882,8 @@ static int ahci_populate_sglist(AHCIDevice *ad, QEMUSGList *sglist,
         AHCI_SG *tbl = (AHCI_SG *)prdt;
         sum = 0;
         for (i = 0; i < prdtl; i++) {
-            /* flags_size is zero-based */
             tbl_entry_size = prdt_tbl_entry_size(&tbl[i]);
-            if (offset <= (sum + tbl_entry_size)) {
+            if (offset < (sum + tbl_entry_size)) {
                 off_idx = i;
                 off_pos = offset - sum;
                 break;
@@ -901,12 +901,13 @@ static int ahci_populate_sglist(AHCIDevice *ad, QEMUSGList *sglist,
         qemu_sglist_init(sglist, qbus->parent, (prdtl - off_idx),
                          ad->hba->as);
         qemu_sglist_add(sglist, le64_to_cpu(tbl[off_idx].addr) + off_pos,
-                        prdt_tbl_entry_size(&tbl[off_idx]) - off_pos);
+                        MIN(prdt_tbl_entry_size(&tbl[off_idx]) - off_pos,
+                            limit));
 
-        for (i = off_idx + 1; i < prdtl; i++) {
-            /* flags_size is zero-based */
+        for (i = off_idx + 1; i < prdtl && sglist->size < limit; i++) {
             qemu_sglist_add(sglist, le64_to_cpu(tbl[i].addr),
-                            prdt_tbl_entry_size(&tbl[i]));
+                            MIN(prdt_tbl_entry_size(&tbl[i]),
+                                limit - sglist->size));
             if (sglist->size > INT32_MAX) {
                 error_report("AHCI Physical Region Descriptor Table describes "
                              "more than 2 GiB.\n");
@@ -1024,8 +1025,8 @@ static void process_ncq_command(AHCIState *s, int port, uint8_t *cmd_fis,
 
     ncq_tfs->sector_count = ((uint16_t)ncq_fis->sector_count_high << 8) |
                                 ncq_fis->sector_count_low;
-    ahci_populate_sglist(ad, &ncq_tfs->sglist, 0);
     size = ncq_tfs->sector_count * 512;
+    ahci_populate_sglist(ad, &ncq_tfs->sglist, size, 0);
 
     if (ncq_tfs->sglist.size < size) {
         error_report("ahci: PRDT length for NCQ command (0x%zx) "
@@ -1262,7 +1263,7 @@ static void ahci_start_transfer(IDEDMA *dma)
         goto out;
     }
 
-    if (ahci_dma_prepare_buf(dma, is_write)) {
+    if (ahci_dma_prepare_buf(dma, size)) {
         has_sglist = 1;
     }
 
@@ -1312,12 +1313,12 @@ static void ahci_restart_dma(IDEDMA *dma)
  * Not currently invoked by PIO R/W chains,
  * which invoke ahci_populate_sglist via ahci_start_transfer.
  */
-static int32_t ahci_dma_prepare_buf(IDEDMA *dma, int is_write)
+static int32_t ahci_dma_prepare_buf(IDEDMA *dma, int32_t limit)
 {
     AHCIDevice *ad = DO_UPCAST(AHCIDevice, dma, dma);
     IDEState *s = &ad->port.ifs[0];
 
-    if (ahci_populate_sglist(ad, &s->sg, s->io_buffer_offset) == -1) {
+    if (ahci_populate_sglist(ad, &s->sg, limit, s->io_buffer_offset) == -1) {
         DPRINTF(ad->port_no, "ahci_dma_prepare_buf failed.\n");
         return -1;
     }
@@ -1352,7 +1353,7 @@ static int ahci_dma_rw_buf(IDEDMA *dma, int is_write)
     uint8_t *p = s->io_buffer + s->io_buffer_index;
     int l = s->io_buffer_size - s->io_buffer_index;
 
-    if (ahci_populate_sglist(ad, &s->sg, s->io_buffer_offset)) {
+    if (ahci_populate_sglist(ad, &s->sg, l, s->io_buffer_offset)) {
         return 0;
     }
 
diff --git a/hw/ide/core.c b/hw/ide/core.c
index 1efd98a..be7c350 100644
--- a/hw/ide/core.c
+++ b/hw/ide/core.c
@@ -716,8 +716,8 @@ static void ide_dma_cb(void *opaque, int ret)
 
     sector_num = ide_get_sector(s);
     if (n > 0) {
-        assert(s->io_buffer_size == s->sg.size);
-        dma_buf_commit(s, s->io_buffer_size);
+        assert(n * 512 == s->sg.size);
+        dma_buf_commit(s, s->sg.size);
         sector_num += n;
         ide_set_sector(s, sector_num);
         s->nsector -= n;
@@ -734,7 +734,7 @@ static void ide_dma_cb(void *opaque, int ret)
     n = s->nsector;
     s->io_buffer_index = 0;
     s->io_buffer_size = n * 512;
-    if (s->bus->dma->ops->prepare_buf(s->bus->dma, ide_cmd_is_read(s)) < 512) {
+    if (s->bus->dma->ops->prepare_buf(s->bus->dma, s->io_buffer_size) < 512) {
         /* The PRDs were too short. Reset the Active bit, but don't raise an
          * interrupt. */
         s->status = READY_STAT | SEEK_STAT;
@@ -2326,7 +2326,7 @@ static void ide_nop(IDEDMA *dma)
 {
 }
 
-static int32_t ide_nop_int32(IDEDMA *dma, int x)
+static int32_t ide_nop_int32(IDEDMA *dma, int32_t l)
 {
     return 0;
 }
diff --git a/hw/ide/internal.h b/hw/ide/internal.h
index 965cc55..3736e1b 100644
--- a/hw/ide/internal.h
+++ b/hw/ide/internal.h
@@ -324,7 +324,7 @@ typedef void EndTransferFunc(IDEState *);
 typedef void DMAStartFunc(IDEDMA *, IDEState *, BlockCompletionFunc *);
 typedef void DMAVoidFunc(IDEDMA *);
 typedef int DMAIntFunc(IDEDMA *, int);
-typedef int32_t DMAInt32Func(IDEDMA *, int);
+typedef int32_t DMAInt32Func(IDEDMA *, int32_t len);
 typedef void DMAu32Func(IDEDMA *, uint32_t);
 typedef void DMAStopFunc(IDEDMA *, bool);
 typedef void DMARestartFunc(void *, int, RunState);
diff --git a/hw/ide/macio.c b/hw/ide/macio.c
index dd52d50..a55a479 100644
--- a/hw/ide/macio.c
+++ b/hw/ide/macio.c
@@ -499,7 +499,7 @@ static int ide_nop_int(IDEDMA *dma, int x)
     return 0;
 }
 
-static int32_t ide_nop_int32(IDEDMA *dma, int x)
+static int32_t ide_nop_int32(IDEDMA *dma, int32_t l)
 {
     return 0;
 }
diff --git a/hw/ide/pci.c b/hw/ide/pci.c
index 4afd0cf..d31ff88 100644
--- a/hw/ide/pci.c
+++ b/hw/ide/pci.c
@@ -53,10 +53,14 @@ static void bmdma_start_dma(IDEDMA *dma, IDEState *s,
 }
 
 /**
- * Return the number of bytes successfully prepared.
- * -1 on error.
+ * Prepare an sglist based on available PRDs.
+ * @limit: How many bytes to prepare total.
+ *
+ * Returns the number of bytes prepared, -1 on error.
+ * IDEState.io_buffer_size will contain the number of bytes described
+ * by the PRDs, whether or not we added them to the sglist.
  */
-static int32_t bmdma_prepare_buf(IDEDMA *dma, int is_write)
+static int32_t bmdma_prepare_buf(IDEDMA *dma, int32_t limit)
 {
     BMDMAState *bm = DO_UPCAST(BMDMAState, dma, dma);
     IDEState *s = bmdma_active_if(bm);
@@ -75,7 +79,7 @@ static int32_t bmdma_prepare_buf(IDEDMA *dma, int is_write)
             /* end of table (with a fail safe of one page) */
             if (bm->cur_prd_last ||
                 (bm->cur_addr - bm->addr) >= BMDMA_PAGE_SIZE) {
-                return s->io_buffer_size;
+                return s->sg.size;
             }
             pci_dma_read(pci_dev, bm->cur_addr, &prd, 8);
             bm->cur_addr += 8;
@@ -90,7 +94,14 @@ static int32_t bmdma_prepare_buf(IDEDMA *dma, int is_write)
         }
         l = bm->cur_prd_len;
         if (l > 0) {
-            qemu_sglist_add(&s->sg, bm->cur_prd_addr, l);
+            uint64_t sg_len;
+
+            /* Don't add extra bytes to the SGList; consume any remaining
+             * PRDs from the guest, but ignore them. */
+            sg_len = MIN(limit - s->sg.size, bm->cur_prd_len);
+            if (sg_len) {
+                qemu_sglist_add(&s->sg, bm->cur_prd_addr, sg_len);
+            }
 
             /* Note: We limit the max transfer to be 2GiB.
              * This should accommodate the largest ATA transaction
-- 
2.1.0



