[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Qemu-devel] [PATCH 11/11] QEMU NVMe: Support NVMe DIF and Meta-data
From: |
Keith Busch |
Subject: |
[Qemu-devel] [PATCH 11/11] QEMU NVMe: Support NVMe DIF and Meta-data |
Date: |
Tue, 26 Feb 2013 17:47:14 -0700 |
This only works with separate buffer meta-data, i.e. non-interleaved
meta-data. The sgl method requires block-multiple transfers, which may
not happen with the interleaved meta-data, so more work is needed to
double buffer. I don't think there is any use for the interleaved
meta-data anyway from a host driver perspective, except for perhaps the
controller PRACT bit set to 1.
Signed-off-by: Keith Busch <address@hidden>
---
hw/nvme.c | 72 +++++++++++++++++++++++++++++++++++++++++++++++++-----------
hw/nvme.h | 3 ++
2 files changed, 61 insertions(+), 14 deletions(-)
diff --git a/hw/nvme.c b/hw/nvme.c
index aec1ef7..8260e7c 100644
--- a/hw/nvme.c
+++ b/hw/nvme.c
@@ -59,7 +59,7 @@ enum {
};
#define DBGBIT(x) (1 << x)
-int debug_flags = DBGBIT(IO_DBG) | DBGBIT(DBG) | DBGBIT(INFO) | DBGBIT(ERR) |
DBGBIT(ADM_DBG);
+int debug_flags = DBGBIT(INFO) | DBGBIT(ERR) | DBGBIT(ADM_DBG);
#define NVME_LOG(level, fmt, ...) \
do {\
if (debug_flags & DBGBIT(level)) { \
@@ -460,14 +460,32 @@ static void nvme_rw_cb(void *opaque, int ret)
if (!req->rw) {
nvme_update_ns_util(ns, req->slba, req->nlb);
}
-
if (!ret) {
req->cqe.status = NVME_SUCCESS << 1;
+ if (req->meta_size) {
+ uint8_t lba_index = NVME_ID_NS_FLBAS_INDEX(ns->id_ns.flbas);
+ int64_t offset = ns->meta_start_block * BDRV_SECTOR_SIZE +
+ req->slba * ns->id_ns.lbaf[lba_index].ms;
+ void *buf = g_malloc(req->meta_size);
+ if (req->rw) {
+ ret = bdrv_pread(n->conf.bs, offset, buf, req->meta_size);
+ pci_dma_write(&n->dev, req->mptr, buf, req->meta_size);
+ } else {
+ pci_dma_read(&n->dev, req->mptr, buf, req->meta_size);
+ ret = bdrv_pwrite(n->conf.bs, offset, buf, req->meta_size);
+ }
+ if (ret != req->meta_size) {
+ NVME_LOG(ERR, "meta-data transfer error ret:%d", ret);
+ req->cqe.status = NVME_INTERNAL_DEV_ERROR << 1;
+ }
+ g_free(buf);
+ }
} else {
NVME_LOG(ERR, "nsid:%u nlb:%u slba:%"PRIu64" ret:%d", ns->id, req->nlb,
req->slba, ret);
req->cqe.status = NVME_INTERNAL_DEV_ERROR << 1;
}
+
nvme_enqueue_req_completion(cq, req);
}
@@ -478,6 +496,7 @@ static uint16_t nvme_rw(NvmeCtrl *n, NvmeNamespace *ns,
NvmeCmd *cmd,
uint8_t lba_index = NVME_ID_NS_FLBAS_INDEX(ns->id_ns.flbas);
uint8_t data_shift = ns->id_ns.lbaf[lba_index].ds;
uint64_t data_size = (rw->nlb + 1) << data_shift;
+ uint64_t meta_size = (rw->nlb + 1) * ns->id_ns.lbaf[lba_index].ms;
int data_dir = rw->opcode == NVME_CMD_WRITE ? 0 : 1;
uint16_t ret;
@@ -498,6 +517,13 @@ static uint16_t nvme_rw(NvmeCtrl *n, NvmeNamespace *ns,
NvmeCmd *cmd,
offsetof(NvmeRwCmd, nlb), rw->slba + rw->nlb, ns->id);
return NVME_INVALID_FIELD | NVME_DNR;
}
+ if (meta_size && !rw->mptr &&
+ !NVME_ID_NS_FLBAS_EXTENDED(ns->id_ns.flbas)) {
+ NVME_LOG(ERR, "meta-data pointer required for namespace format");
+ nvme_set_error_page(n, req->sq->id, cmd->cid, NVME_INVALID_FIELD,
+ offsetof(NvmeRwCmd, mptr), rw->slba + rw->nlb, ns->id);
+ return NVME_INVALID_FIELD | NVME_DNR;
+ }
ret = nvme_map_prp(&req->qsg, rw->prp1, rw->prp2, data_size, n);
if (ret == NVME_SUCCESS) {
@@ -505,6 +531,8 @@ static uint16_t nvme_rw(NvmeCtrl *n, NvmeNamespace *ns,
NvmeCmd *cmd,
uint32_t nlb = (rw->nlb + 1) << (data_shift - 9);
assert(nlb * BDRV_SECTOR_SIZE == req->qsg.size);
+ req->meta_size = meta_size;
+ req->mptr = rw->mptr;
req->slba = rw->slba;
req->nlb = rw->nlb;
req->ns = ns;
@@ -512,6 +540,7 @@ static uint16_t nvme_rw(NvmeCtrl *n, NvmeNamespace *ns,
NvmeCmd *cmd,
req->aiocb = data_dir ?
dma_bdrv_read(n->conf.bs, &req->qsg, slba, nvme_rw_cb, req) :
dma_bdrv_write(n->conf.bs, &req->qsg, slba, nvme_rw_cb, req);
+
NVME_LOG(IO_DBG,
"req:%p aiocb:%p ns:%u sq:%u cid:%x nlb:%u slba:%"PRIu64" "\
"aio slba:%"PRIu64" aio nlb:%u", req, req->aiocb, ns->id,
@@ -1228,6 +1257,8 @@ static uint16_t nvme_abort_req(NvmeCtrl *n, NvmeCmd *cmd,
uint32_t *result)
}
sq = n->sq[sqid];
+ /* scan the queue for work that hasn't been processed yet.
+ * TODO: scan the outstanding work and issue an aio cancel */
while ((sq->head + index) % sq->size != sq->tail) {
NvmeCmd abort_cmd;
hwaddr addr;
@@ -1315,8 +1346,7 @@ static uint16_t nvme_format_namespace(NvmeNamespace *ns,
uint8_t lba_idx,
ns->util = bitmap_new(ns->id_ns.nsze);
if (sec_erase) {
- /* TODO: write zeros, complete asynchronously */
- ;
+ /* TODO: write zeros, complete asynchronously */ ;
}
return NVME_SUCCESS;
@@ -1427,7 +1457,6 @@ static void nvme_sq_process(void *opaque)
req->aiocb = NULL;
req->aiocb_dsm = NULL;
- printf("about to call nvme command handler\n");
status = sq->id ? nvme_io_cmd(n, &cmd, req) :
nvme_admin_cmd(n, &cmd, req);
if (status != NVME_NO_COMPLETE) {
@@ -1714,7 +1743,7 @@ static int nvme_init(PCIDevice *pci_dev)
NvmeIdCtrl *id = &n->id_ctrl;
uint8_t *pci_conf;
int64_t bs_size;
- int i, j;
+ int i, j, k;
NVME_LOG(DBG, "new controller B:D.f: %02x:%02x.%u",
pci_bus_num(pci_dev->bus), PCI_SLOT(pci_dev->devfn),
@@ -1757,7 +1786,7 @@ static int nvme_init(PCIDevice *pci_dev)
NVME_LOG(ERR, "requested invalid volatile write cache:%u", n->vwc);
return -1;
}
- if (n->lba_index > 3) {
+ if (n->lba_index > 15) {
NVME_LOG(ERR, "requested invalid lba index:%u", n->lba_index);
return -1;
}
@@ -1854,18 +1883,34 @@ static int nvme_init(PCIDevice *pci_dev)
NVME_LOG(DBG, "ctrl:%u cap:%016lx", n->instance, n->bar.cap);
for (i = 0; i < n->num_namespaces; i++) {
+ uint64_t blks;
NvmeNamespace *ns = &n->namespaces[i];
NvmeIdNs *id_ns = &ns->id_ns;
- id_ns->nlbaf = 0x4;
+ id_ns->nlbaf = 0x7;
id_ns->flbas = n->lba_index;
-
- for (j = 0; j <= id_ns->nlbaf; j++) {
- id_ns->lbaf[j].ds = 9 + j;
+ id_ns->mc = 1 << 1;
+ id_ns->dps = DPS_TYPE_1;
+ id_ns->dpc = 1 << 3 | 1;
+ for (j = 0; j < 2; j++) {
+ for (k = 0; k <= id_ns->nlbaf; k++) {
+ id_ns->lbaf[k + j * (id_ns->nlbaf)].ds = 9 + k;
+ id_ns->lbaf[k + j * (id_ns->nlbaf)].ms = j * n->meta;
+ }
}
- id_ns->ncap = id_ns->nsze = (n->ns_size) >> id_ns->lbaf[j].ds;
+
+ blks = n->ns_size / ((1 << id_ns->lbaf[n->lba_index].ds) +
+ id_ns->lbaf[n->lba_index].ms);
+ id_ns->ncap = id_ns->nsze = blks;
ns->id = i + 1;
ns->ctrl = n;
- ns->start_block = (n->ns_size / BDRV_SECTOR_SIZE) * i;
+ ns->start_block = (n->ns_size / BDRV_SECTOR_SIZE +
+ n->meta * id_ns->nsze) * i;
+ if (n->meta) {
+ id_ns->nlbaf = 0xf;
+ ns->meta_start_block = ns->start_block +
+ (id_ns->nsze << id_ns->lbaf[n->lba_index].ds) /
+ BDRV_SECTOR_SIZE;
+ }
ns->util = bitmap_new(id_ns->nsze);
}
@@ -1875,7 +1920,6 @@ static int nvme_init(PCIDevice *pci_dev)
"\tfile size:%"PRIu64"", instance, n->num_namespaces, n->ns_size,
n->num_queues, n->db_stride, n->reg_size, n->max_q_ents, n->cqr,
n->mdts, n->aerl, n->acl, n->elpe, bdrv_getlength(n->conf.bs));
-
return 0;
}
diff --git a/hw/nvme.h b/hw/nvme.h
index 926f843..5264232 100644
--- a/hw/nvme.h
+++ b/hw/nvme.h
@@ -596,6 +596,8 @@ typedef struct NvmeRequest {
uint16_t rw;
uint16_t nlb;
uint16_t aio_count;
+ uint32_t meta_size;
+ uint64_t mptr;
NvmeCqe cqe;
QEMUSGList qsg;
QTAILQ_ENTRY(NvmeRequest)entry;
@@ -651,6 +653,7 @@ typedef struct NvmeNamespace {
unsigned long *util;
unsigned long *uncorrectable;
uint64_t start_block;
+ uint64_t meta_start_block;
} NvmeNamespace;
typedef struct NvmeCtrl {
--
1.7.0.4
- [Qemu-devel] [PATCH 02/11] QEMU NVMe: Add command line options, (continued)
- [Qemu-devel] [PATCH 02/11] QEMU NVMe: Add command line options, Keith Busch, 2013/02/26
- [Qemu-devel] [PATCH 06/11] QEMU NVMe: Implement flush and dsm, Keith Busch, 2013/02/26
- [Qemu-devel] [PATCH 07/11] QEMU NVMe: Set error pages with error data, Keith Busch, 2013/02/26
- [Qemu-devel] [PATCH 05/11] QEMU NVMe: Add DSM command support, Keith Busch, 2013/02/26
- [Qemu-devel] [PATCH 09/11] QEMU NVMe: Implement discontiguous queues, Keith Busch, 2013/02/26
- [Qemu-devel] [PATCH 04/11] QEMU NVMe: Implement additional admin commands, Keith Busch, 2013/02/26
- [Qemu-devel] [PATCH 10/11] QEMU NVMe: Add logging, Keith Busch, 2013/02/26
- [Qemu-devel] [PATCH 11/11] QEMU NVMe: Support NVMe DIF and Meta-data,
Keith Busch <=
- [Qemu-devel] [PATCH 01/11] NVMe: Initial commit for NVM Express device, Keith Busch, 2013/02/26
Re: [Qemu-devel] [PATCH 00/11] *** SUBJECT HERE ***, Stefan Hajnoczi, 2013/02/27