qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Qemu-devel] [PATCH v2 4/7] md: add hot-plug and hot-unplug support


From: Liu Yuan
Subject: [Qemu-devel] [PATCH v2 4/7] md: add hot-plug and hot-unplug support
Date: Fri, 29 Mar 2013 13:42:21 +0800

From: Liu Yuan <address@hidden>

We allow group plug, group unplug, and disk failure during (un)plugging.

Also add a disk information function for collie.

Signed-off-by: Liu Yuan <address@hidden>
---
 collie/collie.c          |    2 +-
 include/internal_proto.h |   16 +++
 include/sheepdog_proto.h |    2 +
 sheep/md.c               |  263 ++++++++++++++++++++++++++++++++--------------
 sheep/ops.c              |   45 ++++++++
 sheep/sheep_priv.h       |    5 +-
 sheep/store.c            |    3 +-
 7 files changed, 253 insertions(+), 83 deletions(-)

diff --git a/collie/collie.c b/collie/collie.c
index 08c78eb..19085b4 100644
--- a/collie/collie.c
+++ b/collie/collie.c
@@ -19,7 +19,7 @@
 #include "util.h"
 
 static const char program_name[] = "collie";
-const char *sdhost = "localhost";
+const char *sdhost = "127.0.0.1";
 int sdport = SD_LISTEN_PORT;
 bool highlight = true;
 bool raw_output;
diff --git a/include/internal_proto.h b/include/internal_proto.h
index 6f1fdb3..c43855b 100644
--- a/include/internal_proto.h
+++ b/include/internal_proto.h
@@ -69,6 +69,9 @@
 #define SD_OP_FLUSH_PEER 0xAE
 #define SD_OP_NOTIFY_VDI_ADD  0xAF
 #define SD_OP_DELETE_CACHE    0xB0
+#define SD_OP_MD_INFO   0xB1
+#define SD_OP_MD_PLUG   0xB2
+#define SD_OP_MD_UNPLUG 0xB3
 
 /* internal flags for hdr.flags, must be above 0x80 */
 #define SD_FLAG_CMD_RECOVERY 0x0080
@@ -229,4 +232,17 @@ struct vdi_op_message {
        uint8_t data[0];
 };
 
+struct md_info {
+       int idx;
+       uint64_t size;
+       uint64_t used;
+       char path[PATH_MAX];
+};
+
+#define MD_MAX_DISK 64 /* FIXME remove roof and make it dynamic */
+struct sd_md_info {
+       struct md_info disk[MD_MAX_DISK];
+       int nr;
+};
+
 #endif /* __INTERNAL_PROTO_H__ */
diff --git a/include/sheepdog_proto.h b/include/sheepdog_proto.h
index fe3738b..94baede 100644
--- a/include/sheepdog_proto.h
+++ b/include/sheepdog_proto.h
@@ -13,6 +13,8 @@
 
 #include <inttypes.h>
 #include <stdint.h>
+#include <linux/limits.h>
+
 #include "util.h"
 
 #define SD_PROTO_VER 0x02
diff --git a/sheep/md.c b/sheep/md.c
index 821a391..124f2ba 100644
--- a/sheep/md.c
+++ b/sheep/md.c
@@ -21,11 +21,12 @@
 #include <sys/xattr.h>
 #include <dirent.h>
 #include <pthread.h>
+#include <string.h>
 
 #include "sheep_priv.h"
+#include "util.h"
 
 #define MD_DEFAULT_VDISKS 128
-#define MD_MAX_DISK 64 /* FIXME remove roof and make it dynamic */
 #define MD_MAX_VDISK (MD_MAX_DISK * MD_DEFAULT_VDISKS)
 
 struct disk {
@@ -123,20 +124,33 @@ static inline struct vdisk *oid_to_vdisk(uint64_t oid)
        return oid_to_vdisk_from(md_vds, md_nr_vds, oid);
 }
 
-int md_init_disk(char *path)
+static int path_to_disk_idx(char *path)
 {
+       int i;
+
+       for (i = 0; i < md_nr_disks; i++)
+               if (strcmp(md_disks[i].path, path) == 0)
+                       return i;
+
+       return -1;
+}
+
+void md_add_disk(char *path)
+{
+       if (path_to_disk_idx(path) != -1) {
+               sd_eprintf("duplicate path %s", path);
+               return;
+       }
+
        md_nr_disks++;
 
-       if (xmkdir(path, def_dmode) < 0)
-                       panic("%s, %m", path);
        pstrcpy(md_disks[md_nr_disks - 1].path, PATH_MAX, path);
-       sd_iprintf("%s added to md, nr %d", md_disks[md_nr_disks - 1].path,
+       sd_iprintf("%s, nr %d", md_disks[md_nr_disks - 1].path,
                   md_nr_disks);
-       return 0;
 }
 
 static inline void calculate_vdisks(struct disk *disks, int nr_disks,
-                            uint64_t total)
+                                   uint64_t total)
 {
        uint64_t avg_size = total / nr_disks;
        float factor;
@@ -154,6 +168,79 @@ static inline void calculate_vdisks(struct disk *disks, 
int nr_disks,
 #define MDNAME "user.md.size"
 #define MDSIZE sizeof(uint64_t)
 
+static int get_total_object_size(uint64_t oid, char *ignore, void *total)
+{
+       uint64_t *t = total;
+       *t += get_objsize(oid);
+
+       return SD_RES_SUCCESS;
+}
+
+/* If cleanup is true, temporary objects will be removed */
+static int for_each_object_in_path(char *path,
+                                  int (*func)(uint64_t, char *, void *),
+                                  bool cleanup, void *arg)
+{
+       DIR *dir;
+       struct dirent *d;
+       uint64_t oid;
+       int ret = SD_RES_SUCCESS;
+       char p[PATH_MAX];
+
+       dir = opendir(path);
+       if (!dir) {
+               sd_eprintf("failed to open %s, %m", path);
+               return SD_RES_EIO;
+       }
+
+       while ((d = readdir(dir))) {
+               if (!strncmp(d->d_name, ".", 1))
+                       continue;
+
+               oid = strtoull(d->d_name, NULL, 16);
+               if (oid == 0 || oid == ULLONG_MAX)
+                       continue;
+
+               /* don't call callback against temporary objects */
+               if (strlen(d->d_name) == 20 &&
+                   strcmp(d->d_name + 16, ".tmp") == 0) {
+                       if (cleanup) {
+                               snprintf(p, PATH_MAX, "%s/%016"PRIx64".tmp",
+                                        path, oid);
+                               sd_dprintf("remove tmp object %s", p);
+                               unlink(p);
+                       }
+                       continue;
+               }
+
+               ret = func(oid, path, arg);
+               if (ret != SD_RES_SUCCESS)
+                       break;
+       }
+       closedir(dir);
+       return ret;
+}
+
+static uint64_t get_path_size(char *path, uint64_t *used)
+{
+       struct statvfs fs;
+       uint64_t size;
+
+       if (statvfs(path, &fs) < 0) {
+               sd_eprintf("get disk %s space failed %m", path);
+               return 0;
+       }
+       size = (int64_t)fs.f_frsize * fs.f_bfree;
+
+       if (!used)
+               goto out;
+       if (for_each_object_in_path(path, get_total_object_size, false, used)
+           != SD_RES_SUCCESS)
+               return 0;
+out:
+       return size;
+}
+
 /*
  * If path is broken during initilization or not support xattr return 0. We can
  * safely use 0 to represent failure case  because 0 space path can be
@@ -161,9 +248,13 @@ static inline void calculate_vdisks(struct disk *disks, 
int nr_disks,
  */
 static uint64_t init_path_space(char *path)
 {
-       struct statvfs fs;
        uint64_t size;
 
+       if (xmkdir(path, def_dmode) < 0) {
+               sd_eprintf("%s, %m", path);
+               goto broken_path;
+       }
+
        if (!is_xattr_enabled(path)) {
                sd_iprintf("multi-disk support need xattr feature");
                goto broken_path;
@@ -180,11 +271,9 @@ static uint64_t init_path_space(char *path)
 
        return size;
 create:
-       if (statvfs(path, &fs) < 0) {
-               sd_eprintf("get disk %s space failed %m", path);
+       size = get_path_size(path, NULL);
+       if (!size)
                goto broken_path;
-       }
-       size = (int64_t)fs.f_frsize * fs.f_bfree;
        if (setxattr(path, MDNAME, &size, MDSIZE, 0) < 0) {
                sd_eprintf("%s, %m", path);
                goto broken_path;
@@ -229,7 +318,8 @@ reinit:
        }
        calculate_vdisks(md_disks, md_nr_disks, total);
        md_nr_vds = disks_to_vdisks(md_disks, md_nr_disks, md_vds);
-       sys->enable_md = true;
+       if (!sys->enable_md)
+               sys->enable_md = true;
 
        return total;
 }
@@ -259,51 +349,6 @@ static char *get_object_path_nolock(uint64_t oid)
        return md_disks[vd->idx].path;
 }
 
-/* If cleanup is true, temporary objects will be removed */
-static int for_each_object_in_path(char *path,
-                                  int (*func)(uint64_t, char *, void *),
-                                  bool cleanup, void *arg)
-{
-       DIR *dir;
-       struct dirent *d;
-       uint64_t oid;
-       int ret = SD_RES_SUCCESS;
-       char p[PATH_MAX];
-
-       dir = opendir(path);
-       if (!dir) {
-               sd_eprintf("failed to open %s, %m", path);
-               return SD_RES_EIO;
-       }
-
-       while ((d = readdir(dir))) {
-               if (!strncmp(d->d_name, ".", 1))
-                       continue;
-
-               oid = strtoull(d->d_name, NULL, 16);
-               if (oid == 0 || oid == ULLONG_MAX)
-                       continue;
-
-               /* don't call callback against temporary objects */
-               if (strlen(d->d_name) == 20 &&
-                   strcmp(d->d_name + 16, ".tmp") == 0) {
-                       if (cleanup) {
-                               snprintf(p, PATH_MAX, "%s/%016"PRIx64".tmp",
-                                        path, oid);
-                               sd_dprintf("remove tmp object %s", p);
-                               unlink(p);
-                       }
-                       continue;
-               }
-
-               ret = func(oid, path, arg);
-               if (ret != SD_RES_SUCCESS)
-                       break;
-       }
-       closedir(dir);
-       return ret;
-}
-
 int for_each_object_in_wd(int (*func)(uint64_t oid, char *path, void *arg),
                          bool cleanup, void *arg)
 {
@@ -345,17 +390,6 @@ struct md_work {
        char path[PATH_MAX];
 };
 
-static int path_to_disk_idx(char *path)
-{
-       int i;
-
-       for (i = 0; i < md_nr_disks; i++)
-               if (strcmp(md_disks[i].path, path) == 0)
-                       return i;
-
-       return -1;
-}
-
 static inline void kick_recover(void)
 {
        struct vnode_info *vinfo = get_vnode_info();
@@ -364,15 +398,6 @@ static inline void kick_recover(void)
        put_vnode_info(vinfo);
 }
 
-static void unplug_disk(int idx)
-{
-
-       remove_disk(idx);
-       sys->disk_space = md_init_space();
-       if (md_nr_disks > 0)
-               kick_recover();
-}
-
 static void md_do_recover(struct work *work)
 {
        struct md_work *mw = container_of(work, struct md_work, work);
@@ -383,7 +408,10 @@ static void md_do_recover(struct work *work)
        if (idx < 0)
                /* Just ignore the duplicate EIO of the same path */
                goto out;
-       unplug_disk(idx);
+       remove_disk(idx);
+       sys->disk_space = md_init_space();
+       if (md_nr_disks > 0)
+               kick_recover();
 out:
        pthread_rwlock_unlock(&md_lock);
        free(mw);
@@ -500,3 +528,80 @@ int md_get_stale_path(uint64_t oid, uint32_t epoch, char 
*path)
 
        return SD_RES_NO_OBJ;
 }
+
+uint32_t md_get_info(struct sd_md_info *info)
+{
+       uint32_t ret = sizeof(*info);
+       int i;
+
+       memset(info, 0, ret);
+       pthread_rwlock_rdlock(&md_lock);
+       for (i = 0; i < md_nr_disks; i++) {
+               info->disk[i].idx = i;
+               pstrcpy(info->disk[i].path, PATH_MAX, md_disks[i].path);
+               info->disk[i].size = get_path_size(info->disk[i].path,
+                                                  &info->disk[i].used);
+               if (!info->disk[i].size) {
+                       ret = 0;
+                       break;
+               }
+       }
+       info->nr = md_nr_disks;
+       pthread_rwlock_unlock(&md_lock);
+       return ret;
+}
+
+static inline void md_del_disk(char *path)
+{
+       int idx = path_to_disk_idx(path);
+
+       if (idx < 0) {
+               sd_eprintf("invalid path %s", path);
+               return;
+       }
+       remove_disk(idx);
+}
+
+static int do_plug_unplug(char *disks, bool plug)
+{
+       char *path;
+       int old_nr, ret = SD_RES_UNKNOWN;
+
+       pthread_rwlock_wrlock(&md_lock);
+       old_nr = md_nr_disks;
+       path = strtok(disks, ",");
+       do {
+               if (plug)
+                       md_add_disk(path);
+               else
+                       md_del_disk(path);
+       } while ((path = strtok(NULL, ",")));
+
+       /* If no disks change, bail out */
+       if (old_nr == md_nr_disks)
+               goto out;
+
+       sys->disk_space = md_init_space();
+       /*
+        * We have to kick recovery aggressively because it is possible that
+        * the number of disks removed during md_init_space() happens to equal
+        * the number of disks we added.
+        */
+       if (md_nr_disks > 0)
+               kick_recover();
+
+       ret = SD_RES_SUCCESS;
+out:
+       pthread_rwlock_unlock(&md_lock);
+       return ret;
+}
+
+int md_plug_disks(char *disks)
+{
+       return do_plug_unplug(disks, true);
+}
+
+int md_unplug_disks(char *disks)
+{
+       return do_plug_unplug(disks, false);
+}
diff --git a/sheep/ops.c b/sheep/ops.c
index 8cba70d..3839437 100644
--- a/sheep/ops.c
+++ b/sheep/ops.c
@@ -667,6 +667,33 @@ static int local_set_cache_size(const struct sd_req *req, 
struct sd_rsp *rsp,
        return SD_RES_SUCCESS;
 }
 
+static int local_md_info(struct request *request)
+{
+       struct sd_rsp *rsp = &request->rp;
+       struct sd_req *req = &request->rq;
+
+       assert(req->data_length == sizeof(struct sd_md_info));
+       rsp->data_length = md_get_info((struct sd_md_info *)request->data);
+
+       return rsp->data_length ? SD_RES_SUCCESS : SD_RES_UNKNOWN;
+}
+
+static int local_md_plug(const struct sd_req *req, struct sd_rsp *rsp,
+                        void *data)
+{
+       char *disks = (char *)data;
+
+       return md_plug_disks(disks);
+}
+
+static int local_md_unplug(const struct sd_req *req, struct sd_rsp *rsp,
+                          void *data)
+{
+       char *disks = (char *)data;
+
+       return md_unplug_disks(disks);
+}
+
 static int cluster_restore(const struct sd_req *req, struct sd_rsp *rsp,
                           void *data)
 {
@@ -1110,6 +1137,24 @@ static struct sd_op_template sd_ops[] = {
                .process_main = local_set_cache_size,
        },
 
+       [SD_OP_MD_INFO] = {
+               .name = "MD_INFO",
+               .type = SD_OP_TYPE_LOCAL,
+               .process_work = local_md_info,
+       },
+
+       [SD_OP_MD_PLUG] = {
+               .name = "MD_PLUG_DISKS",
+               .type = SD_OP_TYPE_LOCAL,
+               .process_main = local_md_plug,
+       },
+
+       [SD_OP_MD_UNPLUG] = {
+               .name = "MD_UNPLUG_DISKS",
+               .type = SD_OP_TYPE_LOCAL,
+               .process_main = local_md_unplug,
+       },
+
        /* gateway I/O operations */
        [SD_OP_CREATE_AND_WRITE_OBJ] = {
                .name = "CREATE_AND_WRITE_OBJ",
diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h
index 652fd3a..098a7bb 100644
--- a/sheep/sheep_priv.h
+++ b/sheep/sheep_priv.h
@@ -417,11 +417,14 @@ int journal_file_init(const char *path, size_t size, bool 
skip);
 int journal_file_write(uint64_t oid, const char *buf, size_t size, off_t, 
bool);
 
 /* md.c */
-int md_init_disk(char *path);
+void md_add_disk(char *path);
 uint64_t md_init_space(void);
 char *get_object_path(uint64_t oid);
 int md_handle_eio(char *);
 bool md_exist(uint64_t oid);
 int md_get_stale_path(uint64_t oid, uint32_t epoch, char *path);
+uint32_t md_get_info(struct sd_md_info *info);
+int md_plug_disks(char *disks);
+int md_unplug_disks(char *disks);
 
 #endif
diff --git a/sheep/store.c b/sheep/store.c
index 58303fa..cbf24dc 100644
--- a/sheep/store.c
+++ b/sheep/store.c
@@ -269,8 +269,7 @@ static int init_obj_path(const char *base_path, char *argp)
        /* Eat up the first component */
        strtok(argp, ",");
        while ((p = strtok(NULL, ",")))
-               if (md_init_disk(p) < 0)
-                       return -1;
+               md_add_disk(p);
 
        return init_path(obj_path, NULL);
 }
-- 
1.7.9.5




reply via email to

[Prev in Thread] Current Thread [Next in Thread]