Patchwork [BUG:1562] Restart all bricks which are down when glusterd comes up

login
register
Submitter shishir gowda
Date 2010-09-22 10:51:28
Message ID <20100922105128.GA4035@dev.gluster.com>
Download mbox | patch
Permalink /patch/4927/
State Accepted
Headers show

Comments

shishir gowda - 2010-09-22 10:51:28
This is only done if the volume is started.

Signed-off-by: shishir gowda <shishirng@gluster.com>
---
 xlators/mgmt/glusterd/src/glusterd-pmap.c  |   16 ++++-
 xlators/mgmt/glusterd/src/glusterd-pmap.h  |    4 +-
 xlators/mgmt/glusterd/src/glusterd-utils.c |  109 ++++++++++++++++++++++++++++
 xlators/mgmt/glusterd/src/glusterd-utils.h |   13 ++++
 xlators/mgmt/glusterd/src/glusterd.c       |    3 +-
 xlators/mgmt/glusterd/src/glusterd.h       |   10 +++
 6 files changed, 150 insertions(+), 5 deletions(-)

Patch

diff --git a/xlators/mgmt/glusterd/src/glusterd-pmap.c b/xlators/mgmt/glusterd/src/glusterd-pmap.c
index aaa6ed1..1e72b69 100644
--- a/xlators/mgmt/glusterd/src/glusterd-pmap.c
+++ b/xlators/mgmt/glusterd/src/glusterd-pmap.c
@@ -399,6 +399,8 @@  gluster_pmap_signin (rpcsvc_request_t *req)
 {
         pmap_signin_req    args = {0,};
         pmap_signin_rsp    rsp  = {0,};
+        glusterd_brickinfo_t *brickinfo = NULL;
+        int                ret = -1;
 
         if (xdr_to_glusterfs_req (req, &args, xdr_to_pmap_signin_req)) {
                 req->rpc_err = GARBAGE_ARGS;
@@ -408,6 +410,11 @@  gluster_pmap_signin (rpcsvc_request_t *req)
         rsp.op_ret = pmap_registry_bind (THIS, args.port, args.brick,
                                          GF_PMAP_PORT_BRICKSERVER, req->trans);
 
+        ret = glusterd_get_brickinfo (THIS, args.brick, args.port, _gf_true,
+                                      &brickinfo);
+        if (!ret)
+                glusterd_set_brick_status (brickinfo, GF_BRICK_STARTED); 
+
 fail:
         glusterd_submit_reply (req, &rsp, NULL, 0, NULL,
                                (gd_serialize_t)xdr_from_pmap_signin_rsp);
@@ -424,7 +431,8 @@  gluster_pmap_signout (rpcsvc_request_t *req)
 {
         pmap_signout_req    args = {0,};
         pmap_signout_rsp    rsp  = {0,};
-
+        int                 ret = -1;
+        glusterd_brickinfo_t *brickinfo = NULL;
 
         if (xdr_to_glusterfs_req (req, &args, xdr_to_pmap_signout_req)) {
                 //failed to decode msg;
@@ -435,6 +443,11 @@  gluster_pmap_signout (rpcsvc_request_t *req)
         rsp.op_ret = pmap_registry_remove (THIS, args.port, args.brick,
                                            GF_PMAP_PORT_BRICKSERVER, req->trans);
 
+        ret = glusterd_get_brickinfo (THIS, args.brick, args.port, _gf_true, 
+                                      &brickinfo);
+        if (!ret)
+                glusterd_set_brick_status (brickinfo, GF_BRICK_STOPPED);
+
 fail:
         glusterd_submit_reply (req, &rsp, NULL, 0, NULL,
                                (gd_serialize_t)xdr_from_pmap_signout_rsp);
@@ -444,7 +457,6 @@  fail:
         return 0;
 }
 
-
 rpcsvc_actor_t gluster_pmap_actors[] = {
         [GF_PMAP_NULL] = {"NULL", GF_HNDSK_NULL, NULL, NULL, NULL },
         [GF_PMAP_PORTBYBRICK] = {"PORTBYBRICK", GF_PMAP_PORTBYBRICK,
diff --git a/xlators/mgmt/glusterd/src/glusterd-pmap.h b/xlators/mgmt/glusterd/src/glusterd-pmap.h
index a021da0..fcad50d 100644
--- a/xlators/mgmt/glusterd/src/glusterd-pmap.h
+++ b/xlators/mgmt/glusterd/src/glusterd-pmap.h
@@ -55,5 +55,7 @@  int pmap_registry_bind (xlator_t *this, int port, const char *brickname,
                         gf_pmap_port_type_t type, void *xprt);
 int pmap_registry_remove (xlator_t *this, int port, const char *brickname,
                           gf_pmap_port_type_t type, void *xprt);
-
+int
+pmap_registry_search (xlator_t *this, const char *brickname,
+                      gf_pmap_port_type_t type);
 #endif
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
index e359289..a59a069 100644
--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
@@ -41,6 +41,7 @@ 
 #include "glusterd-utils.h"
 #include "glusterd-store.h"
 #include "glusterd-volgen.h"
+#include "glusterd-pmap.h"
 
 #include <sys/resource.h>
 #include <inttypes.h>
@@ -1750,3 +1751,111 @@  glusterd_is_exisiting_brick (char *hostname, char *path)
 out:
         return ret;
 }
+
+/* After a 5s wait, (re)start down local bricks of every started volume. */
+int
+glusterd_restart_bricks (glusterd_conf_t *conf, xlator_t *this)
+{
+        glusterd_volinfo_t       *volinfo = NULL;
+        glusterd_brickinfo_t     *brickinfo = NULL;
+        char                     pidfile[PATH_MAX] = {0,};
+        char                     path[PATH_MAX] = {0,};
+        int                      ret = -1;
+        struct stat              stbuf = {0,};
+        struct                   timespec timeout;
+        sigset_t                 mask;
+
+        GF_ASSERT (conf);
+        GF_ASSERT (this);
+
+        /* The set must be initialised before use; passing an indeterminate
+         * sigset_t to sigprocmask() could block arbitrary signals. */
+        sigemptyset (&mask);
+        if (sigprocmask(SIG_BLOCK, &mask, NULL) < 0) {
+                perror ("sigprocmask");
+                return -1;
+        }
+
+        timeout.tv_sec = 5;
+        timeout.tv_nsec = 0;
+        sigtimedwait(&mask, NULL, &timeout);
+
+        list_for_each_entry (volinfo, &conf->volumes, vol_list) {
+                //If volume status is not started, do not proceed
+                if (volinfo->status != GLUSTERD_STATUS_STARTED)
+                        continue;
+                list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) {
+                        //Only bricks on localhost are to be started
+                        if (glusterd_is_local_addr (brickinfo->hostname))
+                                continue;
+                        //if started, implies already registered with pmap
+                        if (!glusterd_is_brick_started(brickinfo))
+                                continue;
+                        GLUSTERD_GET_VOLUME_DIR (path, volinfo, conf);
+                        GLUSTERD_GET_BRICK_PIDFILE (pidfile, path,
+                                brickinfo->hostname, brickinfo->path);
+                        ret = stat (pidfile, &stbuf);
+                        //pid file not found, proceed to start
+                        if (ret && errno == ENOENT) {
+                                glusterd_volume_start_glusterfs (volinfo,
+                                                                 brickinfo, 0);
+                        } else if (!ret) {
+                                ret = pmap_registry_search (this,
+                                              brickinfo->path,
+                                              GF_PMAP_PORT_BRICKSERVER);
+                                if (ret)
+                                        continue;
+                                //might be a stale pid file
+                                ret = unlink (pidfile);
+                                glusterd_volume_start_glusterfs (volinfo,
+                                                                 brickinfo, 0);
+                        }
+                }
+                glusterd_check_generate_start_nfs (volinfo);
+        }
+        return ret;
+}
+
+int
+glusterd_get_brickinfo (xlator_t *this, const char *brickname, int port,
+                        gf_boolean_t localhost, glusterd_brickinfo_t **brickinfo)
+{
+        glusterd_conf_t         *conf = NULL;
+        glusterd_volinfo_t      *vol = NULL;
+        glusterd_brickinfo_t    *cur = NULL;
+
+        GF_ASSERT (brickname);
+        GF_ASSERT (this);
+
+        /* Scan all volumes; the first brick matching both path and port is
+         * stored in *brickinfo (returns 0). No match returns -1. */
+        conf = this->private;
+        list_for_each_entry (vol, &conf->volumes, vol_list) {
+                list_for_each_entry (cur, &vol->bricks, brick_list) {
+                        if (localhost && glusterd_is_local_addr (cur->hostname))
+                                continue;
+                        if ((cur->port != port) ||
+                            strcmp (cur->path, brickname))
+                                continue;
+                        *brickinfo = cur;
+                        return 0;
+                }
+        }
+        return -1;
+}
+
+void
+glusterd_set_brick_status (glusterd_brickinfo_t  *brickinfo,
+                            gf_brick_status_t status)
+{
+        GF_ASSERT (brickinfo);          /* brickinfo is dereferenced below */
+        brickinfo->status = status;     /* plain store, no other side effect */
+}
+
+/* Returns 0 when brickinfo->status is GF_BRICK_STARTED, 1 otherwise. */
+int
+glusterd_is_brick_started (glusterd_brickinfo_t  *brickinfo)
+{
+        GF_ASSERT (brickinfo);
+        return (brickinfo->status != GF_BRICK_STARTED);
+}
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.h b/xlators/mgmt/glusterd/src/glusterd-utils.h
index 72715a0..bc20c2d 100644
--- a/xlators/mgmt/glusterd/src/glusterd-utils.h
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.h
@@ -160,4 +160,17 @@  glusterd_add_volume_to_dict (glusterd_volinfo_t *volinfo,
                              dict_t  *dict, int32_t count);
 int
 glusterd_is_exisiting_brick (char *hostname, char *path);
+
+int
+glusterd_get_brickinfo (xlator_t *this, const char *brickname, 
+                        int port, gf_boolean_t localhost, 
+                        glusterd_brickinfo_t **brickinfo);
+
+void
+glusterd_set_brick_status (glusterd_brickinfo_t  *brickinfo,
+                            gf_brick_status_t status);
+
+int
+glusterd_is_brick_started (glusterd_brickinfo_t  *brickinfo);
+
 #endif
diff --git a/xlators/mgmt/glusterd/src/glusterd.c b/xlators/mgmt/glusterd/src/glusterd.c
index da40c28..b64b001 100644
--- a/xlators/mgmt/glusterd/src/glusterd.c
+++ b/xlators/mgmt/glusterd/src/glusterd.c
@@ -44,7 +44,6 @@ 
 #include "glusterd-op-sm.h"
 #include "glusterd-store.h"
 
-
 static uuid_t glusterd_uuid;
 extern struct rpcsvc_program glusterd1_mop_prog;
 extern struct rpcsvc_program gluster_handshake_prog;
@@ -414,7 +413,7 @@  init (xlator_t *this)
         glusterd_op_sm_init ();
         glusterd_opinfo_init ();
 
-
+        glusterd_restart_bricks(conf, this);
         ret = 0;
 out:
         if (ret == -1) {
diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h
index b989076..abfb72c 100644
--- a/xlators/mgmt/glusterd/src/glusterd.h
+++ b/xlators/mgmt/glusterd/src/glusterd.h
@@ -75,6 +75,11 @@  typedef struct {
         glusterd_store_handle_t *handle;
 } glusterd_conf_t;
 
+typedef enum gf_brick_status {
+        GF_BRICK_STOPPED,       /* set when the brick signs out of pmap */
+        GF_BRICK_STARTED,       /* set when the brick signs in to pmap */
+} gf_brick_status_t;
+
 struct glusterd_brickinfo {
         char    hostname[1024];
         char    path[PATH_MAX];
@@ -83,6 +88,7 @@  struct glusterd_brickinfo {
         int     port;
         char   *logfile;
         glusterd_store_handle_t *shandle;
+        gf_brick_status_t status; 
 };
 
 typedef struct glusterd_brickinfo glusterd_brickinfo_t;
@@ -393,7 +399,11 @@  glusterd_fetchspec_notify (xlator_t *this);
 
 int32_t
 glusterd_sync_volume (rpcsvc_request_t *req, dict_t *ctx);
+
 int
 glusterd_add_volume_detail_to_dict (glusterd_volinfo_t *volinfo,
                                     dict_t  *volumes, int   count);
+
+int
+glusterd_restart_bricks(glusterd_conf_t *conf, xlator_t *this);
 #endif