[SCM] CTDB repository - branch master updated - ctdb-1.13-148-g05603e9

Ronnie Sahlberg sahlberg at samba.org
Mon Apr 30 19:05:55 MDT 2012


The branch, master has been updated
       via  05603e914f8c12618d7e06943c0f7df207f645b0 (commit)
      from  e9fad1ff82e6fb9e6bf615986c41c746fd415058 (commit)

http://gitweb.samba.org/?p=ctdb.git;a=shortlog;h=master


- Log -----------------------------------------------------------------
commit 05603e914f8c12618d7e06943c0f7df207f645b0
Author: Ronnie Sahlberg <ronniesahlberg at gmail.com>
Date:   Mon Apr 30 15:50:44 2012 +1000

    Add new control to reload the public ip address file on a node
    
    Also add a method to use the recovery master/daemon to reload the public ips on all nodes in the cluster.
    Reloading the public ips on all nodes in the cluster is only supported if all nodes in the cluster are available and healthy.

-----------------------------------------------------------------------

Summary of changes:
 include/ctdb_private.h  |   18 ++++-
 include/ctdb_protocol.h |    6 +
 server/ctdb_control.c   |    4 +
 server/ctdb_daemon.c    |    6 +-
 server/ctdb_recoverd.c  |   89 +++++++++++++++++
 server/ctdb_takeover.c  |  246 +++++++++++++++++++++++++++++++++++++++++++++--
 tools/ctdb.c            |   91 +++++++++++++++++
 7 files changed, 450 insertions(+), 10 deletions(-)


Changeset truncated at 500 lines:

diff --git a/include/ctdb_private.h b/include/ctdb_private.h
index 657bb62..6111910 100644
--- a/include/ctdb_private.h
+++ b/include/ctdb_private.h
@@ -497,6 +497,10 @@ struct ctdb_context {
 
 	/* list of event script callback functions that are active */
 	struct event_script_callback *script_callbacks;
+
+	struct ctdb_reloadips_handle *reload_ips;
+
+	const char *public_addresses_file;
 };
 
 struct ctdb_db_context {
@@ -1135,7 +1139,7 @@ int ctdb_sys_send_tcp(const ctdb_sock_addr *dest,
 		      const ctdb_sock_addr *src,
 		      uint32_t seq, uint32_t ack, int rst);
 
-int ctdb_set_public_addresses(struct ctdb_context *ctdb, const char *alist);
+int ctdb_set_public_addresses(struct ctdb_context *ctdb);
 int ctdb_set_single_public_ip(struct ctdb_context *ctdb,
 			      const char *iface,
 			      const char *ip);
@@ -1470,4 +1474,16 @@ int32_t ctdb_control_get_db_statistics(struct ctdb_context *ctdb,
 
 int ctdb_set_db_sticky(struct ctdb_context *ctdb, struct ctdb_db_context *ctdb_db);
 
+/*
+  description for a message to reload all ips via recovery master/daemon
+ */
+struct reloadips_all_reply {
+	uint32_t pnn;
+	uint64_t srvid;
+};
+
+int32_t ctdb_control_reload_public_ips(struct ctdb_context *ctdb, struct ctdb_req_control *c, bool *async_reply);
+
+int ctdb_start_monitoring_interfaces(struct ctdb_context *ctdb);
+
 #endif
diff --git a/include/ctdb_protocol.h b/include/ctdb_protocol.h
index cb34fd5..5c787ff 100644
--- a/include/ctdb_protocol.h
+++ b/include/ctdb_protocol.h
@@ -135,6 +135,11 @@ struct ctdb_call_info {
 */
 #define CTDB_SRVID_REBALANCE_NODE 0xFB01000000000000LL
 
+/*
+   a message handler ID meaning to ask recovery master to reload all ips
+ */
+#define CTDB_SRVID_RELOAD_ALL_IPS 0xFB02000000000000LL
+
 /* A message id to ask the recovery daemon to temporarily disable the
    public ip checks
 */
@@ -384,6 +389,7 @@ enum ctdb_controls {CTDB_CONTROL_PROCESS_EXISTS          = 0,
 		    CTDB_CONTROL_TRAVERSE_START_EXT	 = 131,
 		    CTDB_CONTROL_GET_DB_STATISTICS	 = 132,
 		    CTDB_CONTROL_SET_DB_STICKY		 = 133,
+		    CTDB_CONTROL_RELOAD_PUBLIC_IPS	 = 134,
 };
 
 /*
diff --git a/server/ctdb_control.c b/server/ctdb_control.c
index 0921082..2a16a23 100644
--- a/server/ctdb_control.c
+++ b/server/ctdb_control.c
@@ -644,6 +644,10 @@ static int32_t ctdb_control_dispatch(struct ctdb_context *ctdb,
 		CHECK_CONTROL_DATA_SIZE(sizeof(uint32_t));
 		return ctdb_control_get_db_statistics(ctdb, *(uint32_t *)indata.dptr, outdata);
 
+	case CTDB_CONTROL_RELOAD_PUBLIC_IPS:
+		CHECK_CONTROL_DATA_SIZE(0);
+		return ctdb_control_reload_public_ips(ctdb, c, async_reply);
+
 	default:
 		DEBUG(DEBUG_CRIT,(__location__ " Unknown CTDB control opcode %u\n", opcode));
 		return -1;
diff --git a/server/ctdb_daemon.c b/server/ctdb_daemon.c
index 5002629..192dbff 100644
--- a/server/ctdb_daemon.c
+++ b/server/ctdb_daemon.c
@@ -1161,11 +1161,15 @@ int ctdb_start_daemon(struct ctdb_context *ctdb, bool do_fork, bool use_syslog,
 		ctdb_fatal(ctdb, "transport failed to initialise");
 	}
 	if (public_address_list) {
-		ret = ctdb_set_public_addresses(ctdb, public_address_list);
+		ctdb->public_addresses_file = public_address_list;
+		ret = ctdb_set_public_addresses(ctdb);
 		if (ret == -1) {
 			DEBUG(DEBUG_ALERT,("Unable to setup public address list\n"));
 			exit(1);
 		}
+		if (ctdb->do_checkpublicip) {
+			ctdb_start_monitoring_interfaces(ctdb);
+		}
 	}
 
 
diff --git a/server/ctdb_recoverd.c b/server/ctdb_recoverd.c
index 359fe79..d56fdb5 100644
--- a/server/ctdb_recoverd.c
+++ b/server/ctdb_recoverd.c
@@ -30,6 +30,11 @@
 #include "dlinklist.h"
 
 
+/* most recent reload all ips request we need to perform during the 
+   next monitoring loop
+*/
+struct reloadips_all_reply *reload_all_ips_request = NULL;
+
 /* list of "ctdb ipreallocate" processes to call back when we have
    finished the takeover run.
 */
@@ -2174,6 +2179,79 @@ static void disable_ip_check_handler(struct ctdb_context *ctdb, uint64_t srvid,
 
 
 /*
+  handler for reload all ips.
+*/
+static void ip_reloadall_handler(struct ctdb_context *ctdb, uint64_t srvid, 
+			     TDB_DATA data, void *private_data)
+{
+	struct ctdb_recoverd *rec = talloc_get_type(private_data, struct ctdb_recoverd);
+
+	if (data.dsize != sizeof(struct reloadips_all_reply)) {
+		DEBUG(DEBUG_ERR, (__location__ " Wrong size of return address.\n"));
+		return;
+	}
+
+	reload_all_ips_request = (struct reloadips_all_reply *)talloc_steal(rec, data.dptr);
+
+	DEBUG(DEBUG_NOTICE,("RELOAD_ALL_IPS message received from node:%d srvid:%d\n", reload_all_ips_request->pnn, (int)reload_all_ips_request->srvid));
+	return;
+}
+
+static void async_reloadips_callback(struct ctdb_context *ctdb, uint32_t node_pnn, int32_t res, TDB_DATA outdata, void *callback_data)
+{
+	uint32_t *status = callback_data;
+
+	if (res != 0) {
+		DEBUG(DEBUG_ERR,("Reload ips all failed on node %d\n", node_pnn));
+		*status = 1;
+	}
+}
+
+static int
+reload_all_ips(struct ctdb_context *ctdb, struct ctdb_recoverd *rec, struct ctdb_node_map *nodemap, struct reloadips_all_reply *rips)
+{
+	TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
+	uint32_t *nodes;
+	uint32_t status;
+	int i;
+
+	DEBUG(DEBUG_ERR,("RELOAD ALL IPS on all active nodes\n"));
+	for (i = 0; i< nodemap->num; i++) {
+		if (nodemap->nodes[i].flags != 0) {
+			DEBUG(DEBUG_ERR, ("Can not reload ips on all nodes. Node %d is not up and healthy\n", i));
+			talloc_free(tmp_ctx);
+			return -1;
+		}
+	}
+
+	/* send the reload public ips control to all connected nodes */
+	nodes = list_of_connected_nodes(ctdb, nodemap, tmp_ctx, true);
+	status = 0;
+	if (ctdb_client_async_control(ctdb, CTDB_CONTROL_RELOAD_PUBLIC_IPS,
+					nodes, 0,
+					CONTROL_TIMEOUT(),
+					false, tdb_null,
+					async_reloadips_callback, NULL,
+					&status) != 0) {
+		DEBUG(DEBUG_ERR, (__location__ " Failed to reloadips on all nodes.\n"));
+		talloc_free(tmp_ctx);
+		return -1;
+	}
+
+	if (status != 0) {
+		DEBUG(DEBUG_ERR, (__location__ " Failed to reloadips on all nodes.\n"));
+		talloc_free(tmp_ctx);
+		return -1;
+	}
+
+	ctdb_client_send_message(ctdb, rips->pnn, rips->srvid, tdb_null);
+
+	talloc_free(tmp_ctx);
+	return 0;
+}
+
+
+/*
   handler for ip reallocate, just add it to the list of callers and 
   handle this later in the monitor_cluster loop so we do not recurse
   with other callers to takeover_run()
@@ -3403,6 +3481,14 @@ static void main_loop(struct ctdb_context *ctdb, struct ctdb_recoverd *rec,
 		}
 	}
 
+
+	/* is there a pending reload all ips ? */
+	if (reload_all_ips_request != NULL) {
+		reload_all_ips(ctdb, rec, nodemap, reload_all_ips_request);
+		talloc_free(reload_all_ips_request);
+		reload_all_ips_request = NULL;
+	}
+
 	/* if there are takeovers requested, perform it and notify the waiters */
 	if (rec->reallocate_callers) {
 		process_ipreallocate_requests(ctdb, rec);
@@ -3668,6 +3754,9 @@ static void monitor_cluster(struct ctdb_context *ctdb)
 	/* register a message port for performing a takeover run */
 	ctdb_client_set_message_handler(ctdb, CTDB_SRVID_TAKEOVER_RUN, ip_reallocate_handler, rec);
 
+	/* register a message port for performing a reload all ips */
+	ctdb_client_set_message_handler(ctdb, CTDB_SRVID_RELOAD_ALL_IPS, ip_reloadall_handler, rec);
+
 	/* register a message port for disabling the ip check for a short while */
 	ctdb_client_set_message_handler(ctdb, CTDB_SRVID_DISABLE_IP_CHECK, disable_ip_check_handler, rec);
 
diff --git a/server/ctdb_takeover.c b/server/ctdb_takeover.c
index c0acdcc..1670f01 100644
--- a/server/ctdb_takeover.c
+++ b/server/ctdb_takeover.c
@@ -994,7 +994,7 @@ static void ctdb_check_interfaces_event(struct event_context *ev, struct timed_e
 }
 
 
-static int ctdb_start_monitoring_interfaces(struct ctdb_context *ctdb)
+int ctdb_start_monitoring_interfaces(struct ctdb_context *ctdb)
 {
 	if (ctdb->check_public_ifaces_ctx != NULL) {
 		talloc_free(ctdb->check_public_ifaces_ctx);
@@ -1017,15 +1017,15 @@ static int ctdb_start_monitoring_interfaces(struct ctdb_context *ctdb)
 /*
   setup the public address lists from a file
 */
-int ctdb_set_public_addresses(struct ctdb_context *ctdb, const char *alist)
+int ctdb_set_public_addresses(struct ctdb_context *ctdb)
 {
 	char **lines;
 	int nlines;
 	int i;
 
-	lines = file_lines_load(alist, &nlines, ctdb);
+	lines = file_lines_load(ctdb->public_addresses_file, &nlines, ctdb);
 	if (lines == NULL) {
-		ctdb_set_error(ctdb, "Failed to load public address list '%s'\n", alist);
+		ctdb_set_error(ctdb, "Failed to load public address list '%s'\n", ctdb->public_addresses_file);
 		return -1;
 	}
 	while (nlines > 0 && strcmp(lines[nlines-1], "") == 0) {
@@ -1077,10 +1077,6 @@ int ctdb_set_public_addresses(struct ctdb_context *ctdb, const char *alist)
 	}
 
 
-	if (ctdb->do_checkpublicip) {
-		ctdb_start_monitoring_interfaces(ctdb);
-	}
-
 	talloc_free(lines);
 	return 0;
 }
@@ -3656,3 +3652,237 @@ int update_ip_assignment_tree(struct ctdb_context *ctdb, struct ctdb_public_ip *
 
 	return 0;
 }
+
+
+
+
+
+
+struct ctdb_reloadips_handle {
+	struct ctdb_context *ctdb;
+	struct ctdb_req_control *c;
+	int status;
+	int fd[2];
+	pid_t child;
+};
+
+static int ctdb_reloadips_destructor(struct ctdb_reloadips_handle *h)
+{
+	h->ctdb->reload_ips = NULL;
+	ctdb_request_control_reply(h->ctdb, h->c, NULL, h->status, NULL);
+
+	if (h->fd[0] != -1) {
+		close(h->fd[0]);
+		h->fd[0] = -1;
+	}
+	if (h->fd[1] != -1) {
+		close(h->fd[1]);
+		h->fd[1] = -1;
+	}
+
+	kill(h->child, SIGKILL);
+
+	return 0;
+}
+
+static void ctdb_reloadips_timeout_event(struct event_context *ev,
+				struct timed_event *te,
+				struct timeval t, void *private_data)
+{
+	struct ctdb_reloadips_handle *h = talloc_get_type(private_data, struct ctdb_reloadips_handle);
+
+	talloc_free(h);
+}	
+
+static void ctdb_reloadips_child_handler(struct event_context *ev, struct fd_event *fde, 
+			     uint16_t flags, void *private_data)
+{
+	struct ctdb_reloadips_handle *h = talloc_get_type(private_data, struct ctdb_reloadips_handle);
+	char res;
+	int ret;
+
+	ret = read(h->fd[0], &res, 1);
+	if (ret < 1 || res != 0) {
+		DEBUG(DEBUG_ERR, (__location__ " Realoadips child process returned error\n"));
+		res = 1;
+	}
+
+	h->status = res;
+	talloc_free(h);
+}
+
+
+static int ctdb_reloadips_child(struct ctdb_context *ctdb)
+{
+	TALLOC_CTX *mem_ctx = talloc_new(NULL);
+	struct ctdb_all_public_ips *ips;
+	struct ctdb_vnn *vnn;
+	int i, ret;
+
+	/* read the ip allocation from the local node */
+	ret = ctdb_ctrl_get_public_ips(ctdb, TAKEOVER_TIMEOUT(), CTDB_CURRENT_NODE, mem_ctx, &ips);
+	if (ret != 0) {
+		DEBUG(DEBUG_ERR, ("Unable to get public ips from local node\n"));
+		talloc_free(mem_ctx);
+		return -1;
+	}
+
+	/* re-read the public ips file */
+	ctdb->vnn = NULL;
+	if (ctdb_set_public_addresses(ctdb) != 0) {
+		DEBUG(DEBUG_ERR,("Failed to re-read public addresses file\n"));
+		talloc_free(mem_ctx);
+		return -1;
+	}		
+
+
+	/* check the previous list of ips and scan for ips that have been
+	   dropped.
+	 */
+	for (i = 0; i < ips->num; i++) {
+		for (vnn = ctdb->vnn; vnn; vnn = vnn->next) {
+			if (ctdb_same_ip(&vnn->public_address, &ips->ips[i].addr)) {
+				break;
+			}
+		}
+
+		/* we need to delete this ip, no longer available on this node */
+		if (vnn == NULL) {
+			struct ctdb_control_ip_iface pub;
+
+			DEBUG(DEBUG_NOTICE,("RELOADIPS: IP%s is no longer available on this node. Deleting it.\n", ctdb_addr_to_str(&ips->ips[i].addr)));
+			pub.addr  = ips->ips[i].addr;
+			pub.mask  = 0;
+			pub.len   = 0;
+
+			ret = ctdb_ctrl_del_public_ip(ctdb, TAKEOVER_TIMEOUT(), CTDB_CURRENT_NODE, &pub);
+			if (ret != 0) {
+				DEBUG(DEBUG_ERR, ("RELOADIPS: Unable to del public ip:%s from local node\n", ctdb_addr_to_str(&ips->ips[i].addr)));
+				return -1;
+			}
+		}
+	}
+
+
+	/* loop over all new ones and check the ones we need to add */
+	for (vnn = ctdb->vnn; vnn; vnn = vnn->next) {
+		for (i = 0; i < ips->num; i++) {
+			if (ctdb_same_ip(&vnn->public_address, &ips->ips[i].addr)) {
+				break;
+			}
+		}
+		if (i == ips->num) {
+			struct ctdb_control_ip_iface pub;
+			char *ifaces = NULL;
+			int iface = 0;
+
+			DEBUG(DEBUG_NOTICE,("RELOADIPS: New ip:%s found, adding it.\n", ctdb_addr_to_str(&vnn->public_address)));
+
+			pub.addr  = vnn->public_address;
+			pub.mask  = vnn->public_netmask_bits;
+
+
+			ifaces = vnn->ifaces[0];
+			iface = 1;
+			while (vnn->ifaces[iface] != NULL) {
+				ifaces = talloc_asprintf(vnn, "%s,%s", ifaces, vnn->ifaces[iface]);
+				iface++;
+			}
+			pub.len   = strlen(ifaces)+1;
+			memcpy(&pub.iface[0], ifaces, strlen(ifaces)+1);
+
+			ret = ctdb_ctrl_add_public_ip(ctdb, TAKEOVER_TIMEOUT(), CTDB_CURRENT_NODE, &pub);
+			if (ret != 0) {
+				DEBUG(DEBUG_ERR, ("RELOADIPS: Unable to add public ip:%s to local node\n", ctdb_addr_to_str(&vnn->public_address)));
+				return -1;
+			}
+		}
+	}
+
+	return 0;
+}
+
+/* This control is sent to force the node to re-read the public addresses file
+   and drop any addresses we should no longer host, and add new addresses
+   that we are now able to host
+*/
+int32_t ctdb_control_reload_public_ips(struct ctdb_context *ctdb, struct ctdb_req_control *c, bool *async_reply)
+{
+	struct ctdb_reloadips_handle *h;
+	pid_t parent = getpid();
+
+	if (ctdb->reload_ips != NULL) {
+		talloc_free(ctdb->reload_ips);
+	}
+
+	h = talloc(ctdb, struct ctdb_reloadips_handle);
+	CTDB_NO_MEMORY(ctdb, h);
+	h->ctdb     = ctdb;
+	h->c        = talloc_steal(h, c);
+	h->status   = -1;
+	h->fd[0]    = -1;
+	h->fd[1]    = -1;
+
+	if (pipe(h->fd) == -1) {
+		DEBUG(DEBUG_ERR,("Failed to create pipe for ctdb_freeze_lock\n"));
+		talloc_free(h);
+		return -1;
+	}
+
+	h->child = ctdb_fork(ctdb);
+	if (h->child == (pid_t)-1) {
+		DEBUG(DEBUG_ERR, ("Failed to fork a child for reloadips\n"));
+		close(h->fd[0]);
+		close(h->fd[1]);
+		talloc_free(h);
+		return -1;
+	}
+
+	/* child process */
+	if (h->child == 0) {
+		signed char res = 0;
+		close(h->fd[0]);
+		h->fd[0] = -1;
+		debug_extra = talloc_asprintf(NULL, "reloadips:");
+
+		if (switch_from_server_to_client(ctdb, "reloadips-child") != 0) {
+			DEBUG(DEBUG_CRIT,("ERROR: Failed to switch reloadips child into client mode\n"));
+			res = -1;
+		} else {
+			res = ctdb_reloadips_child(ctdb);
+			if (res != 0) {
+				DEBUG(DEBUG_ERR,("Failed to reload ips on local node\n"));
+			}
+		}
+
+		write(h->fd[1], &res, 1);
+		/* make sure we die when our parent dies */
+		while (kill(parent, 0) == 0 || errno != ESRCH) {
+			sleep(5);
+		}
+		_exit(0);
+	}
+
+	ctdb->reload_ips = h;
+
+
+	close(h->fd[1]);
+	h->fd[1] = -1;
+	set_close_on_exec(h->fd[0]);
+
+	event_add_fd(ctdb->ev, h, h->fd[0],
+			EVENT_FD_READ, ctdb_reloadips_child_handler,
+			(void *)h);
+
+	talloc_set_destructor(h, ctdb_reloadips_destructor);
+
+	event_add_timed(ctdb->ev, h,
+			timeval_current_ofs(10, 0), 
+			ctdb_reloadips_timeout_event, h);
+
+	/* we reply later */
+	*async_reply = True;
+


-- 
CTDB repository


More information about the samba-cvs mailing list