[SCM] CTDB repository - branch master updated - ctdb-1.0.92-7-g789703e

Ronnie Sahlberg sahlberg at samba.org
Mon Oct 5 19:57:02 MDT 2009


The branch, master has been updated
       via  789703ea684717781c176fd3a2a24d96abde220b (commit)
       via  9c63858c0b22c81eaccb9865a414af0bbb2833d4 (commit)
       via  3befe5526e147d49451fddc930aaafc3dbe2e9c1 (commit)
       via  9d95dfbd12898975ba0d8560d95a974210d3de7c (commit)
       via  be52954d921e7d443304cf49fbd488c619a9c4ec (commit)
       via  804e5905be51f43c8a338bfbe216fd8d5718850f (commit)
       via  e8df6fc301fb7faf72c72eb39ea68d44d1526b00 (commit)
      from  9ffb0d08d34cbafed0e49350a3a72b15d92c8ea7 (commit)

http://gitweb.samba.org/?p=sahlberg/ctdb.git;a=shortlog;h=master


- Log -----------------------------------------------------------------
commit 789703ea684717781c176fd3a2a24d96abde220b
Author: Ronnie Sahlberg <ronniesahlberg at gmail.com>
Date:   Tue Oct 6 12:25:44 2009 +1100

    always send the release/take ip controls to make sure all nodes are updated

commit 9c63858c0b22c81eaccb9865a414af0bbb2833d4
Author: Ronnie Sahlberg <ronniesahlberg at gmail.com>
Date:   Tue Oct 6 12:11:32 2009 +1100

    add a new message to ask the recovery daemon to temporarily disable checking ip address consistency.
    
    This is useful when we are moving addresses using moveip in the cluster, since otherwise, if we collide with the recovery daemon's own check, we could cause a recovery.

commit 3befe5526e147d49451fddc930aaafc3dbe2e9c1
Author: Ronnie Sahlberg <ronniesahlberg at gmail.com>
Date:   Tue Oct 6 11:41:18 2009 +1100

    update addip/moveip/delip to make it less likely to trigger an accidental recovery

commit 9d95dfbd12898975ba0d8560d95a974210d3de7c
Author: Ronnie Sahlberg <ronniesahlberg at gmail.com>
Date:   Tue Oct 6 11:40:38 2009 +1100

    change some loglevels and also print the pnn of the ip for takeip/releaseip logging

commit be52954d921e7d443304cf49fbd488c619a9c4ec
Author: Ronnie Sahlberg <ronniesahlberg at gmail.com>
Date:   Tue Oct 6 10:52:31 2009 +1100

    add a new function to collect a list of all active nodes EXCEPT a certain node

commit 804e5905be51f43c8a338bfbe216fd8d5718850f
Author: Ronnie Sahlberg <ronniesahlberg at gmail.com>
Date:   Tue Oct 6 09:35:15 2009 +1100

    allocate takeoverip state as a child of vnn and also make the takeoverip context a child of vnn

commit e8df6fc301fb7faf72c72eb39ea68d44d1526b00
Author: Ronnie Sahlberg <ronniesahlberg at gmail.com>
Date:   Tue Oct 6 08:19:25 2009 +1100

    When adding a public ip to a node, make sure to push the assignment of ip addresses out to all nodes so all nodes become aware of who currently holds the ip.

-----------------------------------------------------------------------

Summary of changes:
 client/ctdb_client.c   |   34 +++++++++++++++
 include/ctdb.h         |    9 ++++-
 server/ctdb_recoverd.c |   52 +++++++++++++++++++++-
 server/ctdb_takeover.c |   11 +++--
 tools/ctdb.c           |  110 ++++++++++++++++++++++++-----------------------
 5 files changed, 153 insertions(+), 63 deletions(-)


Changeset truncated at 500 lines:

diff --git a/client/ctdb_client.c b/client/ctdb_client.c
index 9621435..b8cc301 100644
--- a/client/ctdb_client.c
+++ b/client/ctdb_client.c
@@ -2975,6 +2975,40 @@ uint32_t *list_of_active_nodes(struct ctdb_context *ctdb,
 	return nodes;
 }
 
+uint32_t *list_of_active_nodes_except_pnn(struct ctdb_context *ctdb,
+				struct ctdb_node_map *node_map,
+				TALLOC_CTX *mem_ctx,
+				uint32_t pnn)
+{
+	int i, j, num_nodes;
+	uint32_t *nodes;
+
+	for (i=num_nodes=0;i<node_map->num;i++) {
+		if (node_map->nodes[i].flags & NODE_FLAGS_INACTIVE) {
+			continue;
+		}
+		if (node_map->nodes[i].pnn == pnn) {
+			continue;
+		}
+		num_nodes++;
+	} 
+
+	nodes = talloc_array(mem_ctx, uint32_t, num_nodes);
+	CTDB_NO_MEMORY_FATAL(ctdb, nodes);
+
+	for (i=j=0;i<node_map->num;i++) {
+		if (node_map->nodes[i].flags & NODE_FLAGS_INACTIVE) {
+			continue;
+		}
+		if (node_map->nodes[i].pnn == pnn) {
+			continue;
+		}
+		nodes[j++] = node_map->nodes[i].pnn;
+	} 
+
+	return nodes;
+}
+
 uint32_t *list_of_connected_nodes(struct ctdb_context *ctdb,
 				struct ctdb_node_map *node_map,
 				TALLOC_CTX *mem_ctx,
diff --git a/include/ctdb.h b/include/ctdb.h
index a5d9d1f..23d73fb 100644
--- a/include/ctdb.h
+++ b/include/ctdb.h
@@ -101,7 +101,10 @@ struct ctdb_call_info {
  */
 #define CTDB_SRVID_TAKEOVER_RUN 0xFB00000000000000LL
 
-
+/* A message id to ask the recovery daemon to temporarily disable the
+   public ip checks
+*/
+#define CTDB_SRVID_DISABLE_IP_CHECK  0xFC00000000000000LL
 
 /* used on the domain socket, send a pdu to the local daemon */
 #define CTDB_CURRENT_NODE     0xF0000001
@@ -581,6 +584,10 @@ uint32_t *list_of_vnnmap_nodes(struct ctdb_context *ctdb,
 				struct ctdb_vnn_map *vnn_map,
 				TALLOC_CTX *mem_ctx,
 				bool include_self);
+uint32_t *list_of_active_nodes_except_pnn(struct ctdb_context *ctdb,
+				struct ctdb_node_map *node_map,
+				TALLOC_CTX *mem_ctx,
+				uint32_t pnn);
 
 int ctdb_read_pnn_lock(int fd, int32_t pnn);
 
diff --git a/server/ctdb_recoverd.c b/server/ctdb_recoverd.c
index 7e81e20..2be53f8 100644
--- a/server/ctdb_recoverd.c
+++ b/server/ctdb_recoverd.c
@@ -63,6 +63,7 @@ struct ctdb_recoverd {
 	struct vacuum_info *vacuum_info;
 	TALLOC_CTX *ip_reallocate_ctx;
 	struct ip_reallocate_list *reallocate_callers;
+	TALLOC_CTX *ip_check_disable_ctx;
 };
 
 #define CONTROL_TIMEOUT() timeval_current_ofs(ctdb->tunable.recover_timeout, 0)
@@ -1685,6 +1686,46 @@ static void reload_nodes_handler(struct ctdb_context *ctdb, uint64_t srvid,
 	reload_nodes_file(rec->ctdb);
 }
 
+
+static void reenable_ip_check(struct event_context *ev, struct timed_event *te, 
+			      struct timeval yt, void *p)
+{
+	struct ctdb_recoverd *rec = talloc_get_type(p, struct ctdb_recoverd);
+
+	talloc_free(rec->ip_check_disable_ctx);
+	rec->ip_check_disable_ctx = NULL;
+}
+
+static void disable_ip_check_handler(struct ctdb_context *ctdb, uint64_t srvid, 
+			     TDB_DATA data, void *private_data)
+{
+	struct ctdb_recoverd *rec = talloc_get_type(private_data, struct ctdb_recoverd);
+	uint32_t timeout;
+
+	if (rec->ip_check_disable_ctx != NULL) {
+		talloc_free(rec->ip_check_disable_ctx);
+		rec->ip_check_disable_ctx = NULL;
+	}
+
+	if (data.dsize != sizeof(uint32_t)) {
+		DEBUG(DEBUG_ERR,(__location__ " Wrong size for data :%lu expexting %lu\n", data.dsize, sizeof(uint32_t)));
+		return;
+	}
+	if (data.dptr == NULL) {
+		DEBUG(DEBUG_ERR,(__location__ " No data recaived\n"));
+		return;
+	}
+
+	timeout = *((uint32_t *)data.dptr);
+	DEBUG(DEBUG_NOTICE,("Disabling ip check for %u seconds\n", timeout));
+
+	rec->ip_check_disable_ctx = talloc_new(rec);
+	CTDB_NO_MEMORY_VOID(ctdb, rec->ip_check_disable_ctx);
+
+	event_add_timed(ctdb->ev, rec->ip_check_disable_ctx, timeval_current_ofs(timeout, 0), reenable_ip_check, rec);
+}
+
+
 /*
   handler for ip reallocate, just add it to the list of callers and 
   handle this later in the monitor_cluster loop so we do not recurse
@@ -2531,6 +2572,9 @@ static void monitor_cluster(struct ctdb_context *ctdb)
 	/* register a message port for performing a takeover run */
 	ctdb_set_message_handler(ctdb, CTDB_SRVID_TAKEOVER_RUN, ip_reallocate_handler, rec);
 
+	/* register a message port for disabling the ip check for a short while */
+	ctdb_set_message_handler(ctdb, CTDB_SRVID_DISABLE_IP_CHECK, disable_ip_check_handler, rec);
+
 again:
 	if (mem_ctx) {
 		talloc_free(mem_ctx);
@@ -2762,9 +2806,11 @@ again:
 	 * have addresses we shouldnt have.
 	 */ 
 	if (ctdb->do_checkpublicip) {
-		if (verify_ip_allocation(ctdb, pnn) != 0) {
-			DEBUG(DEBUG_ERR, (__location__ " Public IPs were inconsistent.\n"));
-			goto again;
+		if (rec->ip_check_disable_ctx == NULL) {
+			if (verify_ip_allocation(ctdb, pnn) != 0) {
+				DEBUG(DEBUG_ERR, (__location__ " Public IPs were inconsistent.\n"));
+				goto again;
+			}
 		}
 	}
 
diff --git a/server/ctdb_takeover.c b/server/ctdb_takeover.c
index f2f57eb..6eb2acf 100644
--- a/server/ctdb_takeover.c
+++ b/server/ctdb_takeover.c
@@ -137,7 +137,7 @@ static void takeover_ip_callback(struct ctdb_context *ctdb, int status,
 	}
 
 	if (!state->vnn->takeover_ctx) {
-		state->vnn->takeover_ctx = talloc_new(ctdb);
+		state->vnn->takeover_ctx = talloc_new(state->vnn);
 		if (!state->vnn->takeover_ctx) {
 			goto failed;
 		}
@@ -219,7 +219,7 @@ int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb,
 		return 0;
 	}
 
-	state = talloc(ctdb, struct takeover_callback_state);
+	state = talloc(vnn, struct takeover_callback_state);
 	CTDB_NO_MEMORY(ctdb, state);
 
 	state->c = talloc_steal(ctdb, c);
@@ -368,17 +368,18 @@ int32_t ctdb_control_release_ip(struct ctdb_context *ctdb,
 	vnn->takeover_ctx = NULL;
 
 	if (!ctdb_sys_have_ip(&pip->addr)) {
-		DEBUG(DEBUG_INFO,("Redundant release of IP %s/%u on interface %s (ip not held)\n", 
+		DEBUG(DEBUG_NOTICE,("Redundant release of IP %s/%u on interface %s (ip not held)\n", 
 			ctdb_addr_to_str(&pip->addr),
 			vnn->public_netmask_bits, 
 			vnn->iface));
 		return 0;
 	}
 
-	DEBUG(DEBUG_NOTICE,("Release of IP %s/%u on interface %s\n", 
+	DEBUG(DEBUG_NOTICE,("Release of IP %s/%u on interface %s  node:%u\n", 
 		ctdb_addr_to_str(&pip->addr),
 		vnn->public_netmask_bits, 
-		vnn->iface));
+		vnn->iface,
+		pip->pnn));
 
 	state = talloc(ctdb, struct takeover_callback_state);
 	CTDB_NO_MEMORY(ctdb, state);
diff --git a/tools/ctdb.c b/tools/ctdb.c
index 223be3f..0e8a1dc 100644
--- a/tools/ctdb.c
+++ b/tools/ctdb.c
@@ -855,40 +855,28 @@ static int control_get_tickles(struct ctdb_context *ctdb, int argc, const char *
 }
 
 
-/*
-  move/failover an ip address to a specific node
- */
-static int control_moveip(struct ctdb_context *ctdb, int argc, const char **argv)
+
+static int move_ip(struct ctdb_context *ctdb, ctdb_sock_addr *addr, uint32_t pnn)
 {
-	uint32_t pnn;
-	ctdb_sock_addr addr;
 	struct ctdb_all_public_ips *ips;
 	struct ctdb_public_ip ip;
-	uint32_t *nodes;
 	int i, ret;
+	uint32_t *nodes;
+	uint32_t disable_time;
 	TDB_DATA data;
 	struct ctdb_node_map *nodemap=NULL;
 	TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
 
-	if (argc < 2) {
-		usage();
-		talloc_free(tmp_ctx);
-		return -1;
-	}
-
-	if (parse_ip(argv[0], NULL, 0, &addr) == 0) {
-		DEBUG(DEBUG_ERR,("Wrongly formed ip address '%s'\n", argv[0]));
-		talloc_free(tmp_ctx);
+	disable_time = 30;
+	data.dptr  = (uint8_t*)&disable_time;
+	data.dsize = sizeof(disable_time);
+	ret = ctdb_send_message(ctdb, CTDB_BROADCAST_CONNECTED, CTDB_SRVID_DISABLE_IP_CHECK, data);
+	if (ret != 0) {
+		DEBUG(DEBUG_ERR,("Failed to send message to disable ipcheck\n"));
 		return -1;
 	}
 
 
-	if (sscanf(argv[1], "%u", &pnn) != 1) {
-		DEBUG(DEBUG_ERR, ("Badly formed pnn\n"));
-		talloc_free(tmp_ctx);
-		return -1;
-	}
-
 
 	/* read the public ip list from the node */
 	ret = ctdb_ctrl_get_public_ips(ctdb, TIMELIMIT(), pnn, ctdb, &ips);
@@ -899,25 +887,19 @@ static int control_moveip(struct ctdb_context *ctdb, int argc, const char **argv
 	}
 
 	for (i=0;i<ips->num;i++) {
-		if (ctdb_same_ip(&addr, &ips->ips[i].addr)) {
+		if (ctdb_same_ip(addr, &ips->ips[i].addr)) {
 			break;
 		}
 	}
 	if (i==ips->num) {
 		DEBUG(DEBUG_ERR, ("Node %u can not host ip address '%s'\n",
-			pnn, ctdb_addr_to_str(&addr)));
-		talloc_free(tmp_ctx);
-		return -1;
-	}
-	if (ips->ips[i].pnn == pnn) {
-		DEBUG(DEBUG_ERR, ("Host %u is already hosting '%s'\n",
-			pnn, ctdb_addr_to_str(&ips->ips[i].addr)));
+			pnn, ctdb_addr_to_str(addr)));
 		talloc_free(tmp_ctx);
 		return -1;
 	}
 
 	ip.pnn  = pnn;
-	ip.addr = addr;
+	ip.addr = *addr;
 
 	data.dptr  = (uint8_t *)&ip;
 	data.dsize = sizeof(ip);
@@ -929,7 +911,7 @@ static int control_moveip(struct ctdb_context *ctdb, int argc, const char **argv
 		return ret;
 	}
 
-       	nodes = list_of_active_nodes(ctdb, nodemap, tmp_ctx, true);
+       	nodes = list_of_active_nodes_except_pnn(ctdb, nodemap, tmp_ctx, pnn);
 	ret = ctdb_client_async_control(ctdb, CTDB_CONTROL_RELEASE_IP,
 					nodes, TIMELIMIT(),
 					false, data,
@@ -952,6 +934,38 @@ static int control_moveip(struct ctdb_context *ctdb, int argc, const char **argv
 	return 0;
 }
 
+/*
+  move/failover an ip address to a specific node
+ */
+static int control_moveip(struct ctdb_context *ctdb, int argc, const char **argv)
+{
+	uint32_t pnn;
+	ctdb_sock_addr addr;
+
+	if (argc < 2) {
+		usage();
+		return -1;
+	}
+
+	if (parse_ip(argv[0], NULL, 0, &addr) == 0) {
+		DEBUG(DEBUG_ERR,("Wrongly formed ip address '%s'\n", argv[0]));
+		return -1;
+	}
+
+
+	if (sscanf(argv[1], "%u", &pnn) != 1) {
+		DEBUG(DEBUG_ERR, ("Badly formed pnn\n"));
+		return -1;
+	}
+
+	if (move_ip(ctdb, &addr, pnn) != 0) {
+		DEBUG(DEBUG_ERR,("Failed to move ip to node %d\n", pnn));
+		return -1;
+	}
+
+	return 0;
+}
+
 void getips_store_callback(void *param, void *data)
 {
 	struct ctdb_public_ip *node_ip = (struct ctdb_public_ip *)data;
@@ -1116,12 +1130,14 @@ static int control_addip(struct ctdb_context *ctdb, int argc, const char **argv)
 {
 	int i, ret;
 	int len;
+	uint32_t pnn;
 	unsigned mask;
 	ctdb_sock_addr addr;
 	struct ctdb_control_ip_iface *pub;
 	TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
 	struct ctdb_all_public_ips *ips;
 
+
 	if (argc != 2) {
 		talloc_free(tmp_ctx);
 		usage();
@@ -1166,23 +1182,15 @@ static int control_addip(struct ctdb_context *ctdb, int argc, const char **argv)
 		return ret;
 	}
 
-	/* no one has this ip so we claim it */
 	if (i == ips->num) {
-		struct ctdb_public_ip ip;
-
-		ip.pnn  = options.pnn;
-		ip.addr = addr;
-
-		ret = ctdb_ctrl_takeover_ip(ctdb, TIMELIMIT(), options.pnn, &ip);
-		if (ret != 0) {
-			DEBUG(DEBUG_ERR,("Failed to take over IP on node %d\n", options.pnn));
-			return -1;
-		}
+		/* no one has this ip so we claim it */
+		pnn  = options.pnn;
+	} else {
+		pnn  = ips->ips[i].pnn;
 	}
 
-
-	if (ret != 0) {
-		DEBUG(DEBUG_ERR, ("Failed to send 'change ip' to all nodes\n"));
+	if (move_ip(ctdb, &addr, pnn) != 0) {
+		DEBUG(DEBUG_ERR,("Failed to move ip to node %d\n", pnn));
 		return -1;
 	}
 
@@ -1312,14 +1320,8 @@ static int control_delip(struct ctdb_context *ctdb, int argc, const char **argv)
 	if (ips->ips[i].pnn == options.pnn) {
 		ret = find_other_host_for_public_ip(ctdb, &addr);
 		if (ret != -1) {
-			struct ctdb_public_ip ip;
-
-			ip.pnn  = ret;
-			ip.addr = addr;
-
-			ret = ctdb_ctrl_takeover_ip(ctdb, TIMELIMIT(), ret, &ip);
-			if (ret != 0) {
-				DEBUG(DEBUG_ERR,("Failed to take over IP on node %d\n", options.pnn));
+			if (move_ip(ctdb, &addr, ret) != 0) {
+				DEBUG(DEBUG_ERR,("Failed to move ip to node %d\n", ret));
 				return -1;
 			}
 		}


-- 
CTDB repository


More information about the samba-cvs mailing list