[SCM] CTDB repository - branch master updated - ctdb-1.0.65-1-ga9a1156

Ronnie Sahlberg sahlberg at samba.org
Thu Nov 20 01:44:09 GMT 2008


The branch, master has been updated
       via  a9a1156ea4e10483a4bf4265b8e9203f0af033aa (commit)
      from  1f25958dc739677a487fa496fbeffcda7a0f2204 (commit)

http://gitweb.samba.org/?p=sahlberg/ctdb.git;a=shortlog;h=master


- Log -----------------------------------------------------------------
commit a9a1156ea4e10483a4bf4265b8e9203f0af033aa
Author: Ronnie Sahlberg <ronniesahlberg at gmail.com>
Date:   Wed Nov 19 14:43:46 2008 +1100

    reqrite the handling of flag updates across the cluster to eliminate a
    race between the ctdb tool and the recovery daemon both at once
    trying to push flag changes across the cluster.

-----------------------------------------------------------------------

Summary of changes:
 client/ctdb_client.c   |   92 ++++++++++++++++++++++++++++++++++++-----
 include/ctdb.h         |   15 ++++++-
 include/ctdb_private.h |    8 ----
 server/ctdb_control.c  |    2 +-
 server/ctdb_daemon.c   |   34 ---------------
 server/ctdb_monitor.c  |   46 ++++++++++-----------
 server/ctdb_recoverd.c |  107 ++++++++++++++++++++---------------------------
 7 files changed, 163 insertions(+), 141 deletions(-)


Changeset truncated at 500 lines:

diff --git a/client/ctdb_client.c b/client/ctdb_client.c
index fcd10b2..16fc03b 100644
--- a/client/ctdb_client.c
+++ b/client/ctdb_client.c
@@ -2353,23 +2353,59 @@ int ctdb_ctrl_modflags(struct ctdb_context *ctdb, struct timeval timeout, uint32
 {
 	int ret;
 	TDB_DATA data;
-	struct ctdb_node_modflags m;
-	int32_t res;
+	struct ctdb_node_map *nodemap=NULL;
+	struct ctdb_node_flag_change c;
+	TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
+	uint32_t recmaster;
+	uint32_t *nodes;
 
-	m.set = set;
-	m.clear = clear;
 
-	data.dsize = sizeof(m);
-	data.dptr = (unsigned char *)&m;
+	/* find the recovery master */
+	ret = ctdb_ctrl_getrecmaster(ctdb, tmp_ctx, timeout, CTDB_CURRENT_NODE, &recmaster);
+	if (ret != 0) {
+		DEBUG(DEBUG_ERR, (__location__ " Unable to get recmaster from local node\n"));
+		talloc_free(tmp_ctx);
+		return ret;
+	}
 
-	ret = ctdb_control(ctdb, destnode, 0, 
-			   CTDB_CONTROL_MODIFY_FLAGS, 0, data, 
-			   NULL, NULL, &res, &timeout, NULL);
-	if (ret != 0 || res != 0) {
-		DEBUG(DEBUG_ERR,(__location__ " ctdb_control for modflags failed\n"));
+
+	/* read the node flags from the recmaster */
+	ret = ctdb_ctrl_getnodemap(ctdb, timeout, recmaster, tmp_ctx, &nodemap);
+	if (ret != 0) {
+		DEBUG(DEBUG_ERR, (__location__ " Unable to get nodemap from node %u\n", destnode));
+		talloc_free(tmp_ctx);
+		return -1;
+	}
+	if (destnode >= nodemap->num) {
+		DEBUG(DEBUG_ERR,(__location__ " Nodemap from recmaster does not contain node %d\n", destnode));
+		talloc_free(tmp_ctx);
+		return -1;
+	}
+
+	c.pnn       = destnode;
+	c.old_flags = nodemap->nodes[destnode].flags;
+	c.new_flags = c.old_flags;
+	c.new_flags |= set;
+	c.new_flags &= ~clear;
+
+	data.dsize = sizeof(c);
+	data.dptr = (unsigned char *)&c;
+
+	/* send the flags update to all connected nodes */
+	nodes = list_of_connected_nodes(ctdb, nodemap, tmp_ctx, true);
+
+	if (ctdb_client_async_control(ctdb, CTDB_CONTROL_MODIFY_FLAGS,
+					nodes,
+					timeout, false, data,
+					NULL, NULL,
+					NULL) != 0) {
+		DEBUG(DEBUG_ERR, (__location__ " ctdb_control to disable node failed\n"));
+
+		talloc_free(tmp_ctx);
 		return -1;
 	}
 
+	talloc_free(tmp_ctx);
 	return 0;
 }
 
@@ -2959,6 +2995,40 @@ uint32_t *list_of_active_nodes(struct ctdb_context *ctdb,
 	return nodes;
 }
 
+uint32_t *list_of_connected_nodes(struct ctdb_context *ctdb,
+				struct ctdb_node_map *node_map,
+				TALLOC_CTX *mem_ctx,
+				bool include_self)
+{
+	int i, j, num_nodes;
+	uint32_t *nodes;
+
+	for (i=num_nodes=0;i<node_map->num;i++) {
+		if (node_map->nodes[i].flags & NODE_FLAGS_DISCONNECTED) {
+			continue;
+		}
+		if (node_map->nodes[i].pnn == ctdb->pnn && !include_self) {
+			continue;
+		}
+		num_nodes++;
+	} 
+
+	nodes = talloc_array(mem_ctx, uint32_t, num_nodes);
+	CTDB_NO_MEMORY_FATAL(ctdb, nodes);
+
+	for (i=j=0;i<node_map->num;i++) {
+		if (node_map->nodes[i].flags & NODE_FLAGS_DISCONNECTED) {
+			continue;
+		}
+		if (node_map->nodes[i].pnn == ctdb->pnn && !include_self) {
+			continue;
+		}
+		nodes[j++] = node_map->nodes[i].pnn;
+	} 
+
+	return nodes;
+}
+
 /* 
   this is used to test if a pnn lock exists and if it exists will return
   the number of connections that pnn has reported or -1 if that recovery
diff --git a/include/ctdb.h b/include/ctdb.h
index fd5af20..bf6c4a4 100644
--- a/include/ctdb.h
+++ b/include/ctdb.h
@@ -71,9 +71,9 @@ struct ctdb_call_info {
 #define CTDB_SRVID_RELEASE_IP 0xF300000000000000LL
 
 /* 
-   a message ID meaning that a nodes flags have changed
+   a message ID to set the node flags in the recovery daemon
  */
-#define CTDB_SRVID_NODE_FLAGS_CHANGED 0xF400000000000000LL
+#define CTDB_SRVID_SET_NODE_FLAGS 0xF400000000000000LL
 
 /* 
    a message ID meaning that a node should be banned
@@ -96,6 +96,12 @@ struct ctdb_call_info {
  */
 #define CTDB_SRVID_MEM_DUMP 0xF800000000000000LL
 
+/* 
+   a message ID to get the recovery daemon to push the node flags out
+ */
+#define CTDB_SRVID_PUSH_NODE_FLAGS 0xF900000000000000LL
+
+
 
 /* used on the domain socket, send a pdu to the local daemon */
 #define CTDB_CURRENT_NODE     0xF0000001
@@ -532,6 +538,11 @@ int ctdb_ctrl_getreclock(struct ctdb_context *ctdb,
 	struct timeval timeout, uint32_t destnode, 
 	TALLOC_CTX *mem_ctx, const char **reclock);
 
+
+uint32_t *list_of_connected_nodes(struct ctdb_context *ctdb,
+				struct ctdb_node_map *node_map,
+				TALLOC_CTX *mem_ctx,
+				bool include_self);
 uint32_t *list_of_active_nodes(struct ctdb_context *ctdb,
 				struct ctdb_node_map *node_map,
 				TALLOC_CTX *mem_ctx,
diff --git a/include/ctdb_private.h b/include/ctdb_private.h
index f526f26..cb83f86 100644
--- a/include/ctdb_private.h
+++ b/include/ctdb_private.h
@@ -637,14 +637,6 @@ struct ctdb_node_flag_change {
 };
 
 /*
-  structure to change flags on a node
- */
-struct ctdb_node_modflags {
-	uint32_t set;
-	uint32_t clear;
-};
-
-/*
   struct for admin setting a ban
  */
 struct ctdb_ban_info {
diff --git a/server/ctdb_control.c b/server/ctdb_control.c
index 5f65547..15f5000 100644
--- a/server/ctdb_control.c
+++ b/server/ctdb_control.c
@@ -326,7 +326,7 @@ static int32_t ctdb_control_dispatch(struct ctdb_context *ctdb,
 		return ctdb_control_list_tunables(ctdb, outdata);
 
 	case CTDB_CONTROL_MODIFY_FLAGS:
-		CHECK_CONTROL_DATA_SIZE(sizeof(struct ctdb_node_modflags));
+		CHECK_CONTROL_DATA_SIZE(sizeof(struct ctdb_node_flag_change));
 		return ctdb_control_modflags(ctdb, indata);
 
 	case CTDB_CONTROL_KILL_TCP: 
diff --git a/server/ctdb_daemon.c b/server/ctdb_daemon.c
index 8ddda70..0aaab23 100644
--- a/server/ctdb_daemon.c
+++ b/server/ctdb_daemon.c
@@ -31,36 +31,6 @@
 
 static void daemon_incoming_packet(void *, struct ctdb_req_header *);
 
-/*
-  handler for when a node changes its flags
-*/
-static void flag_change_handler(struct ctdb_context *ctdb, uint64_t srvid, 
-				TDB_DATA data, void *private_data)
-{
-	struct ctdb_node_flag_change *c = (struct ctdb_node_flag_change *)data.dptr;
-
-	if (data.dsize != sizeof(*c) || !ctdb_validate_pnn(ctdb, c->pnn)) {
-		DEBUG(DEBUG_CRIT,(__location__ "Invalid data in ctdb_node_flag_change\n"));
-		return;
-	}
-
-	if (!ctdb_validate_pnn(ctdb, c->pnn)) {
-		DEBUG(DEBUG_CRIT,("Bad pnn %u in flag_change_handler\n", c->pnn));
-		return;
-	}
-
-	/* don't get the disconnected flag from the other node */
-	ctdb->nodes[c->pnn]->flags = 
-		(ctdb->nodes[c->pnn]->flags&NODE_FLAGS_DISCONNECTED) 
-		| (c->new_flags & ~NODE_FLAGS_DISCONNECTED);	
-	DEBUG(DEBUG_DEBUG,("Node flags for node %u are now 0x%x\n", c->pnn, ctdb->nodes[c->pnn]->flags));
-
-	/* make sure we don't hold any IPs when we shouldn't */
-	if (c->pnn == ctdb->pnn &&
-	    (ctdb->nodes[c->pnn]->flags & (NODE_FLAGS_INACTIVE|NODE_FLAGS_BANNED))) {
-		ctdb_release_all_ips(ctdb);
-	}
-}
 
 static void print_exit_message(void)
 {
@@ -91,10 +61,6 @@ static void ctdb_start_transport(struct ctdb_context *ctdb)
 	/* Make sure we log something when the daemon terminates */
 	atexit(print_exit_message);
 
-	/* a handler for when nodes are disabled/enabled */
-	ctdb_register_message_handler(ctdb, ctdb, CTDB_SRVID_NODE_FLAGS_CHANGED, 
-				      flag_change_handler, NULL);
-
 	/* start monitoring for connected/disconnected nodes */
 	ctdb_start_keepalive(ctdb);
 
diff --git a/server/ctdb_monitor.c b/server/ctdb_monitor.c
index 6526397..21f0382 100644
--- a/server/ctdb_monitor.c
+++ b/server/ctdb_monitor.c
@@ -81,9 +81,9 @@ static void ctdb_health_callback(struct ctdb_context *ctdb, int status, void *p)
 	data.dptr = (uint8_t *)&c;
 	data.dsize = sizeof(c);
 
-	/* tell the other nodes that something has changed */
-	ctdb_daemon_send_message(ctdb, CTDB_BROADCAST_CONNECTED,
-				 CTDB_SRVID_NODE_FLAGS_CHANGED, data);
+	/* ask the recovery daemon to push these changes out to all nodes */
+	ctdb_daemon_send_message(ctdb, ctdb->pnn,
+				 CTDB_SRVID_PUSH_NODE_FLAGS, data);
 
 }
 
@@ -212,35 +212,33 @@ void ctdb_start_monitoring(struct ctdb_context *ctdb)
  */
 int32_t ctdb_control_modflags(struct ctdb_context *ctdb, TDB_DATA indata)
 {
-	struct ctdb_node_modflags *m = (struct ctdb_node_modflags *)indata.dptr;
-	TDB_DATA data;
-	struct ctdb_node_flag_change c;
-	struct ctdb_node *node = ctdb->nodes[ctdb->pnn];
-	uint32_t old_flags = node->flags;
+	struct ctdb_node_flag_change *c = (struct ctdb_node_flag_change *)indata.dptr;
+	struct ctdb_node *node;
+	
+	if (c->pnn >= ctdb->num_nodes) {
+		DEBUG(DEBUG_ERR,(__location__ " Node %d is invalid, num_nodes :%d\n", c->pnn, ctdb->num_nodes));
+		return -1;
+	}
 
-	node->flags |= m->set;
-	node->flags &= ~m->clear;
+	node         = ctdb->nodes[c->pnn];
+	c->old_flags  = node->flags;
+	node->flags   = c->new_flags & ~NODE_FLAGS_DISCONNECTED;
+	node->flags  |= (c->old_flags & NODE_FLAGS_DISCONNECTED);
 
-	if (node->flags == old_flags) {
-		DEBUG(DEBUG_INFO, ("Control modflags on node %u - Unchanged - flags 0x%x\n", ctdb->pnn, node->flags));
+	if (node->flags == c->old_flags) {
+		DEBUG(DEBUG_INFO, ("Control modflags on node %u - Unchanged - flags 0x%x\n", c->pnn, node->flags));
 		return 0;
 	}
 
-	DEBUG(DEBUG_INFO, ("Control modflags on node %u - flags now 0x%x\n", ctdb->pnn, node->flags));
+	DEBUG(DEBUG_INFO, ("Control modflags on node %u - flags now 0x%x\n", c->pnn, node->flags));
 
-	/* if we have been banned, go into recovery mode */
-	c.pnn = ctdb->pnn;
-	c.old_flags = old_flags;
-	c.new_flags = node->flags;
-
-	data.dptr = (uint8_t *)&c;
-	data.dsize = sizeof(c);
 
-	/* tell the other nodes that something has changed */
-	ctdb_daemon_send_message(ctdb, CTDB_BROADCAST_CONNECTED,
-				 CTDB_SRVID_NODE_FLAGS_CHANGED, data);
+	/* tell the recovery daemon something has changed */
+	ctdb_daemon_send_message(ctdb, ctdb->pnn,
+				 CTDB_SRVID_SET_NODE_FLAGS, indata);
 
-	if ((node->flags & NODE_FLAGS_BANNED) && !(old_flags & NODE_FLAGS_BANNED)) {
+	/* if we have become banned, we should go into recovery mode */
+	if ((node->flags & NODE_FLAGS_BANNED) && !(c->old_flags & NODE_FLAGS_BANNED)) {
 		/* make sure we are frozen */
 		DEBUG(DEBUG_NOTICE,("This node has been banned - forcing freeze and recovery\n"));
 		/* Reset the generation id to 1 to make us ignore any
diff --git a/server/ctdb_recoverd.c b/server/ctdb_recoverd.c
index 350897a..bef4bb1 100644
--- a/server/ctdb_recoverd.c
+++ b/server/ctdb_recoverd.c
@@ -643,37 +643,26 @@ static int update_flags_on_all_nodes(struct ctdb_context *ctdb, struct ctdb_node
 {
 	int i;
 	for (i=0;i<nodemap->num;i++) {
-		struct ctdb_node_flag_change c;
-		TDB_DATA data;
-
-		c.pnn = nodemap->nodes[i].pnn;
-		c.old_flags = nodemap->nodes[i].flags;
-		c.new_flags = nodemap->nodes[i].flags;
-
-		data.dptr = (uint8_t *)&c;
-		data.dsize = sizeof(c);
-
-		ctdb_send_message(ctdb, CTDB_BROADCAST_CONNECTED,
-				  CTDB_SRVID_NODE_FLAGS_CHANGED, data);
+		int ret;
 
+		ret = ctdb_ctrl_modflags(ctdb, CONTROL_TIMEOUT(), nodemap->nodes[i].pnn, nodemap->nodes[i].flags, ~nodemap->nodes[i].flags);
+		if (ret != 0) {
+			DEBUG(DEBUG_ERR, (__location__ " Unable to update nodeflags on remote nodes\n"));
+			return -1;
+		}
 	}
 	return 0;
 }
 
 static int update_our_flags_on_all_nodes(struct ctdb_context *ctdb, uint32_t pnn, struct ctdb_node_map *nodemap)
 {
-	struct ctdb_node_flag_change c;
-	TDB_DATA data;
-
-	c.pnn = nodemap->nodes[pnn].pnn;
-	c.old_flags = nodemap->nodes[pnn].flags;
-	c.new_flags = nodemap->nodes[pnn].flags;
-
-	data.dptr = (uint8_t *)&c;
-	data.dsize = sizeof(c);
+	int ret;
 
-	ctdb_send_message(ctdb, CTDB_BROADCAST_CONNECTED,
-			  CTDB_SRVID_NODE_FLAGS_CHANGED, data);
+	ret = ctdb_ctrl_modflags(ctdb, CONTROL_TIMEOUT(), nodemap->nodes[pnn].pnn, nodemap->nodes[pnn].flags, ~nodemap->nodes[pnn].flags);
+	if (ret != 0) {
+		DEBUG(DEBUG_ERR, (__location__ " Unable to update nodeflags on remote nodes\n"));
+		return -1;
+	}
 
 	return 0;
 }
@@ -1038,8 +1027,14 @@ static int update_local_flags(struct ctdb_recoverd *rec, struct ctdb_node_map *n
 			return MONITOR_FAILED;
 		}
 		if (nodemap->nodes[j].flags != remote_nodemap->nodes[j].flags) {
-			struct ctdb_node_flag_change c;
-			TDB_DATA data;
+			int ban_changed = (nodemap->nodes[j].flags ^ remote_nodemap->nodes[j].flags) & NODE_FLAGS_BANNED;
+
+			if (ban_changed) {
+				DEBUG(DEBUG_NOTICE,("Remote node %u had different BANNED flags 0x%x, local had 0x%x - trigger a re-election\n",
+				nodemap->nodes[j].pnn,
+				remote_nodemap->nodes[j].flags,
+				nodemap->nodes[j].flags));
+			}
 
 			/* We should tell our daemon about this so it
 			   updates its flags or else we will log the same 
@@ -1047,16 +1042,11 @@ static int update_local_flags(struct ctdb_recoverd *rec, struct ctdb_node_map *n
 			   Since we are the recovery master we can just as
 			   well update the flags on all nodes.
 			*/
-			c.pnn = nodemap->nodes[j].pnn;
-			c.old_flags = nodemap->nodes[j].flags;
-			c.new_flags = remote_nodemap->nodes[j].flags;
-
-			data.dptr = (uint8_t *)&c;
-			data.dsize = sizeof(c);
-
-			ctdb_send_message(ctdb, ctdb->pnn,
-					CTDB_SRVID_NODE_FLAGS_CHANGED, 
-					data);
+			ret = ctdb_ctrl_modflags(ctdb, CONTROL_TIMEOUT(), nodemap->nodes[j].pnn, nodemap->nodes[j].flags, ~nodemap->nodes[j].flags);
+			if (ret != 0) {
+				DEBUG(DEBUG_ERR, (__location__ " Unable to update nodeflags on remote nodes\n"));
+				return -1;
+			}
 
 			/* Update our local copy of the flags in the recovery
 			   daemon.
@@ -1069,10 +1059,7 @@ static int update_local_flags(struct ctdb_recoverd *rec, struct ctdb_node_map *n
 			/* If the BANNED flag has changed for the node
 			   this is a good reason to do a new election.
 			 */
-			if ((c.old_flags ^ c.new_flags) & NODE_FLAGS_BANNED) {
-				DEBUG(DEBUG_NOTICE,("Remote node %u had different BANNED flags 0x%x, local had 0x%x - trigger a re-election\n",
-				 nodemap->nodes[j].pnn, c.new_flags,
-				 c.old_flags));
+			if (ban_changed) {
 				talloc_free(mem_ctx);
 				return MONITOR_ELECTION_NEEDED;
 			}
@@ -1940,15 +1927,6 @@ static void monitor_handler(struct ctdb_context *ctdb, uint64_t srvid,
 
 	changed_flags = c->old_flags ^ c->new_flags;
 
-	/* Dont let messages from remote nodes change the DISCONNECTED flag. 
-	   This flag is handled locally based on whether the local node
-	   can communicate with the node or not.
-	*/
-	c->new_flags &= ~NODE_FLAGS_DISCONNECTED;
-	if (nodemap->nodes[i].flags&NODE_FLAGS_DISCONNECTED) {
-		c->new_flags |= NODE_FLAGS_DISCONNECTED;
-	}
-
 	if (nodemap->nodes[i].flags != c->new_flags) {
 		DEBUG(DEBUG_NOTICE,("Node %u has changed flags - now 0x%x  was 0x%x\n", c->pnn, c->new_flags, c->old_flags));
 	}
@@ -1981,6 +1959,20 @@ static void monitor_handler(struct ctdb_context *ctdb, uint64_t srvid,
 	talloc_free(tmp_ctx);
 }
 
+/*
+  handler for when we need to push out flag changes ot all other nodes
+*/
+static void push_flags_handler(struct ctdb_context *ctdb, uint64_t srvid, 
+			    TDB_DATA data, void *private_data)
+{
+	int ret;
+	struct ctdb_node_flag_change *c = (struct ctdb_node_flag_change *)data.dptr;
+
+	ret = ctdb_ctrl_modflags(ctdb, CONTROL_TIMEOUT(), c->pnn, c->new_flags, ~c->new_flags);
+	if (ret != 0) {
+		DEBUG(DEBUG_ERR, (__location__ " Unable to update nodeflags on remote nodes\n"));
+	}
+}
 
 
 struct verify_recmode_normal_data {
@@ -2312,10 +2304,13 @@ static void monitor_cluster(struct ctdb_context *ctdb)
 	/* register a message port for recovery elections */
 	ctdb_set_message_handler(ctdb, CTDB_SRVID_RECOVERY, election_handler, rec);
 
-	/* and one for when nodes are disabled/enabled */
-	ctdb_set_message_handler(ctdb, CTDB_SRVID_NODE_FLAGS_CHANGED, monitor_handler, rec);
+	/* when nodes are disabled/enabled */
+	ctdb_set_message_handler(ctdb, CTDB_SRVID_SET_NODE_FLAGS, monitor_handler, rec);
+
+	/* when we are asked to puch out a flag change */
+	ctdb_set_message_handler(ctdb, CTDB_SRVID_PUSH_NODE_FLAGS, push_flags_handler, rec);
 
-	/* and one for when nodes are banned */
+	/* when nodes are banned */
 	ctdb_set_message_handler(ctdb, CTDB_SRVID_BAN_NODE, ban_handler, rec);
 
 	/* and one for when nodes are unbanned */
@@ -2794,16 +2789,6 @@ again:
 	}
 
 
-	DEBUG(DEBUG_DEBUG, (__location__ " Update flags on all nodes\n"));
-	/*
-	  update all nodes to have the same flags that we have
-	 */
-	ret = update_flags_on_all_nodes(ctdb, nodemap);
-	if (ret != 0) {
-		DEBUG(DEBUG_ERR, (__location__ " Unable to update flags on all nodes\n"));
-		goto again;
-	}
-
 	goto again;
 
 }


-- 
CTDB repository


More information about the samba-cvs mailing list