[SCM] CTDB repository - branch master updated - ctdb-1.0.114-15-g2a79d97

Ronnie Sahlberg sahlberg at samba.org
Wed Apr 7 22:40:07 MDT 2010


The branch, master has been updated
       via  2a79d976f975ec1ecbd06c210cbb5e6a93068cec (commit)
       via  8825a8e3936e4ddfc1482d07a63a620249f01fe7 (commit)
       via  f3bf2ab61f8dbbc806ec23a68a87aaedd458e712 (commit)
      from  685be0a7e830464e01dccc744362040a75bc96b5 (commit)

http://gitweb.samba.org/?p=sahlberg/ctdb.git;a=shortlog;h=master


- Log -----------------------------------------------------------------
commit 2a79d976f975ec1ecbd06c210cbb5e6a93068cec
Merge: 685be0a7e830464e01dccc744362040a75bc96b5 8825a8e3936e4ddfc1482d07a63a620249f01fe7
Author: Ronnie Sahlberg <ronniesahlberg at gmail.com>
Date:   Thu Apr 8 14:30:01 2010 +1000

    Merge root at 10.1.1.27:/shared/ctdb/ctdb-git

commit 8825a8e3936e4ddfc1482d07a63a620249f01fe7
Author: Ronnie Sahlberg <ronniesahlberg at gmail.com>
Date:   Thu Apr 8 14:28:52 2010 +1000

    Fix a compiler warning

commit f3bf2ab61f8dbbc806ec23a68a87aaedd458e712
Author: Ronnie Sahlberg <ronniesahlberg at gmail.com>
Date:   Thu Apr 8 14:07:57 2010 +1000

    In the recovery daemon, keep track of which node each public ip address
    has been assigned to, and verify that the remote nodes have/keep a
    consistent view of the assigned addresses.
    
    If a remote node has an inconsistent view of addresses vis-a-vis the
    recovery master, this will trigger a full ip reallocation.

-----------------------------------------------------------------------

Summary of changes:
 client/ctdb_client.c   |    2 +-
 common/rb_tree.h       |   12 +++++-----
 include/ctdb_private.h |    9 ++++++-
 server/ctdb_recoverd.c |   18 ++++++++++-----
 server/ctdb_takeover.c |   57 ++++++++++++++++++++++++++++++++++++++++++------
 5 files changed, 77 insertions(+), 21 deletions(-)


Changeset truncated at 500 lines:

diff --git a/client/ctdb_client.c b/client/ctdb_client.c
index 9e5f7fc..946335c 100644
--- a/client/ctdb_client.c
+++ b/client/ctdb_client.c
@@ -1906,7 +1906,7 @@ int ctdb_dumpdb_record(struct ctdb_context *ctdb, TDB_DATA key, TDB_DATA data, v
 	fprintf(f, "dmaster: %u\n", h->dmaster);
 	fprintf(f, "rsn: %llu\n", (unsigned long long)h->rsn);
 
-	fprintf(f, "data(%u) = \"", (unsigned)data.dsize - sizeof(*h));
+	fprintf(f, "data(%u) = \"", (unsigned)(data.dsize - sizeof(*h)));
 	for (i=sizeof(*h);i<data.dsize;i++) {
 		if (ISASCII(data.dptr[i])) {
 			fprintf(f, "%c", data.dptr[i]);
diff --git a/common/rb_tree.h b/common/rb_tree.h
index cb7cba3..eef0bc5 100644
--- a/common/rb_tree.h
+++ b/common/rb_tree.h
@@ -21,17 +21,17 @@
 
 #define TRBT_RED		0x00
 #define TRBT_BLACK		0x01
-typedef struct _trbt_node_t {
-	struct _trbt_tree_t *tree;
-	struct _trbt_node_t *parent;
-	struct _trbt_node_t *left;
-	struct _trbt_node_t *right;
+typedef struct trbt_node {
+	struct trbt_tree *tree;
+	struct trbt_node *parent;
+	struct trbt_node *left;
+	struct trbt_node *right;
 	uint32_t rb_color;
 	uint32_t key32;
 	void *data;
 } trbt_node_t;
 
-typedef struct _trbt_tree_t {
+typedef struct trbt_tree {
 	trbt_node_t *root;
 /* automatically free the tree when the last node has been deleted */
 #define TRBT_AUTOFREE		0x00000001
diff --git a/include/ctdb_private.h b/include/ctdb_private.h
index 953552a..888b626 100644
--- a/include/ctdb_private.h
+++ b/include/ctdb_private.h
@@ -442,7 +442,7 @@ struct ctdb_context {
 	struct ctdb_call_state *pending_calls;
 	struct ctdb_client_ip *client_ip_list;
 	bool do_checkpublicip;
-	struct _trbt_tree_t *server_ids;	
+	struct trbt_tree *server_ids;	
 	const char *event_script_dir;
 	const char *notification_script;
 	const char *default_public_interface;
@@ -469,6 +469,9 @@ struct ctdb_context {
 
 	/* mapping from pid to ctdb_client * */
 	struct ctdb_client_pid_list *client_pids;
+
+	/* used in the recovery daemon to remember the ip allocation */
+	struct trbt_tree *ip_tree;
 };
 
 struct ctdb_db_context {
@@ -1633,4 +1636,8 @@ int ctdb_recheck_persistent_health(struct ctdb_context *ctdb);
 void ctdb_run_notification_script(struct ctdb_context *ctdb, const char *event);
 
 void ctdb_fault_setup(void);
+
+int verify_remote_ip_allocation(struct ctdb_context *ctdb, 
+				struct ctdb_all_public_ips *ips);
+
 #endif
diff --git a/server/ctdb_recoverd.c b/server/ctdb_recoverd.c
index f5aa77b..5f34711 100644
--- a/server/ctdb_recoverd.c
+++ b/server/ctdb_recoverd.c
@@ -1228,6 +1228,7 @@ static void reload_nodes_file(struct ctdb_context *ctdb)
 }
 
 static int ctdb_reload_remote_public_ips(struct ctdb_context *ctdb,
+					 struct ctdb_recoverd *rec,
 					 struct ctdb_node_map *nodemap,
 					 uint32_t *culprit)
 {
@@ -1274,6 +1275,11 @@ static int ctdb_reload_remote_public_ips(struct ctdb_context *ctdb,
 			return -1;
 		}
 
+		if (verify_remote_ip_allocation(ctdb, ctdb->nodes[j]->known_public_ips)) {
+			DEBUG(DEBUG_ERR,("Node %d has inconsistent public ip allocation and needs update.\n", ctdb->nodes[j]->pnn));
+			rec->need_takeover_run = true;
+		}
+
 		/* grab a new shiny list of public ips from the node */
 		ret = ctdb_ctrl_get_public_ips_flags(ctdb,
 					CONTROL_TIMEOUT(),
@@ -1568,7 +1574,7 @@ static int do_recovery(struct ctdb_recoverd *rec,
 	/*
 	  tell nodes to takeover their public IPs
 	 */
-	ret = ctdb_reload_remote_public_ips(ctdb, nodemap, &culprit);
+	ret = ctdb_reload_remote_public_ips(ctdb, rec, nodemap, &culprit);
 	if (ret != 0) {
 		DEBUG(DEBUG_ERR,("Failed to read public ips from remote node %d\n",
 				 culprit));
@@ -1961,7 +1967,7 @@ static void process_ipreallocate_requests(struct ctdb_context *ctdb, struct ctdb
 	/* update the list of public ips that a node can handle for
 	   all connected nodes
 	*/
-	ret = ctdb_reload_remote_public_ips(ctdb, rec->nodemap, &culprit);
+	ret = ctdb_reload_remote_public_ips(ctdb, rec, rec->nodemap, &culprit);
 	if (ret != 0) {
 		DEBUG(DEBUG_ERR,("Failed to read public ips from remote node %d\n",
 				 culprit));
@@ -2414,9 +2420,9 @@ static enum monitor_result verify_recmaster(struct ctdb_recoverd *rec, struct ct
 }
 
 
-/* called to check that the allocation of public ip addresses is ok.
+/* called to check that the local allocation of public ip addresses is ok.
 */
-static int verify_ip_allocation(struct ctdb_context *ctdb, struct ctdb_recoverd *rec, uint32_t pnn)
+static int verify_local_ip_allocation(struct ctdb_context *ctdb, struct ctdb_recoverd *rec, uint32_t pnn)
 {
 	TALLOC_CTX *mem_ctx = talloc_new(NULL);
 	struct ctdb_control_get_ifaces *ifaces = NULL;
@@ -3088,7 +3094,7 @@ again:
 	 */ 
 	if (ctdb->do_checkpublicip) {
 		if (rec->ip_check_disable_ctx == NULL) {
-			if (verify_ip_allocation(ctdb, rec, pnn) != 0) {
+			if (verify_local_ip_allocation(ctdb, rec, pnn) != 0) {
 				DEBUG(DEBUG_ERR, (__location__ " Public IPs were inconsistent.\n"));
 			}
 		}
@@ -3353,7 +3359,7 @@ again:
 		/* update the list of public ips that a node can handle for
 		   all connected nodes
 		*/
-		ret = ctdb_reload_remote_public_ips(ctdb, nodemap, &culprit);
+		ret = ctdb_reload_remote_public_ips(ctdb, rec, nodemap, &culprit);
 		if (ret != 0) {
 			DEBUG(DEBUG_ERR,("Failed to read public ips from remote node %d\n",
 					 culprit));
diff --git a/server/ctdb_takeover.c b/server/ctdb_takeover.c
index 166ca5a..5433172 100644
--- a/server/ctdb_takeover.c
+++ b/server/ctdb_takeover.c
@@ -1159,14 +1159,17 @@ void getips_count_callback(void *param, void *data)
 }
 
 struct ctdb_public_ip_list *
-create_merged_ip_list(struct ctdb_context *ctdb, TALLOC_CTX *tmp_ctx)
+create_merged_ip_list(struct ctdb_context *ctdb)
 {
 	int i, j;
 	struct ctdb_public_ip_list *ip_list;
 	struct ctdb_all_public_ips *public_ips;
-	trbt_tree_t *ip_tree;
 
-	ip_tree = trbt_create(tmp_ctx, 0);
+	if (ctdb->ip_tree != NULL) {
+		talloc_free(ctdb->ip_tree);
+		ctdb->ip_tree = NULL;
+	}
+	ctdb->ip_tree = trbt_create(ctdb, 0);
 
 	for (i=0;i<ctdb->num_nodes;i++) {
 		public_ips = ctdb->nodes[i]->known_public_ips;
@@ -1183,13 +1186,13 @@ create_merged_ip_list(struct ctdb_context *ctdb, TALLOC_CTX *tmp_ctx)
 		for (j=0;j<public_ips->num;j++) {
 			struct ctdb_public_ip_list *tmp_ip; 
 
-			tmp_ip = talloc_zero(tmp_ctx, struct ctdb_public_ip_list);
+			tmp_ip = talloc_zero(ctdb->ip_tree, struct ctdb_public_ip_list);
 			CTDB_NO_MEMORY_NULL(ctdb, tmp_ip);
 			tmp_ip->pnn  = public_ips->ips[j].pnn;
 			tmp_ip->addr = public_ips->ips[j].addr;
 			tmp_ip->next = NULL;
 
-			trbt_insertarray32_callback(ip_tree,
+			trbt_insertarray32_callback(ctdb->ip_tree,
 				IP_KEYLEN, ip_key(&public_ips->ips[j].addr),
 				add_ip_callback,
 				tmp_ip);
@@ -1197,7 +1200,7 @@ create_merged_ip_list(struct ctdb_context *ctdb, TALLOC_CTX *tmp_ctx)
 	}
 
 	ip_list = NULL;
-	trbt_traversearray32(ip_tree, IP_KEYLEN, getips_count_callback, &ip_list);
+	trbt_traversearray32(ctdb->ip_tree, IP_KEYLEN, getips_count_callback, &ip_list);
 
 	return ip_list;
 }
@@ -1247,8 +1250,10 @@ int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap)
 	   a full list of all public addresses that exist in the cluster.
 	   Walk over all node structures and create a merged list of
 	   all public addresses that exist in the cluster.
+
+	   keep the tree of ips around as ctdb->ip_tree
 	*/
-	all_ips = create_merged_ip_list(ctdb, tmp_ctx);
+	all_ips = create_merged_ip_list(ctdb);
 
 	/* If we want deterministic ip allocations, i.e. that the ip addresses
 	   will always be allocated the same way for a specific set of
@@ -2806,3 +2811,41 @@ int32_t ctdb_control_del_public_address(struct ctdb_context *ctdb, TDB_DATA inda
 	return -1;
 }
 
+/* This function is called from the recovery daemon to verify that a remote
+   node has the expected ip allocation.
+   This is verified against ctdb->ip_tree
+*/
+int verify_remote_ip_allocation(struct ctdb_context *ctdb, struct ctdb_all_public_ips *ips)
+{
+	struct ctdb_public_ip_list *tmp_ip; 
+	int i;
+
+	if (ctdb->ip_tree == NULL) {
+		/* dont know the expected allocation yet, assume remote node
+		   is correct. */
+		return 0;
+	}
+
+	if (ips == NULL) {
+		return 0;
+	}
+
+	for (i=0; i<ips->num; i++) {
+		tmp_ip = trbt_lookuparray32(ctdb->ip_tree, IP_KEYLEN, ip_key(&ips->ips[i].addr));
+		if (tmp_ip == NULL) {
+			DEBUG(DEBUG_ERR,(__location__ " Could not find host for address %s, reassign ips\n", ctdb_addr_to_str(&ips->ips[i].addr)));
+			return -1;
+		}
+
+		if (tmp_ip->pnn == -1 || ips->ips[i].pnn == -1) {
+			continue;
+		}
+
+		if (tmp_ip->pnn != ips->ips[i].pnn) {
+			DEBUG(DEBUG_ERR,("Inconsistent ip allocation. Trigger reallocation.\n"));
+			return -1;
+		}
+	}
+
+	return 0;
+}


-- 
CTDB repository


More information about the samba-cvs mailing list