Rev 696: prevent a re-ban loop for single node clusters in http://samba.org/~tridge/ctdb

Andrew Tridgell <tridge at samba.org>
Fri Jan 4 01:11:31 GMT 2008


------------------------------------------------------------
revno: 696
revision-id:tridge at samba.org-20080104011129-bsprbhxtczom25gx
parent: tridge at samba.org-20080103224104-59slj1v4vg63fk7a
committer: Andrew Tridgell <tridge at samba.org>
branch nick: tridge.kantana
timestamp: Fri 2008-01-04 12:11:29 +1100
message:
  prevent a re-ban loop for single node clusters
modified:
  server/ctdb_recoverd.c         recoverd.c-20070503213540-bvxuyd9jm1f7ig90-1
=== modified file 'server/ctdb_recoverd.c'
--- a/server/ctdb_recoverd.c	2008-01-02 11:44:46 +0000
+++ b/server/ctdb_recoverd.c	2008-01-04 01:11:29 +0000
@@ -1736,25 +1736,47 @@
 		goto again;
 	}
 
+	/* check which node is the recovery master */
+	ret = ctdb_ctrl_getrecmaster(ctdb, mem_ctx, CONTROL_TIMEOUT(), pnn, &recmaster);
+	if (ret != 0) {
+		DEBUG(0, (__location__ " Unable to get recmaster from node %u\n", pnn));
+		goto again;
+	}
+
+	if (recmaster == (uint32_t)-1) {
+		DEBUG(0,(__location__ " Initial recovery master set - forcing election\n"));
+		force_election(rec, mem_ctx, pnn, nodemap);
+		goto again;
+	}
+	
 	/* check that we (recovery daemon) and the local ctdb daemon
 	   agrees on whether we are banned or not
 	*/
 	if (nodemap->nodes[pnn].flags & NODE_FLAGS_BANNED) {
 		if (rec->banned_nodes[pnn] == NULL) {
-			DEBUG(0,("Local ctdb daemon thinks this node is BANNED but the recovery master disagrees. Re-banning the node\n"));
-
-			ctdb_ban_node(rec, pnn, ctdb->tunable.recovery_ban_period);
-			ctdb_set_culprit(rec, pnn);
-
+			if (recmaster == pnn) {
+				DEBUG(0,("Local ctdb daemon on recmaster thinks this node is BANNED but the recovery master disagrees. Unbanning the node\n"));
+
+				ctdb_unban_node(rec, pnn);
+			} else {
+				DEBUG(0,("Local ctdb daemon on non-recmaster thinks this node is BANNED but the recovery master disagrees. Re-banning the node\n"));
+				ctdb_ban_node(rec, pnn, ctdb->tunable.recovery_ban_period);
+				ctdb_set_culprit(rec, pnn);
+			}
 			goto again;
 		}
 	} else {
 		if (rec->banned_nodes[pnn] != NULL) {
-			DEBUG(0,("Local ctdb daemon does not think this node is BANNED but the recovery master disagrees. Re-banning the node\n"));
-
-			ctdb_ban_node(rec, pnn, ctdb->tunable.recovery_ban_period);
-			ctdb_set_culprit(rec, pnn);
-
+			if (recmaster == pnn) {
+				DEBUG(0,("Local ctdb daemon on recmaster does not think this node is BANNED but the recovery master disagrees. Unbanning the node\n"));
+
+				ctdb_unban_node(rec, pnn);
+			} else {
+				DEBUG(0,("Local ctdb daemon on non-recmaster does not think this node is BANNED but the recovery master disagrees. Re-banning the node\n"));
+
+				ctdb_ban_node(rec, pnn, ctdb->tunable.recovery_ban_period);
+				ctdb_set_culprit(rec, pnn);
+			}
 			goto again;
 		}
 	}
@@ -1771,19 +1793,6 @@
 	}
 
 
-	/* check which node is the recovery master */
-	ret = ctdb_ctrl_getrecmaster(ctdb, mem_ctx, CONTROL_TIMEOUT(), pnn, &recmaster);
-	if (ret != 0) {
-		DEBUG(0, (__location__ " Unable to get recmaster from node %u\n", pnn));
-		goto again;
-	}
-
-	if (recmaster == (uint32_t)-1) {
-		DEBUG(0,(__location__ " Initial recovery master set - forcing election\n"));
-		force_election(rec, mem_ctx, pnn, nodemap);
-		goto again;
-	}
-	
 	/* verify that the recmaster node is still active */
 	for (j=0; j<nodemap->num; j++) {
 		if (nodemap->nodes[j].pnn==recmaster) {



More information about the samba-cvs mailing list