Rev 644: we are the culprit if we can't get the reclock in http://samba.org/~tridge/ctdb

tridge at samba.org tridge at samba.org
Fri Oct 5 02:01:42 GMT 2007


------------------------------------------------------------
revno: 644
revision-id: tridge at samba.org-20071005020140-in6xybrd2o71xrlk
parent: tridge at samba.org-20070924052701-49dtutlsgg9ylbal
committer: Andrew Tridgell <tridge at samba.org>
branch nick: tridge
timestamp: Fri 2007-10-05 12:01:40 +1000
message:
  we are the culprit if we can't get the reclock
modified:
  server/ctdb_recoverd.c         recoverd.c-20070503213540-bvxuyd9jm1f7ig90-1
=== modified file 'server/ctdb_recoverd.c'
--- a/server/ctdb_recoverd.c	2007-09-24 00:52:26 +0000
+++ b/server/ctdb_recoverd.c	2007-10-05 02:01:40 +0000
@@ -724,6 +724,23 @@
 
 	return generation;
 }
+
+/*
+  remember the trouble maker
+ */
+static void ctdb_set_culprit(struct ctdb_recoverd *rec, uint32_t culprit)
+{
+	struct ctdb_context *ctdb = rec->ctdb;
+
+	if (rec->last_culprit != culprit ||
+	    timeval_elapsed(&rec->first_recover_time) > ctdb->tunable.recovery_grace_period) {
+		/* either a new node is the culprit, or we've decided to forgive them */
+		rec->last_culprit = culprit;
+		rec->first_recover_time = timeval_current();
+		rec->culprit_counter = 0;
+	}
+	rec->culprit_counter++;
+}
 		
 /*
   we are the recmaster, and recovery is needed - start a recovery run
@@ -741,14 +758,7 @@
 	/* if recovery fails, force it again */
 	rec->need_recovery = true;
 
-	if (rec->last_culprit != culprit ||
-	    timeval_elapsed(&rec->first_recover_time) > ctdb->tunable.recovery_grace_period) {
-		/* either a new node is the culprit, or we've decide to forgive them */
-		rec->last_culprit = culprit;
-		rec->first_recover_time = timeval_current();
-		rec->culprit_counter = 0;
-	}
-	rec->culprit_counter++;
+	ctdb_set_culprit(rec, culprit);
 
 	if (rec->culprit_counter > 2*nodemap->num) {
 		DEBUG(0,("Node %u has caused %u recoveries in %.0f seconds - banning it for %u seconds\n",
@@ -758,6 +768,7 @@
 	}
 
 	if (!ctdb_recovery_lock(ctdb, true)) {
+		ctdb_set_culprit(rec, pnn);
 		DEBUG(0,("Unable to get recovery lock - aborting recovery\n"));
 		return -1;
 	}



More information about the samba-cvs mailing list