Rev 640: run monitoring more quickly when unhealthy and at startup in http://samba.org/~tridge/ctdb

Andrew Tridgell <tridge at samba.org>
Mon Sep 24 00:12:19 GMT 2007


------------------------------------------------------------
revno: 640
revision-id: tridge at samba.org-20070924001218-jt245q36pojpzrrt
parent: tridge at samba.org-20070924000014-gkjpqevicat8n13y
committer: Andrew Tridgell <tridge at samba.org>
branch nick: tridge
timestamp: Mon 2007-09-24 10:12:18 +1000
message:
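  run monitoring more quickly when the node is unhealthy, when the monitor
  event script cannot be launched, and at startup, by scheduling the next
  health check with the new MonitorRetry tunable (default 5) instead of
  MonitorInterval (default 15)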
modified:
  include/ctdb_private.h         ctdb_private.h-20061117234101-o3qt14umlg9en8z0-13
  server/ctdb_monitor.c          ctdb_monitor.c-20070518100625-8jf4ft1mjzmb22ck-1
  server/ctdb_tunables.c         ctdb_tunables.c-20070604095258-4m34d7cm1qa7yos9-1
=== modified file 'include/ctdb_private.h'
--- a/include/ctdb_private.h	2007-09-21 02:24:02 +0000
+++ b/include/ctdb_private.h	2007-09-24 00:12:18 +0000
@@ -80,6 +80,7 @@
 	uint32_t election_timeout;
 	uint32_t takeover_timeout;
 	uint32_t monitor_interval;
+	uint32_t monitor_retry;
 	uint32_t tickle_update_interval;
 	uint32_t script_timeout;
 	uint32_t recovery_grace_period;

=== modified file 'server/ctdb_monitor.c'
--- a/server/ctdb_monitor.c	2007-09-04 00:33:10 +0000
+++ b/server/ctdb_monitor.c	2007-09-24 00:12:18 +0000
@@ -98,10 +98,7 @@
 	struct ctdb_node *node = ctdb->nodes[ctdb->pnn];
 	TDB_DATA data;
 	struct ctdb_node_flag_change c;
-
-	event_add_timed(ctdb->ev, ctdb->monitor_context, 
-			timeval_current_ofs(ctdb->tunable.monitor_interval, 0), 
-			ctdb_check_health, ctdb);
+	uint32_t next_interval;
 
 	c.pnn = ctdb->pnn;
 	c.old_flags = node->flags;
@@ -111,9 +108,20 @@
 		node->flags |= NODE_FLAGS_UNHEALTHY;
 	} else if (status == 0 && (node->flags & NODE_FLAGS_UNHEALTHY)) {
 		DEBUG(0,("monitor event OK - node re-enabled\n"));
-		ctdb->nodes[ctdb->pnn]->flags &= ~NODE_FLAGS_UNHEALTHY;
+		node->flags &= ~NODE_FLAGS_UNHEALTHY;
+	}
+
+	if (node->flags & NODE_FLAGS_UNHEALTHY) {
+		next_interval = ctdb->tunable.monitor_retry;
 	} else {
-		/* no change */
+		next_interval = ctdb->tunable.monitor_interval;
+	}
+
+	event_add_timed(ctdb->ev, ctdb->monitor_context, 
+			timeval_current_ofs(next_interval, 0), 
+			ctdb_check_health, ctdb);
+
+	if (c.old_flags == node->flags) {
 		return;
 	}
 
@@ -151,7 +159,7 @@
 	if (ret != 0) {
 		DEBUG(0,("Unable to launch monitor event script\n"));
 		event_add_timed(ctdb->ev, ctdb->monitor_context, 
-				timeval_current_ofs(ctdb->tunable.monitor_interval, 0), 
+				timeval_current_ofs(ctdb->tunable.monitor_retry, 0), 
 				ctdb_check_health, ctdb);
 	}	
 }
@@ -179,7 +187,7 @@
 	CTDB_NO_MEMORY_FATAL(ctdb, te);
 
 	te = event_add_timed(ctdb->ev, ctdb->monitor_context,
-			     timeval_current_ofs(ctdb->tunable.monitor_interval, 0), 
+			     timeval_current_ofs(ctdb->tunable.monitor_retry, 0), 
 			     ctdb_check_health, ctdb);
 	CTDB_NO_MEMORY_FATAL(ctdb, te);
 }

=== modified file 'server/ctdb_tunables.c'
--- a/server/ctdb_tunables.c	2007-07-20 05:05:55 +0000
+++ b/server/ctdb_tunables.c	2007-09-24 00:12:18 +0000
@@ -36,6 +36,7 @@
 	{ "ElectionTimeout",      3,  offsetof(struct ctdb_tunable, election_timeout) },
 	{ "TakeoverTimeout",      5,  offsetof(struct ctdb_tunable, takeover_timeout) },
 	{ "MonitorInterval",     15,  offsetof(struct ctdb_tunable, monitor_interval) },
+	{ "MonitorRetry",         5,  offsetof(struct ctdb_tunable, monitor_retry) },
 	{ "TickleUpdateInterval",20,  offsetof(struct ctdb_tunable, tickle_update_interval) },
 	{ "EventScriptTimeout",  20,  offsetof(struct ctdb_tunable, script_timeout) },
 	{ "RecoveryGracePeriod", 60,  offsetof(struct ctdb_tunable, recovery_grace_period) },



More information about the samba-cvs mailing list