Rev 676: prevent a deadly embrace between smbd and ctdbd by moving the calling of the startup event scripts, in http://samba.org/~tridge/ctdb

tridge at samba.org tridge at samba.org
Sun Nov 11 23:54:14 GMT 2007


------------------------------------------------------------
revno: 676
revision-id:tridge at samba.org-20071111235311-be18d7889z943fa2
parent: tridge at samba.org-20071102022029-f2sj3koc251qt203
committer: Andrew Tridgell <tridge at samba.org>
branch nick: tridge.stable
timestamp: Mon 2007-11-12 10:53:11 +1100
message:
  prevent a deadly embrace (deadlock) between smbd and ctdbd by moving
  the call to the startup event scripts to after the point where
  recovery has started and the node is in normal operation
  
  This makes the 'startup' script just a special case of the 'monitor'
  script: it is the one that is called first, before regular monitoring
  begins
modified:
  include/ctdb_private.h         ctdb_private.h-20061117234101-o3qt14umlg9en8z0-13
  server/ctdb_daemon.c           ctdb_daemon.c-20070409200331-3el1kqgdb9m4ib0g-1
  server/ctdb_monitor.c          ctdb_monitor.c-20070518100625-8jf4ft1mjzmb22ck-1
=== modified file 'include/ctdb_private.h'
--- a/include/ctdb_private.h	2007-10-22 02:34:08 +0000
+++ b/include/ctdb_private.h	2007-11-11 23:53:11 +0000
@@ -366,6 +366,7 @@
 	const char *event_script_dir;
 	const char *default_public_interface;
 	pid_t recoverd_pid;
+	bool done_startup;
 };
 
 struct ctdb_db_context {

=== modified file 'server/ctdb_daemon.c'
--- a/server/ctdb_daemon.c	2007-09-24 00:00:14 +0000
+++ b/server/ctdb_daemon.c	2007-11-11 23:53:11 +0000
@@ -68,13 +68,8 @@
 
 
 /* called when the "startup" event script has finished */
-static void ctdb_start_transport(struct ctdb_context *ctdb, int status, void *p)
+static void ctdb_start_transport(struct ctdb_context *ctdb)
 {
-	if (status != 0) {
-		DEBUG(0,("startup event failed!\n"));
-		ctdb_fatal(ctdb, "startup event script failed");		
-	}
-
 	/* start the transport running */
 	if (ctdb->methods->start(ctdb) != 0) {
 		DEBUG(0,("transport failed to start!\n"));
@@ -664,12 +659,8 @@
 	/* release any IPs we hold from previous runs of the daemon */
 	ctdb_release_all_ips(ctdb);
 
-	ret = ctdb_event_script_callback(ctdb, timeval_zero(), ctdb, 
-					 ctdb_start_transport, NULL, "startup");
-	if (ret != 0) {
-		DEBUG(0,("Failed startup event script\n"));
-		return -1;
-	}
+	/* start the transport going */
+	ctdb_start_transport(ctdb);
 
 	/* go into a wait loop to allow other nodes to complete */
 	event_loop_wait(ctdb->ev);

=== modified file 'server/ctdb_monitor.c'
--- a/server/ctdb_monitor.c	2007-09-24 00:12:18 +0000
+++ b/server/ctdb_monitor.c	2007-11-11 23:53:11 +0000
@@ -138,6 +138,31 @@
 
 
 /*
+  called when the startup event script finishes
+ */
+static void ctdb_startup_callback(struct ctdb_context *ctdb, int status, void *p)
+{
+	if (status != 0) {
+		DEBUG(0,("startup event failed\n"));
+	} else if (status == 0) {
+		DEBUG(0,("startup event OK - enabling monitoring\n"));
+		ctdb->done_startup = true;
+	}
+
+	if (ctdb->done_startup) {
+		event_add_timed(ctdb->ev, ctdb->monitor_context, 
+				timeval_zero(),
+				ctdb_check_health, ctdb);
+	} else {
+		event_add_timed(ctdb->ev, ctdb->monitor_context, 
+				timeval_current_ofs(ctdb->tunable.monitor_interval, 0), 
+				ctdb_check_health, ctdb);
+	}
+
+}
+
+
+/*
   see if the event scripts think we are healthy
  */
 static void ctdb_check_health(struct event_context *ev, struct timed_event *te, 
@@ -146,16 +171,25 @@
 	struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
 	int ret;
 
-	if (ctdb->monitoring_mode == CTDB_MONITORING_DISABLED) {
+	if (ctdb->monitoring_mode == CTDB_MONITORING_DISABLED && ctdb->done_startup) {
 		event_add_timed(ctdb->ev, ctdb->monitor_context,
 				timeval_current_ofs(ctdb->tunable.monitor_interval, 0), 
 				ctdb_check_health, ctdb);
 		return;
 	}
 	
-	ret = ctdb_event_script_callback(ctdb, 
-					 timeval_current_ofs(ctdb->tunable.script_timeout, 0),
-					 ctdb->monitor_context, ctdb_health_callback, ctdb, "monitor");
+	if (!ctdb->done_startup) {
+		ret = ctdb_event_script_callback(ctdb, 
+						 timeval_current_ofs(ctdb->tunable.script_timeout, 0),
+						 ctdb->monitor_context, ctdb_startup_callback, 
+						 ctdb, "startup");
+	} else {
+		ret = ctdb_event_script_callback(ctdb, 
+						 timeval_current_ofs(ctdb->tunable.script_timeout, 0),
+						 ctdb->monitor_context, ctdb_health_callback, 
+						 ctdb, "monitor");
+	}
+
 	if (ret != 0) {
 		DEBUG(0,("Unable to launch monitor event script\n"));
 		event_add_timed(ctdb->ev, ctdb->monitor_context, 



More information about the samba-cvs mailing list