[SCM] CTDB repository - branch master updated - bfba5c7249eff8a10a43b53c1b89dd44b625fd10

Ronnie Sahlberg sahlberg at samba.org
Mon Jul 7 17:53:02 GMT 2008


The branch, master has been updated
       via  bfba5c7249eff8a10a43b53c1b89dd44b625fd10 (commit)
      from  60f3c04bd8b20ecbe937ffed08875cdc6898b422 (commit)

http://gitweb.samba.org/?p=sahlberg/ctdb.git;a=shortlog;h=master


- Log -----------------------------------------------------------------
commit bfba5c7249eff8a10a43b53c1b89dd44b625fd10
Author: Ronnie Sahlberg <ronniesahlberg at gmail.com>
Date:   Tue Jul 8 03:48:11 2008 +1000

    waitpid() can block for a long time if the child is slow to terminate,
    so we should not call it from the main daemon.
    
    1. Set SIGCHLD to SIG_DFL so that this signal is ignored (the default action for SIGCHLD).
    
    2. Get rid of all waitpid() calls.
    
    3. Change reporting of the event script status code from _exit()/waitpid() to write()/read() of one byte across the pipe.

-----------------------------------------------------------------------

Summary of changes:
 server/ctdb_daemon.c     |    3 +++
 server/ctdb_freeze.c     |    1 -
 server/ctdb_lockwait.c   |    2 --
 server/ctdb_persistent.c |    2 --
 server/ctdb_recover.c    |    1 -
 server/ctdb_recoverd.c   |    5 -----
 server/ctdb_traverse.c   |    1 -
 server/eventscript.c     |   26 ++++++++++++++++++--------
 8 files changed, 21 insertions(+), 20 deletions(-)


Changeset truncated at 500 lines:

diff --git a/server/ctdb_daemon.c b/server/ctdb_daemon.c
index 326ab60..a7f9086 100644
--- a/server/ctdb_daemon.c
+++ b/server/ctdb_daemon.c
@@ -662,6 +662,9 @@ int ctdb_start_daemon(struct ctdb_context *ctdb, bool do_fork)
 		}
 	}
 	block_signal(SIGPIPE);
+	
+	/* we dont want any SIGCHLD */
+	signal(SIGCHLD, SIG_DFL);
 
 	if (ctdb->do_setsched) {
 		/* try to set us up as realtime */
diff --git a/server/ctdb_freeze.c b/server/ctdb_freeze.c
index 8e75f61..e39332e 100644
--- a/server/ctdb_freeze.c
+++ b/server/ctdb_freeze.c
@@ -87,7 +87,6 @@ static int ctdb_freeze_handle_destructor(struct ctdb_freeze_handle *h)
 	ctdb->freeze_handle = NULL;
 
 	kill(h->child, SIGKILL);
-	waitpid(h->child, NULL, 0);
 	return 0;
 }
 
diff --git a/server/ctdb_lockwait.c b/server/ctdb_lockwait.c
index 5b00198..03a7eed 100644
--- a/server/ctdb_lockwait.c
+++ b/server/ctdb_lockwait.c
@@ -72,7 +72,6 @@ static void lockwait_handler(struct event_context *ev, struct fd_event *fde,
 	tdb_chainlock_unmark(tdb, key);
 
 	kill(child, SIGKILL);
-	waitpid(child, NULL, 0);
 	talloc_free(tmp_ctx);
 }
 
@@ -80,7 +79,6 @@ static int lockwait_destructor(struct lockwait_handle *h)
 {
 	h->ctdb->statistics.pending_lockwait_calls--;
 	kill(h->child, SIGKILL);
-	waitpid(h->child, NULL, 0);
 	return 0;
 }
 
diff --git a/server/ctdb_persistent.c b/server/ctdb_persistent.c
index 3ba961e..713950a 100644
--- a/server/ctdb_persistent.c
+++ b/server/ctdb_persistent.c
@@ -222,7 +222,6 @@ static int childwrite_destructor(struct childwrite_handle *h)
 {
 	h->ctdb->statistics.pending_childwrite_calls--;
 	kill(h->child, SIGKILL);
-	waitpid(h->child, NULL, 0);
 	return 0;
 }
 
@@ -260,7 +259,6 @@ static void childwrite_handler(struct event_context *ev, struct fd_event *fde,
 	callback(c, p);
 
 	kill(child, SIGKILL);
-	waitpid(child, NULL, 0);
 	talloc_free(tmp_ctx);
 }
 
diff --git a/server/ctdb_recover.c b/server/ctdb_recover.c
index 29673ea..7b8d28c 100644
--- a/server/ctdb_recover.c
+++ b/server/ctdb_recover.c
@@ -459,7 +459,6 @@ static void ctdb_set_recmode_timeout(struct event_context *ev, struct timed_even
 static int set_recmode_destructor(struct ctdb_set_recmode_state *state)
 {
 	kill(state->child, SIGKILL);
-	waitpid(state->child, NULL, 0);
 	return 0;
 }
 
diff --git a/server/ctdb_recoverd.c b/server/ctdb_recoverd.c
index 80443bb..837c0b1 100644
--- a/server/ctdb_recoverd.c
+++ b/server/ctdb_recoverd.c
@@ -2933,11 +2933,6 @@ static void ctdb_check_recd(struct event_context *ev, struct timed_event *te,
 {
 	struct ctdb_context *ctdb = talloc_get_type(p, struct ctdb_context);
 
-	/* make sure we harvest the child if signals are blocked for some
-	   reason
-	*/
-	waitpid(ctdb->recoverd_pid, 0, WNOHANG);
-
 	if (kill(ctdb->recoverd_pid, 0) != 0) {
 		DEBUG(DEBUG_ERR,("Recovery daemon (pid:%d) is no longer running. Shutting down main daemon\n", (int)ctdb->recoverd_pid));
 
diff --git a/server/ctdb_traverse.c b/server/ctdb_traverse.c
index 6c84d02..10895ed 100644
--- a/server/ctdb_traverse.c
+++ b/server/ctdb_traverse.c
@@ -74,7 +74,6 @@ static void ctdb_traverse_local_handler(uint8_t *rawdata, size_t length, void *p
 static int traverse_local_destructor(struct ctdb_traverse_local_handle *h)
 {
 	kill(h->child, SIGKILL);
-	waitpid(h->child, NULL, 0);
 	return 0;
 }
 
diff --git a/server/eventscript.c b/server/eventscript.c
index 54d914b..9002007 100644
--- a/server/eventscript.c
+++ b/server/eventscript.c
@@ -210,18 +210,20 @@ static void ctdb_event_script_handler(struct event_context *ev, struct fd_event
 {
 	struct ctdb_event_script_state *state = 
 		talloc_get_type(p, struct ctdb_event_script_state);
-	int status = -1;
 	void (*callback)(struct ctdb_context *, int, void *) = state->callback;
 	void *private_data = state->private_data;
 	struct ctdb_context *ctdb = state->ctdb;
+	signed char rt = -1;
+	int ret;
 
-	waitpid(state->child, &status, 0);
-	if (status != -1) {
-		status = WEXITSTATUS(status);
+	ret = read(state->fd[0], &rt, sizeof(rt));
+	if (ret	!= sizeof(rt)) {
+		DEBUG(DEBUG_ERR, (__location__ " Failed to read from pipe to eventscript child.\n"));
 	}
+
 	talloc_set_destructor(state, NULL);
 	talloc_free(state);
-	callback(ctdb, status, private_data);
+	callback(ctdb, rt, private_data);
 
 	ctdb->event_script_timeouts = 0;
 }
@@ -293,7 +295,6 @@ static int event_script_destructor(struct ctdb_event_script_state *state)
 {
 	DEBUG(DEBUG_ERR,(__location__ " Sending SIGTERM to child pid:%d\n", state->child));
 	kill(state->child, SIGTERM);
-	waitpid(state->child, NULL, 0);
 	return 0;
 }
 
@@ -336,13 +337,22 @@ static int ctdb_event_script_callback_v(struct ctdb_context *ctdb,
 	}
 
 	if (state->child == 0) {
+		signed char rt;
+
 		close(state->fd[0]);
 		if (ctdb->do_setsched) {
 			ctdb_restore_scheduler(ctdb);
 		}
 		set_close_on_exec(state->fd[1]);
-		ret = ctdb_event_script_v(ctdb, state->options);
-		_exit(ret);
+		rt = ctdb_event_script_v(ctdb, state->options);
+		do {
+			ret = write(state->fd[1], &rt, sizeof(rt));
+			if (ret != sizeof(rt)) {
+				DEBUG(DEBUG_ERR, (__location__ " Failed to write to pipe from eventscript child. Trying again in one second\n"));
+				sleep(1);
+			}
+		} while (ret != sizeof(rt));
+		_exit(rt);
 	}
 
 	talloc_set_destructor(state, event_script_destructor);


-- 
CTDB repository


More information about the samba-cvs mailing list