[PATCH] Handle SIGCHLD better in process_standard, remove process_{onefork, prefork}

Stefan (metze) Metzmacher metze at samba.org
Mon Mar 16 16:01:12 MDT 2015


Am 16.03.2015 um 03:02 schrieb Andrew Bartlett:
> On Mon, 2015-03-02 at 17:14 +1300, Andrew Bartlett wrote:
>> Attached is a patch set to properly handle SIGCHLD in process_standard
>> in the source4/ code, and in doing so ensure we catch errors from
>> samba_kcc and samba_dnsupdate.
>>
>> Metze,
>>
>> Can you look over this and if you are happy, review/push?  I would like
>> to have tests for a change like this, so if you have any good ideas
>> about how to do that, please let me know.  I've tested it manually by
>> killing off children with kill -9, and naturally the success case will
>> be covered well in autobuild. 
>>
>> Also, what do you think about removing process_prefork and
>> process_onefork?  I added them both - prefork I did as an exercise to
>> teach ddiss Samba4 development, and I can't remember why I did onefork -
>> I think it may have been for ideas around s3compat.  As far as I can
>> tell they are unused, but could in theory be invoked from the command
>> line. 
>>
>> Otherwise, they would need to be rewritten to use some common lib code
>> with process_standard. 
> 
> Metze,
> 
> Can I please get a review of the first of the patches I attached,
> re-attached here?  The others Jelmer has kindly reviewed, but are not
> much point until I can fix up process_standard and SIGCHLD.

Is this version ok for you?

I did some reformatting and fixed a memory leak in
standard_child_pipe_handler().

metze
-------------- next part --------------
From 1de91c949a4efa39ce6448194dbb4ea24b79d0b2 Mon Sep 17 00:00:00 2001
From: Andrew Bartlett <abartlet at samba.org>
Date: Thu, 19 Feb 2015 12:45:31 +1300
Subject: [PATCH] s4-process_standard: Remove signal(SIGCHLD, SIG_IGN)

We replace this with a pipe between parent and child, and then watch
for a read event in the parent to indicate that the child has gone away.

The removal of signal(SIGCHLD, SIG_IGN) requires us to then call
waitpid().  We can't do that in a main loop as we want to get the exit
status to the legitimate waitpid calls in routines like
samba_runcmd_*().

Signed-off-by: Andrew Bartlett <abartlet at samba.org>
Reviewed-by: Stefan Metzmacher <metze at samba.org>
---
 source4/smbd/process_standard.c | 163 +++++++++++++++++++++++++++++++++++++++-
 1 file changed, 161 insertions(+), 2 deletions(-)

diff --git a/source4/smbd/process_standard.c b/source4/smbd/process_standard.c
index c5377b3..950bef3 100644
--- a/source4/smbd/process_standard.c
+++ b/source4/smbd/process_standard.c
@@ -29,6 +29,13 @@
 #include "param/param.h"
 #include "ldb_wrap.h"
 
+struct standard_child_state {
+	pid_t pid;
+	int to_parent_fd;
+	int from_child_fd;
+	struct tevent_fd *from_child_fde;
+};
+
 NTSTATUS process_model_standard_init(void);
 
 /* we hold a pipe open in the parent, and the any child
@@ -42,11 +49,10 @@ static int child_pipe[2];
 static void standard_model_init(void)
 {
 	pipe(child_pipe);
-	signal(SIGCHLD, SIG_IGN);
 }
 
 /*
-  handle EOF on the child pipe
+  handle EOF on the parent-to-all-children pipe in the child
 */
 static void standard_pipe_handler(struct tevent_context *event_ctx, struct tevent_fd *fde, 
 				  uint16_t flags, void *private_data)
@@ -56,6 +62,121 @@ static void standard_pipe_handler(struct tevent_context *event_ctx, struct teven
 }
 
 /*
+  handle EOF on the child pipe in the parent, so we know when a
+  process terminates without using SIGCHLD or waiting on all possible pids.
+
+  We need to ensure we do not ignore SIGCHLD because we need it to
+  work to get a valid error code from samba_runcmd_*().
+ */
+static void standard_child_pipe_handler(struct tevent_context *ev,
+					struct tevent_fd *fde,
+					uint16_t flags,
+					void *private_data)
+{
+	struct standard_child_state *state
+		= talloc_get_type_abort(private_data, struct standard_child_state);
+	int status = 0;
+	pid_t pid;
+
+	/* the child has closed the pipe, assume it's dead */
+	errno = 0;
+	pid = waitpid(state->pid, &status, 0);
+
+	if (pid != state->pid) {
+		if (errno == ECHILD) {
+			/*
+			 * this happens when the
+			 * parent has set SIGCHLD to
+			 * SIG_IGN. In that case we
+			 * can only get error
+			 * information for the child
+			 * via its logging. We should
+			 * stop using SIG_IGN on
+			 * SIGCHLD in the standard
+			 * process model.
+			 */
+			DEBUG(0, ("Error in waitpid() unexpectedly got ECHILD "
+				  "for child %d - %s, someone has set SIGCHLD "
+				  "to SIG_IGN!\n",
+				  state->pid, strerror(errno)));
+			TALLOC_FREE(state);
+			return;
+		}
+		DEBUG(0, ("Error in waitpid() for child %d - %s \n",
+			  state->pid, strerror(errno)));
+		if (errno == 0) {
+			errno = ECHILD;
+		}
+		TALLOC_FREE(state);
+		return;
+	}
+	if (WIFEXITED(status)) {
+		status = WEXITSTATUS(status);
+		DEBUG(2, ("Child %d exited with status %d - %s\n",
+			  state->pid, status, strerror(status)));
+	} else if (WIFSIGNALED(status)) {
+		status = WTERMSIG(status);
+		DEBUG(0, ("Child %d terminated with signal %d\n",
+			  state->pid, status));
+	}
+	TALLOC_FREE(state);
+	return;
+}
+
+static struct standard_child_state *setup_standard_child_pipe(struct tevent_context *ev)
+{
+	struct standard_child_state *state;
+	int parent_child_pipe[2];
+	int ret;
+
+	/*
+	 * Prepare a pipe to allow us to know when the child exits,
+	 * because it will trigger a read event on this private
+	 * pipe.
+	 *
+	 * We do all this before the accept and fork(), so we can
+	 * clean up if it fails.
+	 */
+	state = talloc_zero(ev, struct standard_child_state);
+	if (state == NULL) {
+		return NULL;
+	}
+
+	ret = pipe(parent_child_pipe);
+	if (ret == -1) {
+		DEBUG(0, ("Failed to create parent-child pipe to handle "
+			  "SIGCHLD to track new process for socket\n"));
+		TALLOC_FREE(state);
+		return NULL;
+	}
+
+	smb_set_close_on_exec(parent_child_pipe[0]);
+	smb_set_close_on_exec(parent_child_pipe[1]);
+
+	state->from_child_fd = parent_child_pipe[0];
+	state->to_parent_fd = parent_child_pipe[1];
+
+	/*
+	 * The basic purpose of calling this handler is to ensure we
+	 * call waitpid() and so avoid zombies (now that we no longer
+	 * use SIG_IGN for SIGCHLD), but it also allows us to clean
+	 * up other resources in the future.
+	 */
+	state->from_child_fde = tevent_add_fd(ev, state,
+					      state->from_child_fd,
+					      TEVENT_FD_READ,
+					      standard_child_pipe_handler,
+					      state);
+	if (state->from_child_fde == NULL) {
+		TALLOC_FREE(state);
+		return NULL;
+	}
+	tevent_fd_set_auto_close(state->from_child_fde);
+
+	return state;
+}
+
+/*
   called when a listening socket becomes readable. 
 */
 static void standard_accept_connection(struct tevent_context *ev, 
@@ -70,6 +191,12 @@ static void standard_accept_connection(struct tevent_context *ev,
 	struct socket_context *sock2;
 	pid_t pid;
 	struct socket_address *c, *s;
+	struct standard_child_state *state;
+
+	state = setup_standard_child_pipe(ev);
+	if (state == NULL) {
+		return;
+	}
 
 	/* accept an incoming connection. */
 	status = socket_accept(sock, &sock2);
@@ -79,18 +206,32 @@ static void standard_accept_connection(struct tevent_context *ev,
 		/* this looks strange, but is correct. We need to throttle things until
 		   the system clears enough resources to handle this new socket */
 		sleep(1);
+		close(state->to_parent_fd);
+		TALLOC_FREE(state);
 		return;
 	}
 
 	pid = fork();
 
 	if (pid != 0) {
+		close(state->to_parent_fd);
+		state->to_parent_fd = -1;
+
+		if (pid > 0) {
+			state->pid = pid;
+		} else {
+			TALLOC_FREE(state);
+		}
+
 		/* parent or error code ... */
 		talloc_free(sock2);
 		/* go back to the event loop */
 		return;
 	}
 
+	/* this leaves state->to_parent_fd open */
+	TALLOC_FREE(state);
+
 	pid = getpid();
 
 	/* This is now the child code. We need a completely new event_context to work with */
@@ -149,14 +290,32 @@ static void standard_new_task(struct tevent_context *ev,
 			      void *private_data)
 {
 	pid_t pid;
+	struct standard_child_state *state;
+
+	state = setup_standard_child_pipe(ev);
+	if (state == NULL) {
+		return;
+	}
 
 	pid = fork();
 
 	if (pid != 0) {
+		close(state->to_parent_fd);
+		state->to_parent_fd = -1;
+
+		if (pid > 0) {
+			state->pid = pid;
+		} else {
+			TALLOC_FREE(state);
+		}
+
 		/* parent or error code ... go back to the event loop */
 		return;
 	}
 
+	/* this leaves state->to_parent_fd open */
+	TALLOC_FREE(state);
+
 	pid = getpid();
 
 	/* this will free all the listening sockets and all state that
-- 
1.9.1
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 181 bytes
Desc: OpenPGP digital signature
URL: <http://lists.samba.org/pipermail/samba-technical/attachments/20150316/942afa49/attachment.pgp>


More information about the samba-technical mailing list