Process hierarchy on a DC?

Ralph Böhme slow at samba.org
Sat Apr 29 14:36:49 UTC 2017


On Fri, Apr 28, 2017 at 01:11:30PM +0200, Stefan Metzmacher wrote:
> Am 28.04.2017 um 12:06 schrieb Ralph Böhme:
> > On Thu, Apr 27, 2017 at 02:39:21PM -0700, Jeremy Allison wrote:
> >> Much though I hate to revert, I think this might be a good case
> >> for it temporarily.
> > 
> > two patchsets attached:
> > 
> > 1) revert the use of tfork in samba_runcmd_send(). This restores the process
> > hierarchy and stopping samba by sending a single SIGTERM
> 
> Please add a large explaination to the commit message, basically a summary
> of this mail thread.

is this enough?

-slow
-------------- next part --------------
From 8339d6ff620af14ed3c7aa54952a4d57a290d637 Mon Sep 17 00:00:00 2001
From: Ralph Boehme <slow at samba.org>
Date: Fri, 28 Apr 2017 11:33:24 +0200
Subject: [PATCH] Revert "lib/util: make use of tfork in samba_runcmd_send()"

This reverts commit 292e46ab12d8ec172c9d3b26330d8d6028a1d5a5.

Processes run by tfork will have a parent pid of 1, they won't be childs
of the caller anymore.

When the source4 samba process uses samba_runcmd_send() to launch smbd
and winbindd the resulting process hierarchy becomes:

 PPID   PID  PGID   SID TTY      TPGID STAT   UID   TIME COMMAND
    1   516   510   510 ?           -1 S      111   0:02 avahi-daemon: running [samba-ad.local]
    1 29209 29209 29209 ?           -1 Ss       0   0:00 ./bin/samba
29209 29210 29209 29209 ?           -1 S        0   0:00  \_ ./bin/samba
29209 29211 29209 29209 ?           -1 S        0   0:00  \_ ./bin/samba
29209 29213 29209 29209 ?           -1 S        0   0:00  \_ ./bin/samba
29209 29215 29209 29209 ?           -1 S        0   0:00  \_ ./bin/samba
29209 29216 29209 29209 ?           -1 R        0   0:00  \_ ./bin/samba
29209 29217 29209 29209 ?           -1 S        0   0:00  \_ ./bin/samba
29209 29218 29209 29209 ?           -1 S        0   0:00  \_ ./bin/samba
29209 29220 29209 29209 ?           -1 S        0   0:00  \_ ./bin/samba
29209 29221 29209 29209 ?           -1 S        0   0:00  \_ ./bin/samba
29209 29222 29209 29209 ?           -1 S        0   0:00  \_ ./bin/samba
29209 29223 29209 29209 ?           -1 S        0   0:00  \_ ./bin/samba
29209 29224 29209 29209 ?           -1 S        0   0:00  \_ ./bin/samba
29209 29225 29209 29209 ?           -1 S        0   0:00  \_ ./bin/samba
    1 29214 29209 29209 ?           -1 S        0   0:00 ./bin/samba
29214 29219 29219 29219 ?           -1 Ss       0   0:00  \_ /home/slow/git/samba/scratch/bin/smbd -D --option=server role check:inhibit=yes --foreground
29219 29236 29219 29219 ?           -1 S        0   0:00      \_ /home/slow/git/samba/scratch/bin/smbd -D --option=server role check:inhibit=yes --foreground
29219 29237 29219 29219 ?           -1 S        0   0:00      \_ /home/slow/git/samba/scratch/bin/smbd -D --option=server role check:inhibit=yes --foreground
29219 29238 29219 29219 ?           -1 S        0   0:00      \_ /home/slow/git/samba/scratch/bin/smbd -D --option=server role check:inhibit=yes --foreground
    1 29228 29209 29209 ?           -1 S        0   0:00 ./bin/samba
29228 29230 29230 29230 ?           -1 Ss       0   0:00  \_ /home/slow/git/samba/scratch/bin/winbindd -D --option=server role check:inhibit=yes --foreground
29230 29239 29230 29230 ?           -1 S        0   0:00      \_ /home/slow/git/samba/scratch/bin/winbindd -D --option=server role check:inhibit=yes --foreground

They will still be in the same process group and session, but just not
be a child or subchild. For childs of the source4 samba process this
might be non desirable.

killing all processes by sending a signal to the main samba process
still works, because a pipe is used between the samba process and the
smbd and winbindd childs. Both watch for EOF on the pipe.

In the output above smbd and winbindd are in their own process group ans
session because they call become_daemon().

See also the discussion in this mailthread:
<https://lists.samba.org/archive/samba-technical/2017-April/120257.html>

Signed-off-by: Ralph Boehme <slow at samba.org>
---
 lib/util/util_runcmd.c | 103 ++++++++++++++++++++++---------------------------
 lib/util/util_runcmd.h |   3 +-
 2 files changed, 48 insertions(+), 58 deletions(-)

diff --git a/lib/util/util_runcmd.c b/lib/util/util_runcmd.c
index 7a2e1c6..9264cbb 100644
--- a/lib/util/util_runcmd.c
+++ b/lib/util/util_runcmd.c
@@ -29,8 +29,6 @@
 #include "system/filesys.h"
 #include "../lib/util/tevent_unix.h"
 #include "../lib/util/util_runcmd.h"
-#include "../lib/util/tfork.h"
-#include "../lib/util/sys_rw.h"
 
 static int samba_runcmd_state_destructor(struct samba_runcmd_state *state)
 {
@@ -108,7 +106,7 @@ struct tevent_req *samba_runcmd_send(TALLOC_CTX *mem_ctx,
 		return tevent_req_post(req, ev);
 	}
 
-	state->pid = tfork(&state->fd_status, NULL);
+	state->pid = fork();
 	if (state->pid == (pid_t)-1) {
 		close(p1[0]);
 		close(p1[1]);
@@ -132,12 +130,10 @@ struct tevent_req *samba_runcmd_send(TALLOC_CTX *mem_ctx,
 		set_blocking(state->fd_stdout, false);
 		set_blocking(state->fd_stderr, false);
 		set_blocking(state->fd_stdin,  false);
-		set_blocking(state->fd_status, false);
 
 		smb_set_close_on_exec(state->fd_stdin);
 		smb_set_close_on_exec(state->fd_stdout);
 		smb_set_close_on_exec(state->fd_stderr);
-		smb_set_close_on_exec(state->fd_status);
 
 		talloc_set_destructor(state, samba_runcmd_state_destructor);
 
@@ -149,7 +145,6 @@ struct tevent_req *samba_runcmd_send(TALLOC_CTX *mem_ctx,
 		if (tevent_req_nomem(state->fde_stdout, req)) {
 			close(state->fd_stdout);
 			close(state->fd_stderr);
-			close(state->fd_status);
 			return tevent_req_post(req, ev);
 		}
 		tevent_fd_set_auto_close(state->fde_stdout);
@@ -160,26 +155,11 @@ struct tevent_req *samba_runcmd_send(TALLOC_CTX *mem_ctx,
 						  samba_runcmd_io_handler,
 						  req);
 		if (tevent_req_nomem(state->fde_stdout, req)) {
-			close(state->fd_stdout);
 			close(state->fd_stderr);
-			close(state->fd_status);
 			return tevent_req_post(req, ev);
 		}
 		tevent_fd_set_auto_close(state->fde_stderr);
 
-		state->fde_status = tevent_add_fd(ev, state,
-						  state->fd_status,
-						  TEVENT_FD_READ,
-						  samba_runcmd_io_handler,
-						  req);
-		if (tevent_req_nomem(state->fde_stdout, req)) {
-			close(state->fd_stdout);
-			close(state->fd_stderr);
-			close(state->fd_status);
-			return tevent_req_post(req, ev);
-		}
-		tevent_fd_set_auto_close(state->fde_status);
-
 		if (!timeval_is_zero(&endtime)) {
 			tevent_req_set_endtime(req, ev, endtime);
 		}
@@ -251,10 +231,6 @@ static void samba_runcmd_io_handler(struct tevent_context *ev,
 	char *p;
 	int n, fd;
 
-	if (!(flags & TEVENT_FD_READ)) {
-		return;
-	}
-
 	if (fde == state->fde_stdout) {
 		level = state->stdout_log_level;
 		fd = state->fd_stdout;
@@ -262,39 +238,12 @@ static void samba_runcmd_io_handler(struct tevent_context *ev,
 		level = state->stderr_log_level;
 		fd = state->fd_stderr;
 	} else {
-		int status;
-		ssize_t nread;
-
-		/*
-		 * Note: reading on a non-blocking pipe is guaranteed to deliver
-		 * any pending data up to some kernel limit which is way beyond
-		 * what we're reading here.
-		 */
-		nread = sys_read(state->fd_status, &status, sizeof(status));
-		if (nread != sizeof(status)) {
-			DBG_ERR("Bad read on status pipe\n");
-			tevent_req_error(req, EIO);
-			return;
-		}
-
-		if (WIFEXITED(status)) {
-			status = WEXITSTATUS(status);
-		} else if (WIFSIGNALED(status)) {
-			status = WTERMSIG(status);
-		} else {
-			status = ECHILD;
-		}
-
-		DBG_NOTICE("Child %s exited %d\n", state->arg0, status);
-		if (status != 0) {
-			tevent_req_error(req, status);
-			return;
-		}
-
-		tevent_req_done(req);
 		return;
 	}
 
+	if (!(flags & TEVENT_FD_READ)) {
+		return;
+	}
 
 	n = read(fd, &state->buf[state->buf_used],
 		 sizeof(state->buf) - state->buf_used);
@@ -304,11 +253,53 @@ static void samba_runcmd_io_handler(struct tevent_context *ev,
 		if (fde == state->fde_stdout) {
 			talloc_free(fde);
 			state->fde_stdout = NULL;
-			return;
 		}
 		if (fde == state->fde_stderr) {
 			talloc_free(fde);
 			state->fde_stderr = NULL;
+		}
+		if (state->fde_stdout == NULL &&
+		    state->fde_stderr == NULL) {
+			int status;
+			/* the child has closed both stdout and
+			 * stderr, assume its dead */
+			pid_t pid = waitpid(state->pid, &status, 0);
+			if (pid != state->pid) {
+				if (errno == ECHILD) {
+					/* this happens when the
+					   parent has set SIGCHLD to
+					   SIG_IGN. In that case we
+					   can only get error
+					   information for the child
+					   via its logging. We should
+					   stop using SIG_IGN on
+					   SIGCHLD in the standard
+					   process model.
+					*/
+					DEBUG(0, ("Error in waitpid() unexpectedly got ECHILD "
+						  "for %s child %d - %s, "
+						  "someone has set SIGCHLD to SIG_IGN!\n",
+					state->arg0, (int)state->pid, strerror(errno)));
+					tevent_req_error(req, errno);
+					return;
+				}
+				DEBUG(0,("Error in waitpid() for child %s - %s \n",
+					 state->arg0, strerror(errno)));
+				if (errno == 0) {
+					errno = ECHILD;
+				}
+				tevent_req_error(req, errno);
+				return;
+			}
+			status = WEXITSTATUS(status);
+			DEBUG(3,("Child %s exited with status %d\n",
+				 state->arg0, status));
+			if (status != 0) {
+				tevent_req_error(req, status);
+				return;
+			}
+
+			tevent_req_done(req);
 			return;
 		}
 		return;
diff --git a/lib/util/util_runcmd.h b/lib/util/util_runcmd.h
index a887658..26fdbc6 100644
--- a/lib/util/util_runcmd.h
+++ b/lib/util/util_runcmd.h
@@ -27,8 +27,7 @@ struct samba_runcmd_state {
 	int stderr_log_level;
 	struct tevent_fd *fde_stdout;
 	struct tevent_fd *fde_stderr;
-	struct tevent_fd *fde_status;
-	int fd_stdin, fd_stdout, fd_stderr, fd_status;
+	int fd_stdin, fd_stdout, fd_stderr;
 	char *arg0;
 	pid_t pid;
 	char buf[1024];
-- 
2.9.3



More information about the samba-technical mailing list