Make winbind use epoll where available

Volker Lendecke Volker.Lendecke at SerNet.DE
Thu Jan 17 08:57:26 MST 2013


Hi!

Attached find a patchset to make winbind use the standard
tevent_context_init function which uses epoll where
available. Before this winbind would always do poll. Ran a
successful private autobuild.

One customer test brought winbind from 100% CPU over minutes
down to barely being seen in "top" for obvious reasons.

Comments?

Thanks,

Volker

-- 
SerNet GmbH, Bahnhofsallee 1b, 37081 Göttingen
phone: +49-551-370000-0, fax: +49-551-370000-9
AG Göttingen, HRB 2816, GF: Dr. Johannes Loxen
http://www.sernet.de, mailto:kontakt at sernet.de
-------------- next part --------------
From 605c93d4893e6975f213062638ad91dda0f53edc Mon Sep 17 00:00:00 2001
From: Volker Lendecke <vl at samba.org>
Date: Wed, 16 Jan 2013 12:00:00 +0100
Subject: [PATCH 1/4] winbind: Use standard tevent_context_init

This makes winbind use epoll instead of poll
---
 source3/winbindd/winbindd.c       |   38 ++++++++++++++++++++++++++++++++-----
 source3/winbindd/winbindd.h       |    2 --
 source3/winbindd/winbindd_proto.h |    1 +
 3 files changed, 34 insertions(+), 7 deletions(-)

diff --git a/source3/winbindd/winbindd.c b/source3/winbindd/winbindd.c
index 698c96c..ebaa080 100644
--- a/source3/winbindd/winbindd.c
+++ b/source3/winbindd/winbindd.c
@@ -49,14 +49,42 @@ static bool interactive = False;
 
 extern bool override_logfile;
 
+struct tevent_context *winbind_event_context(void)
+{
+	static struct tevent_context *ev = NULL;
+
+	if (ev != NULL) {
+		return ev;
+	}
+
+	/*
+	 * Note we MUST use the NULL context here, not the autofree context,
+	 * to avoid side effects in forked children exiting.
+	 */
+	ev = tevent_context_init(NULL);
+	if (ev == NULL) {
+		smb_panic("Could not init winbindd's messaging context.\n");
+	}
+	return ev;
+}
+
 struct messaging_context *winbind_messaging_context(void)
 {
-	struct messaging_context *msg_ctx = server_messaging_context();
-	if (likely(msg_ctx != NULL)) {
-		return msg_ctx;
+	static struct messaging_context *msg = NULL;
+
+	if (msg != NULL) {
+		return msg;
+	}
+
+	/*
+	 * Note we MUST use the NULL context here, not the autofree context,
+	 * to avoid side effects in forked children exiting.
+	 */
+	msg = messaging_init(NULL, winbind_event_context());
+	if (msg == NULL) {
+		smb_panic("Could not init winbindd's messaging context.\n");
 	}
-	smb_panic("Could not init winbindd's messaging context.\n");
-	return NULL;
+	return msg;
 }
 
 /* Reload configuration */
diff --git a/source3/winbindd/winbindd.h b/source3/winbindd/winbindd.h
index 33c7bbe..c01cac7 100644
--- a/source3/winbindd/winbindd.h
+++ b/source3/winbindd/winbindd.h
@@ -396,6 +396,4 @@ struct WINBINDD_CCACHE_ENTRY {
 #define WINBINDD_PAM_AUTH_KRB5_RENEW_TIME 2592000 /* one month */
 #define DOM_SEQUENCE_NONE ((uint32)-1)
 
-#define winbind_event_context server_event_context
-
 #endif /* _WINBINDD_H */
diff --git a/source3/winbindd/winbindd_proto.h b/source3/winbindd/winbindd_proto.h
index 44693d7..33b0f01 100644
--- a/source3/winbindd/winbindd_proto.h
+++ b/source3/winbindd/winbindd_proto.h
@@ -34,6 +34,7 @@ bool winbindd_use_idmap_cache(void);
 bool winbindd_use_cache(void);
 const char *get_winbind_pipe_dir(void);
 char *get_winbind_priv_pipe_dir(void);
+struct tevent_context *winbind_event_context(void);
 int main(int argc, char **argv, char **envp);
 
 /* The following definitions come from winbindd/winbindd_ads.c  */
-- 
1.7.9.5


From 6da5ddb72a4d9e68c4aa9852332c491f0db86a96 Mon Sep 17 00:00:00 2001
From: Volker Lendecke <vl at samba.org>
Date: Thu, 17 Jan 2013 13:49:08 +0100
Subject: [PATCH 2/4] winbind: Introduce "struct child_handler_state"

This will make the next patch simpler. child_handler_state contains the
information that the handler for the parent fde needs to pass to
process_child_request
---
 source3/winbindd/winbindd_dual.c |   52 ++++++++++++++++++++++----------------
 1 file changed, 30 insertions(+), 22 deletions(-)

diff --git a/source3/winbindd/winbindd_dual.c b/source3/winbindd/winbindd_dual.c
index 60b15e3..47e3a2d 100644
--- a/source3/winbindd/winbindd_dual.c
+++ b/source3/winbindd/winbindd_dual.c
@@ -1289,10 +1289,15 @@ struct winbindd_domain *wb_child_domain(void)
 	return child_domain;
 }
 
+struct child_handler_state {
+	struct winbindd_child *child;
+	struct winbindd_cli_state cli;
+};
+
 static bool fork_domain_child(struct winbindd_child *child)
 {
 	int fdpair[2];
-	struct winbindd_cli_state state;
+	struct child_handler_state state;
 	struct winbindd_request request;
 	struct winbindd_response response;
 	struct winbindd_domain *primary_domain = NULL;
@@ -1313,9 +1318,10 @@ static bool fork_domain_child(struct winbindd_child *child)
 	}
 
 	ZERO_STRUCT(state);
-	state.pid = getpid();
-	state.request = &request;
-	state.response = &response;
+	state.child = child;
+	state.cli.pid = getpid();
+	state.cli.request = &request;
+	state.cli.response = &response;
 
 	child->pid = fork();
 
@@ -1356,12 +1362,12 @@ static bool fork_domain_child(struct winbindd_child *child)
 
 	DEBUG(10, ("Child process %d\n", (int)getpid()));
 
-	state.sock = fdpair[0];
+	state.cli.sock = fdpair[0];
 	close(fdpair[1]);
 
 	status = winbindd_reinit_after_fork(child, child->logfilename);
 
-	nwritten = sys_write(state.sock, &status, sizeof(status));
+	nwritten = sys_write(state.cli.sock, &status, sizeof(status));
 	if (nwritten != sizeof(status)) {
 		DEBUG(1, ("fork_domain_child: Could not write status: "
 			  "nwritten=%d, error=%s\n", (int)nwritten,
@@ -1490,7 +1496,7 @@ static bool fork_domain_child(struct winbindd_child *child)
 			_exit(1);
 		}
 
-		pfds->fd = state.sock;
+		pfds->fd = state.cli.sock;
 		pfds->events = POLLIN|POLLHUP;
 		num_pfds = 1;
 
@@ -1539,41 +1545,43 @@ static bool fork_domain_child(struct winbindd_child *child)
 		}
 
 		/* fetch a request from the main daemon */
-		status = child_read_request(&state);
+		status = child_read_request(&state.cli);
 
 		if (!NT_STATUS_IS_OK(status)) {
 			/* we lost contact with our parent */
 			_exit(0);
 		}
 
-		DEBUG(4,("child daemon request %d\n", (int)state.request->cmd));
+		DEBUG(4,("child daemon request %d\n",
+			 (int)state.cli.request->cmd));
 
-		ZERO_STRUCTP(state.response);
-		state.request->null_term = '\0';
-		state.mem_ctx = frame;
-		child_process_request(child, &state);
+		ZERO_STRUCTP(state.cli.response);
+		state.cli.request->null_term = '\0';
+		state.cli.mem_ctx = frame;
+		child_process_request(child, &state.cli);
 
 		DEBUG(4, ("Finished processing child request %d\n",
-			  (int)state.request->cmd));
+			  (int)state.cli.request->cmd));
 
-		SAFE_FREE(state.request->extra_data.data);
+		SAFE_FREE(state.cli.request->extra_data.data);
 
-		iov[0].iov_base = (void *)state.response;
+		iov[0].iov_base = (void *)state.cli.response;
 		iov[0].iov_len = sizeof(struct winbindd_response);
 		iov_count = 1;
 
-		if (state.response->length > sizeof(struct winbindd_response)) {
+		if (state.cli.response->length >
+		    sizeof(struct winbindd_response)) {
 			iov[1].iov_base =
-				(void *)state.response->extra_data.data;
-			iov[1].iov_len = state.response->length-iov[0].iov_len;
+				(void *)state.cli.response->extra_data.data;
+			iov[1].iov_len = state.cli.response->length-iov[0].iov_len;
 			iov_count = 2;
 		}
 
 		DEBUG(10, ("Writing %d bytes to parent\n",
-			   (int)state.response->length));
+			   (int)state.cli.response->length));
 
-		if (write_data_iov(state.sock, iov, iov_count) !=
-		    state.response->length) {
+		if (write_data_iov(state.cli.sock, iov, iov_count) !=
+		    state.cli.response->length) {
 			DEBUG(0, ("Could not write result\n"));
 			exit(1);
 		}
-- 
1.7.9.5


From adcd7d07fa587e7ad49598572b7f9113853bc301 Mon Sep 17 00:00:00 2001
From: Volker Lendecke <vl at samba.org>
Date: Thu, 17 Jan 2013 14:34:35 +0100
Subject: [PATCH 3/4] winbind: Handle child requests in a tevent_fd

This enables the use of standard tevent_loop_once in the child, which
now also uses epoll where available.
---
 source3/winbindd/winbindd_dual.c |  170 +++++++++++++++-----------------------
 1 file changed, 65 insertions(+), 105 deletions(-)

diff --git a/source3/winbindd/winbindd_dual.c b/source3/winbindd/winbindd_dual.c
index 47e3a2d..14e60f5 100644
--- a/source3/winbindd/winbindd_dual.c
+++ b/source3/winbindd/winbindd_dual.c
@@ -1294,6 +1294,58 @@ struct child_handler_state {
 	struct winbindd_cli_state cli;
 };
 
+static void child_handler(struct tevent_context *ev, struct tevent_fd *fde,
+			  uint16_t flags, void *private_data)
+{
+	struct child_handler_state *state =
+		(struct child_handler_state *)private_data;
+	NTSTATUS status;
+	struct iovec iov[2];
+	int iov_count;
+
+	/* fetch a request from the main daemon */
+	status = child_read_request(&state->cli);
+
+	if (!NT_STATUS_IS_OK(status)) {
+		/* we lost contact with our parent */
+		_exit(0);
+	}
+
+	DEBUG(4,("child daemon request %d\n",
+		 (int)state->cli.request->cmd));
+
+	ZERO_STRUCTP(state->cli.response);
+	state->cli.request->null_term = '\0';
+	state->cli.mem_ctx = talloc_tos();
+	child_process_request(state->child, &state->cli);
+
+	DEBUG(4, ("Finished processing child request %d\n",
+		  (int)state->cli.request->cmd));
+
+	SAFE_FREE(state->cli.request->extra_data.data);
+
+	iov[0].iov_base = (void *)state->cli.response;
+	iov[0].iov_len = sizeof(struct winbindd_response);
+	iov_count = 1;
+
+	if (state->cli.response->length >
+	    sizeof(struct winbindd_response)) {
+		iov[1].iov_base =
+			(void *)state->cli.response->extra_data.data;
+		iov[1].iov_len = state->cli.response->length-iov[0].iov_len;
+		iov_count = 2;
+	}
+
+	DEBUG(10, ("Writing %d bytes to parent\n",
+		   (int)state->cli.response->length));
+
+	if (write_data_iov(state->cli.sock, iov, iov_count) !=
+	    state->cli.response->length) {
+		DEBUG(0, ("Could not write result\n"));
+		exit(1);
+	}
+}
+
 static bool fork_domain_child(struct winbindd_child *child)
 {
 	int fdpair[2];
@@ -1303,6 +1355,7 @@ static bool fork_domain_child(struct winbindd_child *child)
 	struct winbindd_domain *primary_domain = NULL;
 	NTSTATUS status;
 	ssize_t nwritten;
+	struct tevent_fd *fde;
 
 	if (child->domain) {
 		DEBUG(10, ("fork_domain_child called for domain '%s'\n",
@@ -1465,21 +1518,23 @@ static bool fork_domain_child(struct winbindd_child *child)
 		}
 	}
 
+	fde = tevent_add_fd(winbind_event_context(), NULL, state.cli.sock,
+			    TEVENT_FD_READ, child_handler, &state);
+	if (fde == NULL) {
+		DEBUG(1, ("tevent_add_fd failed\n"));
+		_exit(1);
+	}
+
 	while (1) {
 
 		int ret;
-		struct pollfd *pfds;
-		int num_pfds;
-		int timeout;
-		struct timeval t;
-		struct timeval *tp;
 		TALLOC_CTX *frame = talloc_stackframe();
-		struct iovec iov[2];
-		int iov_count;
 
-		if (run_events_poll(winbind_event_context(), 0, NULL, 0)) {
-			TALLOC_FREE(frame);
-			continue;
+		ret = tevent_loop_once(winbind_event_context());
+		if (ret != 0) {
+			DEBUG(1, ("tevent_loop_once failed: %s\n",
+				  strerror(errno)));
+			_exit(1);
 		}
 
 		if (child->domain && child->domain->startup &&
@@ -1490,101 +1545,6 @@ static bool fork_domain_child(struct winbindd_child *child)
 			child->domain->startup = False;
 		}
 
-		pfds = talloc_zero(talloc_tos(), struct pollfd);
-		if (pfds == NULL) {
-			DEBUG(1, ("talloc failed\n"));
-			_exit(1);
-		}
-
-		pfds->fd = state.cli.sock;
-		pfds->events = POLLIN|POLLHUP;
-		num_pfds = 1;
-
-		timeout = INT_MAX;
-
-		if (!event_add_to_poll_args(
-			    winbind_event_context(), talloc_tos(),
-			    &pfds, &num_pfds, &timeout)) {
-			DEBUG(1, ("event_add_to_poll_args failed\n"));
-			_exit(1);
-		}
-		tp = get_timed_events_timeout(winbind_event_context(), &t);
-		if (tp) {
-			DEBUG(11,("select will use timeout of %u.%u seconds\n",
-				(unsigned int)tp->tv_sec, (unsigned int)tp->tv_usec ));
-		}
-
-		ret = poll(pfds, num_pfds, timeout);
-
-		if (run_events_poll(winbind_event_context(), ret,
-				    pfds, num_pfds)) {
-			/* We got a signal - continue. */
-			TALLOC_FREE(frame);
-			continue;
-		}
-
-		TALLOC_FREE(pfds);
-
-		if (ret == 0) {
-			DEBUG(11,("nothing is ready yet, continue\n"));
-			TALLOC_FREE(frame);
-			continue;
-		}
-
-		if (ret == -1 && errno == EINTR) {
-			/* We got a signal - continue. */
-			TALLOC_FREE(frame);
-			continue;
-		}
-
-		if (ret == -1 && errno != EINTR) {
-			DEBUG(0,("poll error occured\n"));
-			TALLOC_FREE(frame);
-			perror("poll");
-			_exit(1);
-		}
-
-		/* fetch a request from the main daemon */
-		status = child_read_request(&state.cli);
-
-		if (!NT_STATUS_IS_OK(status)) {
-			/* we lost contact with our parent */
-			_exit(0);
-		}
-
-		DEBUG(4,("child daemon request %d\n",
-			 (int)state.cli.request->cmd));
-
-		ZERO_STRUCTP(state.cli.response);
-		state.cli.request->null_term = '\0';
-		state.cli.mem_ctx = frame;
-		child_process_request(child, &state.cli);
-
-		DEBUG(4, ("Finished processing child request %d\n",
-			  (int)state.cli.request->cmd));
-
-		SAFE_FREE(state.cli.request->extra_data.data);
-
-		iov[0].iov_base = (void *)state.cli.response;
-		iov[0].iov_len = sizeof(struct winbindd_response);
-		iov_count = 1;
-
-		if (state.cli.response->length >
-		    sizeof(struct winbindd_response)) {
-			iov[1].iov_base =
-				(void *)state.cli.response->extra_data.data;
-			iov[1].iov_len = state.cli.response->length-iov[0].iov_len;
-			iov_count = 2;
-		}
-
-		DEBUG(10, ("Writing %d bytes to parent\n",
-			   (int)state.cli.response->length));
-
-		if (write_data_iov(state.cli.sock, iov, iov_count) !=
-		    state.cli.response->length) {
-			DEBUG(0, ("Could not write result\n"));
-			exit(1);
-		}
 		TALLOC_FREE(frame);
 	}
 }
-- 
1.7.9.5


From 7e167e4913091941de0b1fd4e5c1ae45c15e27be Mon Sep 17 00:00:00 2001
From: Volker Lendecke <vl at samba.org>
Date: Thu, 17 Jan 2013 15:22:32 +0100
Subject: [PATCH 4/4] smbtorture: Satisfy a linker dependency

---
 source3/torture/torture.c |    9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/source3/torture/torture.c b/source3/torture/torture.c
index d31c907..799c911 100644
--- a/source3/torture/torture.c
+++ b/source3/torture/torture.c
@@ -9153,7 +9153,14 @@ static struct {
 	{ "LOCAL-DBWRAP-CTDB", run_local_dbwrap_ctdb, 0 },
 	{NULL, NULL, 0}};
 
-
+/*
+ * dummy function to satisfy linker dependency
+ */
+struct tevent_context *winbind_event_context(void);
+struct tevent_context *winbind_event_context(void)
+{
+	return NULL;
+}
 
 /****************************************************************************
 run a specified test or "ALL"
-- 
1.7.9.5



More information about the samba-technical mailing list