From 00b0bcb071b2cab12efcee022fa38661260b7607 Mon Sep 17 00:00:00 2001 From: Uri Simchoni Date: Tue, 2 Jun 2015 21:49:25 +0300 Subject: [PATCH 1/6] winbindd: count completed requests on on active client connection Count the number of completed requests made on a client connection while it is active. Signed-off-by: Uri Simchoni Reviewed-by: Jeremy Allison --- source3/winbindd/winbindd.c | 1 + source3/winbindd/winbindd.h | 1 + 2 files changed, 2 insertions(+) diff --git a/source3/winbindd/winbindd.c b/source3/winbindd/winbindd.c index 4ffe79c..14d0b1a 100644 --- a/source3/winbindd/winbindd.c +++ b/source3/winbindd/winbindd.c @@ -854,6 +854,7 @@ static void winbind_client_response_written(struct tevent_req *req) state->response = NULL; state->cmd_name = "no request"; state->recv_fn = NULL; + ++state->completed_requests; req = wb_req_read_send(state, winbind_event_context(), state->sock, WINBINDD_MAX_EXTRA_DATA); diff --git a/source3/winbindd/winbindd.h b/source3/winbindd/winbindd.h index b2105e3..e829d44 100644 --- a/source3/winbindd/winbindd.h +++ b/source3/winbindd/winbindd.h @@ -58,6 +58,7 @@ struct winbindd_cli_state { int sock; /* Open socket from client */ pid_t pid; /* pid of client */ time_t last_access; /* Time of last access (read or write) */ + unsigned completed_requests; /* Number of requests completed on this connection */ bool privileged; /* Is the client 'privileged' */ TALLOC_CTX *mem_ctx; /* memory per request */ -- 2.2.0.rc0.207.ga3a616c From 049bf674c1dc8896a1b6bf24e4167083b21b88bf Mon Sep 17 00:00:00 2001 From: Uri Simchoni Date: Tue, 2 Jun 2015 21:53:10 +0300 Subject: [PATCH 2/6] winbindd: forcefully remove clients only if they did useful work Sometimes winbindd shuts down client connections in order to make room for newer connections. In the name of fairness, a connection can be shut down even if there's reason to believe the client needs this connection (i.e. only connection state is taken into account, not the last use time). This change restricts this fairness, and only allows shutting down a connection if the client has been served. Signed-off-by: Uri Simchoni Reviewed-by: Jeremy Allison --- source3/winbindd/winbindd.c | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/source3/winbindd/winbindd.c b/source3/winbindd/winbindd.c index 14d0b1a..a7baf67 100644 --- a/source3/winbindd/winbindd.c +++ b/source3/winbindd/winbindd.c @@ -1030,6 +1030,24 @@ static bool client_is_idle(struct winbindd_cli_state *state) { !state->pwent_state && !state->grent_state); } +/* Has client done any useful work + To shut down an idle client in order to make room for another, + we want to make sure it has done something useful - + otherwise we've just wasted resources on it. + Every winbindd connection starts on non-privileged socket + checking version and location of privileged socket. That's + two requests. After that, the client switches to privileged + socket if it can or tries to continue on non-privileged if + it cannot. + Therefore, to say that something useful was done, we need one + request completed on a privileged connection, or three on an + unprivileged one. +*/ +static bool client_did_work(struct winbindd_cli_state *state) { + return ((state->privileged && state->completed_requests > 0) || + (!state->privileged && state->completed_requests > 2)); +} + /* Shutdown client connection which has been idle for the longest time */ static bool remove_idle_client(void) @@ -1039,7 +1057,7 @@ static bool remove_idle_client(void) int nidle = 0; for (state = winbindd_client_list(); state; state = state->next) { - if (client_is_idle(state)) { + if (client_is_idle(state) && client_did_work(state)) { nidle++; if (!last_access || state->last_access < last_access) { last_access = state->last_access; -- 2.2.0.rc0.207.ga3a616c From 2ea07a04ae64c59e02286b0120fa0166ae15186f Mon Sep 17 00:00:00 2001 From: Uri Simchoni Date: Tue, 2 Jun 2015 22:09:44 +0300 Subject: [PATCH 3/6] winbindd: Add infrastructure to pause and resume listeners Add infrastructure, which allows pausing and resuming acceptance of new client connections Signed-off-by: Uri Simchoni Reviewed-by: Jeremy Allison --- source3/winbindd/winbindd.c | 54 +++++++++++++++++++++++++++++++++------------ 1 file changed, 40 insertions(+), 14 deletions(-) diff --git a/source3/winbindd/winbindd.c b/source3/winbindd/winbindd.c index a7baf67..0674bf5 100644 --- a/source3/winbindd/winbindd.c +++ b/source3/winbindd/winbindd.c @@ -54,6 +54,40 @@ static bool interactive = False; extern bool override_logfile; +struct winbindd_listen_state { + bool privileged; + int fd; + struct tevent_fd *fde; +}; + +static struct winbindd_listen_state *pub_state; +static struct winbindd_listen_state *priv_state; + +bool winbindd_is_accepting(void) +{ + return (tevent_fd_get_flags(pub_state->fde) & TEVENT_FD_READ) != 0; +} + +void winbindd_pause_accepting(void) +{ + tevent_fd_set_flags(pub_state->fde, + tevent_fd_get_flags(pub_state->fde) & + ~TEVENT_FD_READ); + tevent_fd_set_flags(priv_state->fde, + tevent_fd_get_flags(priv_state->fde) & + ~TEVENT_FD_READ); +} + +void winbindd_resume_accepting(void) +{ + tevent_fd_set_flags(pub_state->fde, + tevent_fd_get_flags(pub_state->fde) | + TEVENT_FD_READ); + tevent_fd_set_flags(priv_state->fde, + tevent_fd_get_flags(priv_state->fde) | + TEVENT_FD_READ); +} + struct tevent_context *winbind_event_context(void) { static struct tevent_context *ev = NULL; @@ -1111,11 +1145,6 @@ static void remove_timed_out_clients(void) } } -struct winbindd_listen_state { - bool privileged; - int fd; -}; - static void winbindd_listen_fde_handler(struct tevent_context *ev, struct tevent_fd *fde, uint16_t flags, @@ -1151,9 +1180,6 @@ char *get_winbind_priv_pipe_dir(void) static bool winbindd_setup_listeners(void) { - struct winbindd_listen_state *pub_state = NULL; - struct winbindd_listen_state *priv_state = NULL; - struct tevent_fd *fde; int rc; char *socket_path; @@ -1174,14 +1200,14 @@ static bool winbindd_setup_listeners(void) goto failed; } - fde = tevent_add_fd(winbind_event_context(), pub_state, pub_state->fd, + pub_state->fde = tevent_add_fd(winbind_event_context(), pub_state,pub_state->fd, TEVENT_FD_READ, winbindd_listen_fde_handler, pub_state); - if (fde == NULL) { + if (pub_state->fde == NULL) { close(pub_state->fd); goto failed; } - tevent_fd_set_auto_close(fde); + tevent_fd_set_auto_close(pub_state->fde); priv_state = talloc(winbind_event_context(), struct winbindd_listen_state); @@ -1206,14 +1232,14 @@ static bool winbindd_setup_listeners(void) goto failed; } - fde = tevent_add_fd(winbind_event_context(), priv_state, + priv_state->fde = tevent_add_fd(winbind_event_context(), priv_state, priv_state->fd, TEVENT_FD_READ, winbindd_listen_fde_handler, priv_state); - if (fde == NULL) { + if (priv_state->fde == NULL) { close(priv_state->fd); goto failed; } - tevent_fd_set_auto_close(fde); + tevent_fd_set_auto_close(priv_state->fde); return true; failed: -- 2.2.0.rc0.207.ga3a616c From f6f59d0a19d6479f23230a8e57f0ea67ea12e6a2 Mon Sep 17 00:00:00 2001 From: Uri Simchoni Date: Tue, 2 Jun 2015 22:11:39 +0300 Subject: [PATCH 4/6] winbindd: stop accepting new client connections if limit is reached Make winbindd client connection limit a hard limit. This keeps winbindd resource usage under control and prevents it from reaching OS file descriptor limit. Signed-off-by: Uri Simchoni Reviewed-by: Jeremy Allison --- source3/winbindd/winbindd.c | 36 +++++++++++++++++++++++++++++++++--- 1 file changed, 33 insertions(+), 3 deletions(-) diff --git a/source3/winbindd/winbindd.c b/source3/winbindd/winbindd.c index 0674bf5..21aac66 100644 --- a/source3/winbindd/winbindd.c +++ b/source3/winbindd/winbindd.c @@ -48,6 +48,8 @@ static bool client_is_idle(struct winbindd_cli_state *state); static void remove_client(struct winbindd_cli_state *state); +static bool remove_idle_client(void); +static void resume_accepting_if_needed(void); static bool opt_nocache = False; static bool interactive = False; @@ -898,6 +900,10 @@ static void winbind_client_response_written(struct tevent_req *req) } tevent_req_set_callback(req, winbind_client_request_read, state); state->io_req = req; + + if (!winbindd_is_accepting() && remove_idle_client()) { + resume_accepting_if_needed(); + } } void request_error(struct winbindd_cli_state *state) @@ -1002,6 +1008,17 @@ static void winbind_client_request_read(struct tevent_req *req) process_request(state); } +static void resume_accepting_if_needed(void) +{ + if (winbindd_num_clients() <= lp_winbind_max_clients() * 9 / 10) { + DEBUG(5,("winbindd: Resuming accept of connections " + "at %d client connections\n", + winbindd_num_clients())); + + winbindd_resume_accepting(); + } +} + /* Remove a client connection from client connection list */ static void remove_client(struct winbindd_cli_state *state) @@ -1054,6 +1071,10 @@ static void remove_client(struct winbindd_cli_state *state) winbindd_remove_client(state); TALLOC_FREE(state); + + if (!winbindd_is_accepting()) { + resume_accepting_if_needed(); + } } /* Is a client idle? */ @@ -1153,7 +1174,17 @@ static void winbindd_listen_fde_handler(struct tevent_context *ev, struct winbindd_listen_state *s = talloc_get_type_abort(private_data, struct winbindd_listen_state); - while (winbindd_num_clients() > lp_winbind_max_clients() - 1) { + remove_timed_out_clients(); + new_connection(s->fd, s->privileged); + + while (winbindd_num_clients() >= lp_winbind_max_clients()) { + /* Having to remove client connections may be a result + of mis-configuration. For example, there could be more + smbd connections than lp_winbind_max_clients(). + OTOH, the tevent backend and system/process limits on + open file descriptors may pose limits on the value + that can be set. + */ DEBUG(5,("winbindd: Exceeding %d client " "connections, removing idle " "connection.\n", lp_winbind_max_clients())); @@ -1162,11 +1193,10 @@ static void winbindd_listen_fde_handler(struct tevent_context *ev, "client connections, no idle " "connection found\n", lp_winbind_max_clients())); + winbindd_pause_accepting(); break; } } - remove_timed_out_clients(); - new_connection(s->fd, s->privileged); } /* -- 2.2.0.rc0.207.ga3a616c From 1697df93211e5d69a73b5cb25d60486042cde83e Mon Sep 17 00:00:00 2001 From: Uri Simchoni Date: Wed, 3 Jun 2015 00:36:27 +0300 Subject: [PATCH 5/6] winbind client: avoid vicious cycle created by client retry This patch cancels the retry policy of the winbind client. When winbindd fails to respond to a request within 30 seconds, the winbind client closes the connection and retries up to 10 times. In some cases, delayed response is a result of multiple requests from multiple clients piling up on the winbind domain child process. Retrying just piles more and more requests, creating a vicious cycle. Even in the case of a single request taking long to complete, there's no point in retrying because the retry request would just wait for the current request to complete. Better to wait patiently. There's one possible benefit in the retry, namely that winbindd typically caches the results, and therefore a retry might take a cached result, so the net effect of the retry may be to increase the timeout to 300 seconds. But a more straightforward way to have a 300 second timeout is to modify the timeout. Therefore the timeout is modified from 30 seconds to 300 seconds (IMHO 300 seconds is too much, but we have "winbind rquest timeout" with a default of 60 to make sure the request completes or fails within 60 seconds) Signed-off-by: Uri Simchoni Reviewed-by: Jeremy Allison --- nsswitch/wb_common.c | 26 +++++++++----------------- 1 file changed, 9 insertions(+), 17 deletions(-) diff --git a/nsswitch/wb_common.c b/nsswitch/wb_common.c index 036557a..8fbf279 100644 --- a/nsswitch/wb_common.c +++ b/nsswitch/wb_common.c @@ -537,7 +537,7 @@ static int winbind_read_sock(struct winbindd_context *ctx, if (ret == 0) { /* Not ready for read yet... */ - if (total_time >= 30) { + if (total_time >= 300) { /* Timeout */ winbind_close_sock(ctx); return -1; @@ -721,20 +721,16 @@ NSS_STATUS winbindd_request_response(struct winbindd_context *ctx, struct winbindd_response *response) { NSS_STATUS status = NSS_STATUS_UNAVAIL; - int count = 0; struct winbindd_context *wb_ctx = ctx; if (ctx == NULL) { wb_ctx = &wb_global_ctx; } - while ((status == NSS_STATUS_UNAVAIL) && (count < 10)) { - status = winbindd_send_request(wb_ctx, req_type, 0, request); - if (status != NSS_STATUS_SUCCESS) - return(status); - status = winbindd_get_response(wb_ctx, response); - count += 1; - } + status = winbindd_send_request(wb_ctx, req_type, 0, request); + if (status != NSS_STATUS_SUCCESS) + return(status); + status = winbindd_get_response(wb_ctx, response); return status; } @@ -745,20 +741,16 @@ NSS_STATUS winbindd_priv_request_response(struct winbindd_context *ctx, struct winbindd_response *response) { NSS_STATUS status = NSS_STATUS_UNAVAIL; - int count = 0; struct winbindd_context *wb_ctx = ctx; if (ctx == NULL) { wb_ctx = &wb_global_ctx; } - while ((status == NSS_STATUS_UNAVAIL) && (count < 10)) { - status = winbindd_send_request(wb_ctx, req_type, 1, request); - if (status != NSS_STATUS_SUCCESS) - return(status); - status = winbindd_get_response(wb_ctx, response); - count += 1; - } + status = winbindd_send_request(wb_ctx, req_type, 1, request); + if (status != NSS_STATUS_SUCCESS) + return(status); + status = winbindd_get_response(wb_ctx, response); return status; } -- 2.2.0.rc0.207.ga3a616c From 0b26a0921ad52f1db6d108d5981a725af5b2860f Mon Sep 17 00:00:00 2001 From: Uri Simchoni Date: Thu, 4 Jun 2015 09:04:15 +0300 Subject: [PATCH 6/6] winbindd: periodically remove timed out clients Periodically scan winbind client list and close connections in which either the client is idle, or the request is taking too long to complete. Signed-off-by: Uri Simchoni Reviewed-by: Jeremy Allison --- source3/winbindd/winbindd.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/source3/winbindd/winbindd.c b/source3/winbindd/winbindd.c index 21aac66..774bd0c 100644 --- a/source3/winbindd/winbindd.c +++ b/source3/winbindd/winbindd.c @@ -46,6 +46,8 @@ #undef DBGC_CLASS #define DBGC_CLASS DBGC_WINBIND +#define SCRUB_CLIENTS_INTERVAL 5 + static bool client_is_idle(struct winbindd_cli_state *state); static void remove_client(struct winbindd_cli_state *state); static bool remove_idle_client(void); @@ -1166,6 +1168,21 @@ static void remove_timed_out_clients(void) } } +static void winbindd_scrub_clients_handler(struct tevent_context *ev, + struct tevent_timer *te, + struct timeval current_time, + void *private_data) +{ + remove_timed_out_clients(); + if (tevent_add_timer(ev, + ev, + timeval_current_ofs(SCRUB_CLIENTS_INTERVAL,0), + winbindd_scrub_clients_handler, + NULL) == NULL) { + DEBUG(0, ("winbindd: failed to reschedule client scrubber\n")); + exit (1); + } +} static void winbindd_listen_fde_handler(struct tevent_context *ev, struct tevent_fd *fde, uint16_t flags, @@ -1271,6 +1288,10 @@ static bool winbindd_setup_listeners(void) } tevent_fd_set_auto_close(priv_state->fde); + winbindd_scrub_clients_handler(winbind_event_context(), + NULL, + timeval_current(), + NULL); return true; failed: TALLOC_FREE(pub_state); -- 2.2.0.rc0.207.ga3a616c