[PATCH] (partial) tevent_req_poll() loops for ever....

Bo Yang boyang at novell.com
Mon Apr 20 16:04:55 GMT 2009


Hi, everyone:
     Take cm_connect_lsa() as an example. Assume the network connection is good initially, but the remote end is closed by the time cli_rpc_pipe_open_spnego_ntlmssp() is invoked. It returns NT_STATUS_PIPE_BROKEN (mapped from EPIPE). We then fall back to schannel or anonymous, which makes no difference: because the socket is now broken (but the fd is still owned by the current process, winbindd), the fd (socket) won't be writable or readable any more. Now look at what tevent_req_poll() does: req is always in progress, so tevent_loop_once() --> s3_event_loop_once() --> add the fd to select's writefds set --> select times out and returns 0, again and again. tevent_req_poll() loops forever.
     I think we should test the status after each rpc call to see whether it is BROKEN PIPE; if it is, we must close the socket and free all pipes. We must close the socket first and set cli->fd to -1. Otherwise, the pipe's destructor will try to write to the fd, which causes tevent_req_poll() to loop forever. My previous patch, which tests cli->fd, must be applied for this to work; otherwise winbindd just crashes in FD_SET(-1, &writefds).
     I have added a test of the status after the rpc calls in winbindd_cm.c, but that is not complete. We would have to add it in other places after each rpc call, which would be a lot of repeated work.
     Please correct me if I am wrong. Thanks!
     The patch is for master!

Best
       Regards
BoYang
20th, April

Best
   Regards
BoYang

-------------------------
Bo Yang, Software Engineer
Novell, Beijing, China
boyang at novell.com
-------------- next part --------------
>From a7e65a32d520e4991d2193dace081a11e607b58d Mon Sep 17 00:00:00 2001
From: Bo Yang <boyang at novell.com>
Date: Mon, 20 Apr 2009 23:16:18 +0800
Subject: [PATCH] s3: handle EPIPE in pipes

---
 source3/winbindd/winbindd_cm.c |   88 ++++++++++++++++++++++++++++++++++++++++
 1 files changed, 88 insertions(+), 0 deletions(-)

diff --git a/source3/winbindd/winbindd_cm.c b/source3/winbindd/winbindd_cm.c
index ed0a33a..8e59ad6 100644
--- a/source3/winbindd/winbindd_cm.c
+++ b/source3/winbindd/winbindd_cm.c
@@ -1652,6 +1652,29 @@ NTSTATUS init_dc_connection(struct winbindd_domain *domain)
 	return init_dc_connection_network(domain);
 }
 
+/* Tear down conn if err is NT_STATUS_PIPE_BROKEN. Returns true when the
+ * connection was torn down, false for any other status (conn untouched). */
+static bool handle_pipe_error(NTSTATUS err, struct winbindd_cm_conn *conn)
+{
+	if (!NT_STATUS_EQUAL(NT_STATUS_PIPE_BROKEN, err)) {
+		return false;
+	}
+
+	/* Close the socket before the pipes are freed: otherwise the pipe
+	 * destructors try to send on the dead fd and poll on it forever.
+	 * This helper can run twice for one failure (cm_connect_netlogon()
+	 * and then its caller), so guard against conn->cli being NULL;
+	 * presumably invalidate_cm_connection() clears it - TODO confirm. */
+	if (conn->cli != NULL && conn->cli->fd != -1) {
+		close(conn->cli->fd);
+		conn->cli->fd = -1;
+	}
+
+	/* Close all opened pipes; they are reopened on the next request. */
+	invalidate_cm_connection(conn);
+	return true;
+}
+
 /******************************************************************************
  Set the trust flags (direction and forest location) for a domain
 ******************************************************************************/
@@ -1666,6 +1689,7 @@ static bool set_dc_type_and_flags_trustinfo( struct winbindd_domain *domain )
 			NETR_TRUST_FLAG_OUTBOUND |
 			NETR_TRUST_FLAG_INBOUND);
 	struct rpc_pipe_client *cli;
+	struct winbindd_cm_conn *conn;
 	TALLOC_CTX *mem_ctx = NULL;
 
 	DEBUG(5, ("set_dc_type_and_flags_trustinfo: domain %s\n", domain->name ));
@@ -1689,6 +1713,8 @@ static bool set_dc_type_and_flags_trustinfo( struct winbindd_domain *domain )
 		return False;
 	}
 
+	conn = &our_domain->conn;
+
 	/* Use DsEnumerateDomainTrusts to get us the trust direction
 	   and type */
 
@@ -1698,6 +1724,7 @@ static bool set_dc_type_and_flags_trustinfo( struct winbindd_domain *domain )
 		DEBUG(5, ("set_dc_type_and_flags_trustinfo: Could not open "
 			  "a connection to %s for PIPE_NETLOGON (%s)\n", 
 			  domain->name, nt_errstr(result)));
+		handle_pipe_error(result, conn);
 		return False;
 	}
 
@@ -1715,6 +1742,7 @@ static bool set_dc_type_and_flags_trustinfo( struct winbindd_domain *domain )
 		DEBUG(0,("set_dc_type_and_flags_trustinfo: "
 			"failed to query trusted domain list: %s\n",
 			nt_errstr(result)));
+		handle_pipe_error(result, conn);
 		talloc_destroy(mem_ctx);
 		return false;
 	}
@@ -1776,6 +1804,7 @@ static void set_dc_type_and_flags_connect( struct winbindd_domain *domain )
 	struct policy_handle pol;
 	union dssetup_DsRoleInfo info;
 	union lsa_PolicyInformation *lsa_info = NULL;
+	struct winbindd_cm_conn *conn = &domain->conn;
 
 	if (!connection_ok(domain)) {
 		return;
@@ -1803,6 +1832,13 @@ static void set_dc_type_and_flags_connect( struct winbindd_domain *domain )
 		 * identifying so that we can in the end return with
 		 * domain->initialized = True - gd */
 
+		/* If handle_pipe_error() returns true, the pipe is
+		 * broken and the connection was torn down, so just
+		 * return. */
+		if (handle_pipe_error(result, conn)) {
+			return;
+		}
+
 		goto no_dssetup;
 	}
 
@@ -1817,6 +1853,9 @@ static void set_dc_type_and_flags_connect( struct winbindd_domain *domain )
 			  "on domain %s failed: (%s)\n",
 			  domain->name, nt_errstr(result)));
 
+		if (handle_pipe_error(result, conn)) {
+			return;
+		}
 		/* older samba3 DCs will return DCERPC_FAULT_OP_RNG_ERROR for
 		 * every opcode on the DSSETUP pipe, continue with
 		 * no_dssetup mode here as well to get domain->initialized
@@ -1845,6 +1884,7 @@ no_dssetup:
 		DEBUG(5, ("set_dc_type_and_flags_connect: Could not bind to "
 			  "PI_LSARPC on domain %s: (%s)\n",
 			  domain->name, nt_errstr(result)));
+		handle_pipe_error(result, conn);
 		TALLOC_FREE(cli);
 		TALLOC_FREE(mem_ctx);
 		return;
@@ -1925,6 +1965,8 @@ done:
 	DEBUG(5,("set_dc_type_and_flags_connect: domain %s is %srunning active directory.\n",
 		  domain->name, domain->active_directory ? "" : "NOT "));
 
+	handle_pipe_error(result, conn);
+
 	TALLOC_FREE(cli);
 
 	TALLOC_FREE(mem_ctx);
@@ -1968,6 +2010,7 @@ static bool cm_get_schannel_dcinfo(struct winbindd_domain *domain,
 {
 	NTSTATUS result;
 	struct rpc_pipe_client *netlogon_pipe;
+	struct winbindd_cm_conn *conn = &domain->conn;
 
 	if (lp_client_schannel() == False) {
 		return False;
@@ -1975,6 +2018,7 @@ static bool cm_get_schannel_dcinfo(struct winbindd_domain *domain,
 
 	result = cm_connect_netlogon(domain, &netlogon_pipe);
 	if (!NT_STATUS_IS_OK(result)) {
+		handle_pipe_error(result, conn);
 		return False;
 	}
 
@@ -2057,6 +2101,14 @@ NTSTATUS cm_connect_sam(struct winbindd_domain *domain, TALLOC_CTX *mem_ctx,
 			  "authenticated pipe: user %s\\%s. Error was "
 			  "%s\n", domain->name, domain_name,
 			  machine_account, nt_errstr(result)));
+
+		/* When pipe is broken, tear apart the connection.
+		 * Because after EPIPE, another round of select won't
+		 * return until timeout(9999s). */
+		if (handle_pipe_error(result, conn)) {
+			return result;
+		}
+		
 		goto schannel;
 	}
 
@@ -2075,6 +2127,11 @@ NTSTATUS cm_connect_sam(struct winbindd_domain *domain, TALLOC_CTX *mem_ctx,
 	DEBUG(10,("cm_connect_sam: ntlmssp-sealed rpccli_samr_Connect2 "
 		  "failed for domain %s, error was %s. Trying schannel\n",
 		  domain->name, nt_errstr(result) ));
+	
+	if (handle_pipe_error(result, conn)) {
+		return result;
+	}
+	
 	TALLOC_FREE(conn->samr_pipe);
 
  schannel:
@@ -2095,6 +2152,9 @@ NTSTATUS cm_connect_sam(struct winbindd_domain *domain, TALLOC_CTX *mem_ctx,
 		DEBUG(10,("cm_connect_sam: failed to connect to SAMR pipe for "
 			  "domain %s using schannel. Error was %s\n",
 			  domain->name, nt_errstr(result) ));
+		if (handle_pipe_error(result, conn)) {
+			return result;
+		}
 		goto anonymous;
 	}
 	DEBUG(10,("cm_connect_sam: connected to SAMR pipe for domain %s using "
@@ -2110,6 +2170,11 @@ NTSTATUS cm_connect_sam(struct winbindd_domain *domain, TALLOC_CTX *mem_ctx,
 	DEBUG(10,("cm_connect_sam: schannel-sealed rpccli_samr_Connect2 failed "
 		  "for domain %s, error was %s. Trying anonymous\n",
 		  domain->name, nt_errstr(result) ));
+
+	if (handle_pipe_error(result, conn)) {
+		return result;
+	}
+	
 	TALLOC_FREE(conn->samr_pipe);
 
  anonymous:
@@ -2144,6 +2209,7 @@ NTSTATUS cm_connect_sam(struct winbindd_domain *domain, TALLOC_CTX *mem_ctx,
  done:
 
 	if (!NT_STATUS_IS_OK(result)) {
+		handle_pipe_error(result, conn);
 		invalidate_cm_connection(conn);
 		return result;
 	}
@@ -2194,6 +2260,11 @@ NTSTATUS cm_connect_lsa(struct winbindd_domain *domain, TALLOC_CTX *mem_ctx,
 			  "%s\\%s. Error was %s. Trying schannel.\n",
 			  domain->name, conn->cli->domain,
 			  conn->cli->user_name, nt_errstr(result)));
+
+		if (handle_pipe_error(result, conn)) {
+			return result;
+		}
+		
 		goto schannel;
 	}
 
@@ -2211,6 +2282,10 @@ NTSTATUS cm_connect_lsa(struct winbindd_domain *domain, TALLOC_CTX *mem_ctx,
 	DEBUG(10,("cm_connect_lsa: rpccli_lsa_open_policy failed, trying "
 		  "schannel\n"));
 
+	if (handle_pipe_error(result, conn)) {
+		return result;
+	}
+
 	TALLOC_FREE(conn->lsa_pipe);
 
  schannel:
@@ -2232,6 +2307,10 @@ NTSTATUS cm_connect_lsa(struct winbindd_domain *domain, TALLOC_CTX *mem_ctx,
 		DEBUG(10,("cm_connect_lsa: failed to connect to LSA pipe for "
 			  "domain %s using schannel. Error was %s\n",
 			  domain->name, nt_errstr(result) ));
+		if (handle_pipe_error(result, conn)) {
+			return result;
+		}
+		
 		goto anonymous;
 	}
 	DEBUG(10,("cm_connect_lsa: connected to LSA pipe for domain %s using "
@@ -2247,6 +2326,10 @@ NTSTATUS cm_connect_lsa(struct winbindd_domain *domain, TALLOC_CTX *mem_ctx,
 	DEBUG(10,("cm_connect_lsa: rpccli_lsa_open_policy failed, trying "
 		  "anonymous\n"));
 
+	if (handle_pipe_error(result, conn)) {
+		return result;
+	}
+
 	TALLOC_FREE(conn->lsa_pipe);
 
  anonymous:
@@ -2264,6 +2347,7 @@ NTSTATUS cm_connect_lsa(struct winbindd_domain *domain, TALLOC_CTX *mem_ctx,
 					&conn->lsa_policy);
  done:
 	if (!NT_STATUS_IS_OK(result)) {
+		handle_pipe_error(result, conn);
 		invalidate_cm_connection(conn);
 		return result;
 	}
@@ -2308,6 +2392,7 @@ NTSTATUS cm_connect_netlogon(struct winbindd_domain *domain,
 					  &ndr_table_netlogon.syntax_id,
 					  &netlogon_pipe);
 	if (!NT_STATUS_IS_OK(result)) {
+		handle_pipe_error(result, conn);
 		return result;
 	}
 
@@ -2339,6 +2424,7 @@ NTSTATUS cm_connect_netlogon(struct winbindd_domain *domain,
 		 &neg_flags);
 
 	if (!NT_STATUS_IS_OK(result)) {
+		handle_pipe_error(result, conn);
 		TALLOC_FREE(netlogon_pipe);
 		return result;
 	}
@@ -2381,6 +2467,8 @@ NTSTATUS cm_connect_netlogon(struct winbindd_domain *domain,
 	if (!NT_STATUS_IS_OK(result)) {
 		DEBUG(3, ("Could not open schannel'ed NETLOGON pipe. Error "
 			  "was %s\n", nt_errstr(result)));
+		
+		handle_pipe_error(result, conn);
 
 		/* make sure we return something besides OK */
 		return !NT_STATUS_IS_OK(result) ? result : NT_STATUS_PIPE_NOT_AVAILABLE;
-- 
1.5.3



More information about the samba-technical mailing list