recursive mutexes in appl_head winbindd_cm.c?

Martin Pool mbp at samba.org
Fri Jan 10 06:33:00 GMT 2003


Here's my idea for fixing this in appliance-head, without reworking
the mutex reference count.

Basically it tries to 

 - avoid undefined behaviour in the case where we fail to acquire the
   mutex

 - avoid leaking locks in the case where we fail to connect to the
   server

 - avoid releasing the mutex more times than it has been acquired,
   because this causes a panic

I haven't tested this in place yet, but I thought I'd send it in the
hope that jra could tell me if I'm on the right track.



Index: winbindd_cm.c
===================================================================
RCS file: /data/cvs/samba/source/nsswitch/winbindd_cm.c,v
retrieving revision 1.33.2.19
diff -u -u -p -r1.33.2.19 winbindd_cm.c
--- winbindd_cm.c	10 Dec 2002 00:50:28 -0000	1.33.2.19
+++ winbindd_cm.c	10 Jan 2003 06:27:09 -0000
@@ -45,6 +45,22 @@
  */
 
 /*
+  The per-server mutex on opening server connections is required to
+  work around a suspected bug in NT, which causes failures if the same
+  client host tries to authenticate on two connections at the same
+  time.
+
+  In addition, the mutex is still held after opening the connection
+  when trying to do a NetLogon.
+
+  If we fail to acquire the mutex because somebody else is hogging it,
+  then we can still proceed to open the connection and we take our
+  chances with NT.  However we must then be careful not to release it.
+
+  This whole mechanism is quite different in HEAD.
+*/
+
+/*
    TODO:
 
      - I'm pretty annoyed by all the make_nmb_name() stuff.  It should be
@@ -68,7 +84,12 @@ struct winbindd_cm_conn {
 	fstring domain;
 	fstring controller;
 	fstring pipe_name;
+
+	/** Tells how many callers inside this process are using the
+	 * lock on connections to this server.  When 0, the
+	 * system-wide mutex in the tdb is released. **/
 	size_t mutex_ref_count;
+
 	struct cli_state *cli;
 	POLICY_HND pol;
 };
@@ -163,10 +184,16 @@ static void add_failed_connection_entry(
 
 
 
-/* Open a connction to the remote server, cache failures for 30 seconds */
-
+/**
+ * Open a connection to the remote server, cache failures for 30 seconds
+ *
+ * @param keep_mutex If true, a reservation on the server mutex is
+ * still held on successful return, so that the caller can use it and
+ * release it later.
+ **/
 static NTSTATUS cm_open_connection(const char *domain, const int pipe_index,
-				   struct winbindd_cm_conn *new_conn, BOOL keep_mutex)
+				   struct winbindd_cm_conn *new_conn,
+				   BOOL keep_mutex)
 {
 	struct failed_connection_cache *fcc;
 	NTSTATUS result;
@@ -228,13 +255,15 @@ static NTSTATUS cm_open_connection(const
 	DEBUG(5, ("connecting to %s from %s with username [%s]\\[%s]\n", 
 	      new_conn->controller, global_myname_unix(), ipc_domain, ipc_username));
 
+	if (!secrets_named_mutex(new_conn->controller, WINBIND_SERVER_MUTEX_WAIT_TIME,
+				 &new_conn->mutex_ref_count)) {
+		DEBUG(0,("cm_open_connection: mutex grab failed for %s\n",
+			 new_conn->controller));
+		/* continue anyway; note that the mutex may not actually be
+		 * held during the rest of this function. */
+	}
+	
 	for (i = 0; retry && (i < NUM_CLI_AUTH_CONNECT_RETRIES); i++) {
-
-		if (!secrets_named_mutex(new_conn->controller, WINBIND_SERVER_MUTEX_WAIT_TIME, &new_conn->mutex_ref_count)) {
-			DEBUG(0,("cm_open_connection: mutex grab failed for %s\n", new_conn->controller));
-			continue;
-		}
-
 		result = cli_full_connection(&new_conn->cli, global_myname_unix(), new_conn->controller, 
 			     &dc_ip, 0, CLI_AUTH_TIMEOUT, "IPC$", 
 			     "IPC", ipc_username, ipc_domain, 
@@ -249,7 +278,8 @@ static NTSTATUS cm_open_connection(const
 	SAFE_FREE(ipc_password);
 
 	if (!NT_STATUS_IS_OK(result)) {
-		secrets_named_mutex_release(new_conn->controller, &new_conn->mutex_ref_count);
+		if (new_conn->mutex_ref_count > 0)
+			secrets_named_mutex_release(new_conn->controller, &new_conn->mutex_ref_count);
 		add_failed_connection_entry(new_conn, result);
 		return result;
 	}
@@ -264,15 +294,19 @@ static NTSTATUS cm_open_connection(const
 		 * if the PDC is an NT4 box.   but since there is only one 2k 
 		 * specific UUID right now, i'm not going to bother.  --jerry
 		 */
-		secrets_named_mutex_release(new_conn->controller, &new_conn->mutex_ref_count);
+		if (new_conn->mutex_ref_count > 0)
+			secrets_named_mutex_release(new_conn->controller, &new_conn->mutex_ref_count);
 		if ( !is_win2k_pipe(pipe_index) )
 			add_failed_connection_entry(new_conn, result);
 		cli_shutdown(new_conn->cli);
 		return result;
 	}
 
-	if (!keep_mutex)
+	if (!keep_mutex && new_conn->mutex_ref_count > 0) {
+		/* Need to release it if we hold it */
 		secrets_named_mutex_release(new_conn->controller, &new_conn->mutex_ref_count);
+	}
+	
 	return NT_STATUS_OK;
 }
 
@@ -308,8 +342,15 @@ static BOOL connection_ok(struct winbind
 	return True;
 }
 
-/* Get a connection to the remote DC and open the pipe.  If there is already a connection, use that */
-
+/**
+ * Get a connection to the remote DC and open the pipe.  If there is
+ * already a connection, use that.
+ *
+ * @param keep_mutex If true, we attempt to hold a reservation on the server
+ * mutex on successful return, so that the caller can use it and release it
+ * later.  The caller needs to check the reference count to see whether it's
+ * actually held or not.
+ **/
 static NTSTATUS get_connection_from_cache(const char *domain, const char *pipe_name, struct winbindd_cm_conn **conn_out, BOOL keep_mutex) 
 {
 	struct winbindd_cm_conn *conn, conn_temp;


-- 
Martin 



More information about the samba-technical mailing list