[SCM] Samba Shared Repository - branch master updated

Fri Feb 5 23:18:13 MST 2010

The branch, master has been updated
       via  5dbf175... s3-events: make the old timed events compatible with tevent
       via  dd498d2... s3-smbd: add a rate limited cleanup of brl, connections and locking db
       via  74267d6... s3-brlock: we don't need these MSG_SMB_UNLOCK calls now
       via  5b398ed... s3-brlock: add a minimim retry time for pending blocking locks
      from  5bb89bc... s4-ldb: fixed api.py selftest

http://gitweb.samba.org/?p=samba.git;a=shortlog;h=master


- Log -----------------------------------------------------------------
commit 5dbf175c75bd6139f3238f36665000641f7f7f79
Author: Andrew Tridgell <tridge at samba.org>
Date:   Fri Feb 5 19:14:45 2010 -0800

    s3-events: make the old timed events compatible with tevent
    
    tevent ensures that a timed event is only called once. The old events
    code relied on the called handler removing the event itself. If the
    handler removed the event after calling a function which invoked the
    event loop then the timed event could loop forever.
    
    This change makes the two timed event systems more compatible, by
    allowing the handler to free the te if it wants to, but ensuring it is
    off the linked list of events before the handler is called, and
    ensuring it is freed even if the handler doesn't free it.

commit dd498d2eecf124a03b6117ddab892a1112f9e9db
Author: Andrew Tridgell <tridge at samba.org>
Date:   Fri Feb 5 21:08:56 2010 -0800

    s3-smbd: add a rate limited cleanup of brl, connections and locking db
    
    On unclean shutdown we can end up with stale entries in the brlock,
    connections and locking db. Previously we would do the cleanup on
    every unclean exit, but that can cause smbd to be completely
    unavailable for several minutes when a large number of child smbd
    processes exit.
    
    This adds a rate limited cleanup of the databases, with the default
    that cleanup happens at most every 20s

commit 74267d652485cdcb711f734f0d80da0fb1495867
Author: Andrew Tridgell <tridge at samba.org>
Date:   Fri Feb 5 21:02:24 2010 -0800

    s3-brlock: we don't need these MSG_SMB_UNLOCK calls now
    
    These have been replaced with the min timeout in blocking.c

commit 5b398edbee672392f2cea260ab17445ecca927d7
Author: Andrew Tridgell <tridge at samba.org>
Date:   Fri Feb 5 20:59:43 2010 -0800

    s3-brlock: add a minimim retry time for pending blocking locks
    
    When we are waiting on a pending byte range lock, another smbd might
    exit uncleanly, and therefore not notify us of the removal of the
    lock, and thus not trigger the lock to be retried.
    
    We coped with this up to now by adding a message_send_all() in the
    SIGCHLD and cluster reconfigure handlers to send a MSG_SMB_UNLOCK to
    all smbd processes. That would generate O(N^2) work when a large
    number of clients disconnected at once (such as on a network outage),
    which could leave the whole system unusable for a very long time (many
    minutes, or even longer).
    
    By adding a minimum re-check time for pending byte range locks we
    avoid this problem by ensuring that pending locks are retried at a
    more regular interval.

-----------------------------------------------------------------------

Summary of changes:
 source3/lib/ctdbd_conn.c |    8 -------
 source3/lib/events.c     |   21 +++++++++++++++++-
 source3/smbd/blocking.c  |   20 ++++++++++++++++++
 source3/smbd/server.c    |   50 ++++++++++++++++++++++++++++++++++++++-------
 4 files changed, 81 insertions(+), 18 deletions(-)


Changeset truncated at 500 lines:

diff --git a/source3/lib/ctdbd_conn.c b/source3/lib/ctdbd_conn.c
index 84bba3b..8ddb12a 100644
--- a/source3/lib/ctdbd_conn.c
+++ b/source3/lib/ctdbd_conn.c
@@ -542,15 +542,7 @@ static NTSTATUS ctdb_handle_message(uint8_t *buf, size_t length,
 		messaging_send(conn->msg_ctx, procid_self(),
 			       MSG_SMB_BRL_VALIDATE, &data_blob_null);
 
-		/*
-		 * it's possible that we have just rejoined the cluster after
-		 * an outage. In that case our pending locks could have been
-		 * removed from the lockdb, so retry them once more
-		 */
-		message_send_all(conn->msg_ctx, MSG_SMB_UNLOCK, NULL, 0, NULL);
-
 		TALLOC_FREE(buf);
-
 		return NT_STATUS_OK;
 	}
 
diff --git a/source3/lib/events.c b/source3/lib/events.c
index 7a06ad0..75aa250 100644
--- a/source3/lib/events.c
+++ b/source3/lib/events.c
@@ -105,12 +105,29 @@ bool run_events(struct tevent_context *ev,
 
 	if ((ev->timer_events != NULL)
 	    && (timeval_compare(&now, &ev->timer_events->next_event) >= 0)) {
+		/* this older events system did not auto-free timed
+		   events on running them, and had a race condition
+		   where the event could be called twice if the
+		   talloc_free of the te happened after the callback
+		   made a call which invoked the event loop. To avoid
+		   this while still allowing old code which frees the
+		   te, we need to create a temporary context which
+		   will be used to ensure the te is freed. We also
+		   remove the te from the timed event list before we
+		   call the handler, to ensure we can't loop */
+
+		struct tevent_timer *te = ev->timer_events;
+		TALLOC_CTX *tmp_ctx = talloc_new(ev);
 
 		DEBUG(10, ("Running timed event \"%s\" %p\n",
 			   ev->timer_events->handler_name, ev->timer_events));
 
-		ev->timer_events->handler(ev, ev->timer_events, now,
-					  ev->timer_events->private_data);
+		DLIST_REMOVE(ev->timer_events, te);
+		talloc_steal(tmp_ctx, te);
+
+		te->handler(ev, te, now, te->private_data);
+
+		talloc_free(tmp_ctx);
 		return true;
 	}
 
diff --git a/source3/smbd/blocking.c b/source3/smbd/blocking.c
index deb7f8f..6c7c167 100644
--- a/source3/smbd/blocking.c
+++ b/source3/smbd/blocking.c
@@ -72,6 +72,7 @@ static bool recalc_brl_timeout(void)
 {
 	struct blocking_lock_record *blr;
 	struct timeval next_timeout;
+	int max_brl_timeout = lp_parm_int(-1, "brl", "recalctime", 5);
 
 	TALLOC_FREE(brl_timeout);
 
@@ -100,6 +101,25 @@ static bool recalc_brl_timeout(void)
 		return True;
 	}
 
+	/* 
+	 to account for unclean shutdowns by clients we need a
+	 maximum timeout that we use for checking pending locks. If
+	 we have any pending locks at all, then check if the pending
+	 lock can continue at least every brl:recalctime seconds
+	 (default 5 seconds).
+
+	 This saves us needing to do a message_send_all() in the
+	 SIGCHLD handler in the parent daemon. That
+	 message_send_all() caused O(n^2) work to be done when IP
+	 failovers happened in clustered Samba, which could make the
+	 entire system unusable for many minutes.
+	*/
+
+	if (max_brl_timeout > 0) {
+		struct timeval min_to = timeval_current_ofs(max_brl_timeout, 0);
+		next_timeout = timeval_min(&next_timeout, &min_to);             
+	}
+
 	if (DEBUGLVL(10)) {
 		struct timeval cur, from_now;
 
diff --git a/source3/smbd/server.c b/source3/smbd/server.c
index fb0efd2..37716c4 100644
--- a/source3/smbd/server.c
+++ b/source3/smbd/server.c
@@ -219,19 +219,53 @@ static void add_child_pid(pid_t pid)
 	num_children += 1;
 }
 
+/*
+  at most every smbd:cleanuptime seconds (default 20), we scan the BRL
+  and locking database for entries to cleanup. As a side effect this
+  also cleans up dead entries in the connections database (due to the
+  traversal in message_send_all()
+
+  Using a timer for this prevents a flood of traversals when a large
+  number of clients disconnect at the same time (perhaps due to a
+  network outage).  
+*/
+
+static void cleanup_timeout_fn(struct event_context *event_ctx,
+				struct timed_event *te,
+				struct timeval now,
+				void *private_data)
+{
+	struct timed_event **cleanup_te = (struct timed_event **)private_data;
+
+	DEBUG(1,("Cleaning up brl and lock database after unclean shutdown\n"));
+	message_send_all(smbd_messaging_context(), MSG_SMB_UNLOCK, NULL, 0, NULL);
+	messaging_send_buf(smbd_messaging_context(), procid_self(), 
+				MSG_SMB_BRL_VALIDATE, NULL, 0);
+	/* mark the cleanup as having been done */
+	(*cleanup_te) = NULL;
+}
+
 static void remove_child_pid(pid_t pid, bool unclean_shutdown)
 {
 	struct child_pid *child;
+	static struct timed_event *cleanup_te;
 
 	if (unclean_shutdown) {
-		/* a child terminated uncleanly so tickle all processes to see 
-		   if they can grab any of the pending locks
-		*/
-		DEBUG(3,(__location__ " Unclean shutdown of pid %u\n", (unsigned int)pid));
-		messaging_send_buf(smbd_messaging_context(), procid_self(), 
-				   MSG_SMB_BRL_VALIDATE, NULL, 0);
-		message_send_all(smbd_messaging_context(), 
-				 MSG_SMB_UNLOCK, NULL, 0, NULL);
+		/* a child terminated uncleanly so tickle all
+		   processes to see if they can grab any of the
+		   pending locks
+                */
+		DEBUG(3,(__location__ " Unclean shutdown of pid %u\n", 
+			(unsigned int)pid));
+		if (!cleanup_te) {
+			/* call the cleanup timer, but not too often */
+			int cleanup_time = lp_parm_int(-1, "smbd", "cleanuptime", 20);
+			cleanup_te = event_add_timed(smbd_event_context(), NULL,
+						timeval_current_ofs(cleanup_time, 0),
+						cleanup_timeout_fn, 
+						&cleanup_te);
+			DEBUG(1,("Scheduled cleanup of brl and lock database after unclean shutdown\n"));
+		}
 	}
 
 	for (child = children; child != NULL; child = child->next) {


-- 
Samba Shared Repository