[SCM] Samba Shared Repository - branch master updated

Volker Lendecke vlendec at samba.org
Tue Feb 16 05:21:41 MST 2010


The branch, master has been updated
       via  83542d9... s3: Slightly increase parallelism in g_lock
       via  be919d6... s3: Avoid starving locks when many processes die at the same time
       via  725b365... s3: Avoid a thundering herd in g_lock_unlock
       via  07978bd... s3: Optimize g_lock_lock for a heavily contended case
       via  f3bdb16... s3: Fix handling of processes that died in g_lock
      from  eda16f2... s4-kcc: remove a qsort() that snuck into the new topology code

http://gitweb.samba.org/?p=samba.git;a=shortlog;h=master


- Log -----------------------------------------------------------------
commit 83542d973ca771353109c7da4b0391d6ba910f53
Author: Volker Lendecke <vl at samba.org>
Date:   Tue Feb 16 12:31:58 2010 +0100

    s3: Slightly increase parallelism in g_lock
    
    There is no need to keep holding the g_lock tdb-level lock while telling
    the waiters to retry.
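
A minimal sketch of the ordering this refers to, with a pthread mutex and
condition variable standing in for the tdb record lock and the messaging_send()
wakeups (none of the names below come from the patch): update the shared state
under the lock, drop the lock, and only then wake the waiters.

    /* Toy illustration only -- this is not the g_lock code. */
    #include <pthread.h>
    #include <stdio.h>

    static pthread_mutex_t record_lock = PTHREAD_MUTEX_INITIALIZER;
    static pthread_cond_t  retry_cond  = PTHREAD_COND_INITIALIZER;
    static int lock_is_free;

    static void unlock_and_notify(void)
    {
        pthread_mutex_lock(&record_lock);
        lock_is_free = 1;                    /* update the shared record */
        pthread_mutex_unlock(&record_lock);  /* drop the record lock first... */

        /* ...and only then wake the waiters, so the first one scheduled
         * does not immediately block on a lock we are still holding. */
        pthread_cond_broadcast(&retry_cond);
    }

    int main(void)
    {
        unlock_and_notify();
        printf("record unlocked, waiters told to retry\n");
        return 0;
    }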

commit be919d6faed198cdc29322a4d9491946c0b044b3
Author: Volker Lendecke <vl at samba.org>
Date:   Tue Feb 16 12:28:53 2010 +0100

    s3: Avoid starving locks when many processes die at the same time
    
    In g_lock_unlock there is a small race between the process_exists and
    messaging_send calls: we now only send to 5 waiters, and all of them might
    have died between the existence check and the message being sent. To avoid
    a stall in that case, this change makes g_lock_lock retry at least once
    every minute.
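
A rough illustration of the capped wait, using plain select() rather than
Samba's sys_select() loop; the 60-second figure comes from the patch, the rest
is made up for the demo (and shortened so it finishes quickly).

    #include <stdio.h>
    #include <sys/select.h>

    #define DEMO_CAP_SECONDS 2   /* the patch caps the wait at 60 seconds */

    int main(void)
    {
        struct timeval select_timeout;
        int ret;

        /* Never sleep longer than the cap, even if the caller's overall
         * lock timeout is much larger (or infinite). */
        select_timeout.tv_sec = DEMO_CAP_SECONDS;
        select_timeout.tv_usec = 0;

        /* Nothing to watch in this toy; select() just sleeps until the
         * timeout expires or a signal arrives, then the lock would be
         * retried. */
        ret = select(0, NULL, NULL, NULL, &select_timeout);
        printf("select returned %d, retrying the lock now\n", ret);
        return 0;
    }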

commit 725b3654f831fbe0388cc09f46269903c9eef1d7
Author: Volker Lendecke <vl at samba.org>
Date:   Tue Feb 16 12:22:08 2010 +0100

    s3: Avoid a thundering herd in g_lock_unlock
    
    Only notify the first 5 pending lock waiters. This avoids a thundering herd
    problem that is really nasty in a cluster. It also makes acquiring a lock a
    bit more FIFO, since lock waiters are appended to the end of the array.
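
A sketch of the capped wakeup loop; struct waiter and the printf stand in for
struct g_lock_rec and messaging_send(), and the cap of 5 follows the commit
message rather than quoting the patch.

    #include <stdbool.h>
    #include <stdio.h>

    struct waiter {
        int pid;
        bool pending;   /* still waiting for the lock */
        bool alive;     /* what process_exists() would report */
    };

    static void wake_pending_waiters(const struct waiter *w, int n)
    {
        int i, num_wakeups = 0;

        for (i = 0; i < n; i++) {
            if (!w[i].pending || !w[i].alive) {
                continue;
            }
            printf("waking pid %d\n", w[i].pid);  /* messaging_send() in g_lock */
            num_wakeups += 1;
            if (num_wakeups >= 5) {
                /* Leave the rest asleep; they retry within a minute anyway. */
                break;
            }
        }
    }

    int main(void)
    {
        const struct waiter w[8] = {
            {100, true, true},  {101, true, false}, {102, true, true},
            {103, false, true}, {104, true, true},  {105, true, true},
            {106, true, true},  {107, true, true},
        };
        wake_pending_waiters(w, 8);
        return 0;
    }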

commit 07978bd175395e0dc770f68fff5b8bd8b0fdeb51
Author: Volker Lendecke <vl at samba.org>
Date:   Mon Feb 15 16:57:16 2010 +0100

    s3: Optimize g_lock_lock for a heavily contended case
    
    In g_lock_parse, only check that the lock owner still exists; check the rest
    of the records only once we have acquired the lock successfully. This reduces
    the load on process_exists, which can involve a network roundtrip in the
    clustered case.
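
A toy version of that split: during parsing only the non-pending entry (the
lock owner) goes through the expensive existence check, and the sweep over
every record is deferred until the lock has actually been acquired. The struct,
the PENDING flag and process_exists_stub() are simplified stand-ins, not the
g_lock types.

    #include <stdbool.h>
    #include <stdio.h>

    #define PENDING 1

    struct rec { int pid; int flags; };

    static int existence_checks;

    /* Stand-in for process_exists(); in a cluster this can mean a
     * network roundtrip, which is why the number of calls matters. */
    static bool process_exists_stub(int pid)
    {
        existence_checks += 1;
        return pid != 99;               /* pretend pid 99 has died */
    }

    /* Parse step: only a non-pending entry (the owner) is checked. */
    static int parse_check_owner_only(struct rec *r, int n)
    {
        int i;
        for (i = 0; i < n; i++) {
            if ((r[i].flags & PENDING) == 0 &&
                !process_exists_stub(r[i].pid)) {
                r[i] = r[n - 1];        /* drop the dead owner */
                n -= 1;
                i -= 1;                 /* re-check the moved entry */
            }
        }
        return n;
    }

    /* Full sweep: only worth paying for once we hold the lock. */
    static int cleanup_all(struct rec *r, int n)
    {
        int i;
        for (i = 0; i < n; i++) {
            if (!process_exists_stub(r[i].pid)) {
                r[i] = r[n - 1];
                n -= 1;
                i -= 1;
            }
        }
        return n;
    }

    int main(void)
    {
        struct rec r[4] = { {10, 0}, {99, PENDING}, {11, PENDING}, {12, PENDING} };
        int n = parse_check_owner_only(r, 4);
        printf("after parse:   %d records, %d existence checks\n",
               n, existence_checks);
        n = cleanup_all(r, n);          /* pretend we now hold the lock */
        printf("after cleanup: %d records, %d existence checks\n",
               n, existence_checks);
        return 0;
    }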

commit f3bdb163f461175c50b4930fa3464beaee30f4a8
Author: Volker Lendecke <vl at samba.org>
Date:   Mon Feb 15 16:49:46 2010 +0100

    s3: Fix handling of processes that died in g_lock
    
    g_lock_parse might have thrown away entries from the locks array because the
    processes were not around anymore. Don't store the orphaned entries.
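
A small stand-alone example of the point above: once dead entries have been
compacted out of the in-memory array, the data written back must be sized by
the number of live entries, not by the original allocation, otherwise the
orphaned entries end up in the store again. The types and the final printf are
placeholders for the real g_lock record and rec->store().

    #include <stdbool.h>
    #include <stdio.h>

    struct lock_rec { int pid; int lock_type; };

    static bool process_exists_stub(int pid)
    {
        return pid != 42;               /* pretend pid 42 has died */
    }

    int main(void)
    {
        struct lock_rec locks[3] = { {41, 0}, {42, 1}, {43, 1} };
        int num_locks = 3;
        int i;

        /* Drop entries whose owner is gone, compacting in place. */
        for (i = 0; i < num_locks; i++) {
            if (process_exists_stub(locks[i].pid)) {
                continue;
            }
            if (i < num_locks - 1) {
                locks[i] = locks[num_locks - 1];
                i -= 1;                 /* re-check the entry we moved down */
            }
            num_locks -= 1;
        }

        /* Store only the live entries: num_locks * sizeof(*locks) bytes,
         * not the size of the whole original array. */
        printf("storing %zu bytes instead of %zu\n",
               num_locks * sizeof(*locks), sizeof(locks));
        return 0;
    }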

-----------------------------------------------------------------------

Summary of changes:
 source3/lib/g_lock.c |   82 +++++++++++++++++++++++++++++++++++++++++--------
 1 files changed, 68 insertions(+), 14 deletions(-)


Changeset truncated at 500 lines:

diff --git a/source3/lib/g_lock.c b/source3/lib/g_lock.c
index 26b079d..add670c 100644
--- a/source3/lib/g_lock.c
+++ b/source3/lib/g_lock.c
@@ -108,6 +108,34 @@ static bool g_lock_parse(TALLOC_CTX *mem_ctx, TDB_DATA data,
 			      (locks[i].lock_type & G_LOCK_PENDING) ?
 			      "(pending)" : "(owner)"));
 
+		if (((locks[i].lock_type & G_LOCK_PENDING) == 0)
+		    && !process_exists(locks[i].pid)) {
+
+			DEBUGADD(10, ("lock owner %s died -- discarding\n",
+				      procid_str(talloc_tos(),
+						 &locks[i].pid)));
+
+			if (i < (num_locks-1)) {
+				locks[i] = locks[num_locks-1];
+			}
+			num_locks -= 1;
+		}
+	}
+
+	*plocks = locks;
+	*pnum_locks = num_locks;
+	return true;
+}
+
+static void g_lock_cleanup(int *pnum_locks, struct g_lock_rec *locks)
+{
+	int i, num_locks;
+
+	num_locks = *pnum_locks;
+
+	DEBUG(10, ("g_lock_cleanup: %d locks\n", num_locks));
+
+	for (i=0; i<num_locks; i++) {
 		if (process_exists(locks[i].pid)) {
 			continue;
 		}
@@ -119,19 +147,18 @@ static bool g_lock_parse(TALLOC_CTX *mem_ctx, TDB_DATA data,
 		}
 		num_locks -= 1;
 	}
-
-	*plocks = locks;
 	*pnum_locks = num_locks;
-	return true;
+	return;
 }
 
 static struct g_lock_rec *g_lock_addrec(TALLOC_CTX *mem_ctx,
 					struct g_lock_rec *locks,
-					int num_locks,
+					int *pnum_locks,
 					const struct server_id pid,
 					enum g_lock_type lock_type)
 {
 	struct g_lock_rec *result;
+	int num_locks = *pnum_locks;
 
 	result = talloc_realloc(mem_ctx, locks, struct g_lock_rec,
 				num_locks+1);
@@ -141,6 +168,7 @@ static struct g_lock_rec *g_lock_addrec(TALLOC_CTX *mem_ctx,
 
 	result[num_locks].pid = pid;
 	result[num_locks].lock_type = lock_type;
+	*pnum_locks += 1;
 	return result;
 }
 
@@ -221,7 +249,7 @@ again:
 	if (our_index == -1) {
 		/* First round, add ourself */
 
-		locks = g_lock_addrec(talloc_tos(), locks, num_locks,
+		locks = g_lock_addrec(talloc_tos(), locks, &num_locks,
 				      self, lock_type);
 		if (locks == NULL) {
 			DEBUG(10, ("g_lock_addrec failed\n"));
@@ -237,7 +265,14 @@ again:
 		locks[our_index].lock_type = lock_type;
 	}
 
-	data = make_tdb_data((uint8_t *)locks, talloc_get_size(locks));
+	if (NT_STATUS_IS_OK(status) && ((lock_type & G_LOCK_PENDING) == 0)) {
+		/*
+		 * Walk through the list of locks, search for dead entries
+		 */
+		g_lock_cleanup(&num_locks, locks);
+	}
+
+	data = make_tdb_data((uint8_t *)locks, num_locks * sizeof(*locks));
 	store_status = rec->store(rec, data, 0);
 	if (!NT_STATUS_IS_OK(store_status)) {
 		DEBUG(1, ("rec->store failed: %s\n",
@@ -263,7 +298,6 @@ NTSTATUS g_lock_lock(struct g_lock_ctx *ctx, const char *name,
 	NTSTATUS status;
 	bool retry = false;
 	struct timeval timeout_end;
-	struct timeval timeout_remaining;
 	struct timeval time_now;
 
 	DEBUG(10, ("Trying to acquire lock %d for %s\n", (int)lock_type,
@@ -304,6 +338,7 @@ NTSTATUS g_lock_lock(struct g_lock_ctx *ctx, const char *name,
 		fd_set *r_fds = NULL;
 		int max_fd = 0;
 		int ret;
+		struct timeval select_timeout;
 
 		status = g_lock_trylock(ctx, name, lock_type);
 		if (NT_STATUS_IS_OK(status)) {
@@ -360,12 +395,10 @@ NTSTATUS g_lock_lock(struct g_lock_ctx *ctx, const char *name,
 		}
 #endif
 
-		time_now = timeval_current();
-		timeout_remaining = timeval_until(&time_now, &timeout_end);
+		select_timeout = timeval_set(60, 0);
 
 		ret = sys_select(max_fd + 1, r_fds, NULL, NULL,
-				 &timeout_remaining);
-
+				 &select_timeout);
 		if (ret == -1) {
 			if (errno != EINTR) {
 				DEBUG(1, ("error calling select: %s\n",
@@ -386,7 +419,7 @@ NTSTATUS g_lock_lock(struct g_lock_ctx *ctx, const char *name,
 				break;
 			} else {
 				DEBUG(10, ("select returned 0 but timeout not "
-					   "not expired: strange - retrying\n"));
+					   "expired, retrying\n"));
 			}
 		} else if (ret != 1) {
 			DEBUG(1, ("invalid return code of select: %d\n", ret));
@@ -494,14 +527,26 @@ static NTSTATUS g_lock_force_unlock(struct g_lock_ctx *ctx, const char *name,
 		goto done;
 	}
 
+	TALLOC_FREE(rec);
+
 	if ((lock_type & G_LOCK_PENDING) == 0) {
+		int num_wakeups = 0;
+
 		/*
-		 * We've been the lock holder. Tell all others to retry.
+		 * We've been the lock holder. Tell others to retry. Don't
+		 * tell all of them, to avoid a thundering herd. In case
+		 * this leads to a complete stall because we miss some
+		 * processes, the loop in g_lock_lock retries at least
+		 * once a minute.
 		 */
+
 		for (i=0; i<num_locks; i++) {
 			if ((locks[i].lock_type & G_LOCK_PENDING) == 0) {
 				continue;
 			}
+			if (!process_exists(locks[i].pid)) {
+				continue;
+			}
 
 			/*
 			 * Ping all waiters to retry
@@ -514,13 +559,22 @@ static NTSTATUS g_lock_force_unlock(struct g_lock_ctx *ctx, const char *name,
 					  procid_str(talloc_tos(),
 						     &locks[i].pid),
 					  nt_errstr(status)));
+			} else {
+				num_wakeups += 1;
+			}
+			if (num_wakeups > 5) {
+				break;
 			}
 		}
 	}
 done:
+	/*
+	 * For the error path, TALLOC_FREE(rec) as well. In the good
+	 * path we have already freed it.
+	 */
+	TALLOC_FREE(rec);
 
 	TALLOC_FREE(locks);
-	TALLOC_FREE(rec);
 	return status;
 }
 


-- 
Samba Shared Repository

