[PATCHSET] Change brlock autocleanup

Volker Lendecke Volker.Lendecke at SerNet.DE
Wed Aug 20 09:45:21 MDT 2014


Hi!

The attached patchset changes our brlock dead-process
autocleanup from "once per fsp" to "when a conflict
happens".

This makes us a bit more precise; see the cleanup2 torture
test change. It also removes the last real caller of
serverids_exist. Once this is in, a lot of clumsy code can
go.
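
In code terms, the new scheme is: when an existing entry
conflicts, check whether its owner still exists; if not,
tombstone the entry by zeroing its pid and keep scanning,
and let the flush path compact the tombstones away. Below
is a minimal self-contained toy in C, not Samba code:
toy_pid_exists stands in for serverid_exists, and the
structs are invented for illustration.

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

struct toy_lock {
	unsigned pid;		/* 0 == tombstone */
	unsigned start;
	unsigned len;
};

static bool toy_pid_exists(unsigned pid)
{
	return (pid % 2) == 1;	/* stand-in for serverid_exists() */
}

static bool toy_conflict(const struct toy_lock *l,
			 unsigned start, unsigned len)
{
	return (start < l->start + l->len) && (l->start < start + len);
}

/* Return true if [start, start+len) is free; tombstone dead owners. */
static bool toy_locktest(struct toy_lock *locks, size_t n,
			 unsigned start, unsigned len)
{
	size_t i;

	for (i = 0; i < n; i++) {
		if (!toy_conflict(&locks[i], start, len)) {
			continue;
		}
		if (!toy_pid_exists(locks[i].pid)) {
			locks[i].pid = 0;	/* reap lazily at flush */
			continue;
		}
		return false;		/* live conflict */
	}
	return true;
}

/* Compact tombstones by swapping in the last entry; return new count. */
static size_t toy_flush(struct toy_lock *locks, size_t n)
{
	size_t i = 0;

	while (i < n) {
		if (locks[i].pid == 0) {
			locks[i] = locks[n - 1];
			n -= 1;
		} else {
			i += 1;
		}
	}
	return n;
}

int main(void)
{
	struct toy_lock locks[] = {
		{ 2, 0, 10 },	/* even pid == dead in this toy */
		{ 3, 20, 10 },	/* odd pid == alive */
	};
	size_t n = sizeof(locks) / sizeof(locks[0]);

	printf("range [0,10) free: %d\n", toy_locktest(locks, n, 0, 10));
	printf("entries after flush: %zu\n", toy_flush(locks, n));
	return 0;
}

The swap-with-last removal in toy_flush is the same
order-destroying trick the first patch below adds to
byte_range_lock_flush.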

Review&push would be appreciated!

Thanks,

Volker

-- 
SerNet GmbH, Bahnhofsallee 1b, 37081 Göttingen
phone: +49-551-370000-0, fax: +49-551-370000-9
AG Göttingen, HRB 2816, GF: Dr. Johannes Loxen
http://www.sernet.de, mailto:kontakt at sernet.de
-------------- next part --------------
From 49f9df45e05bb39d15d53c8503c3ab45459d62d5 Mon Sep 17 00:00:00 2001
From: Volker Lendecke <vl at samba.org>
Date: Tue, 19 Aug 2014 12:36:55 +0000
Subject: [PATCH 1/4] brlock: Do auto-cleanup at conflict time

This avoids the need for sweeping validate_lock_entries calls.

Signed-off-by: Volker Lendecke <vl at samba.org>
---
 source3/include/vfs.h     |    1 -
 source3/locking/brlock.c  |   92 ++++++++++++++++++++++-----------------------
 source3/locking/locking.c |   10 +++++
 source3/locking/proto.h   |    2 +-
 4 files changed, 57 insertions(+), 48 deletions(-)

diff --git a/source3/include/vfs.h b/source3/include/vfs.h
index a81fb6c..3702b75 100644
--- a/source3/include/vfs.h
+++ b/source3/include/vfs.h
@@ -236,7 +236,6 @@ typedef struct files_struct {
 	bool modified;
 	bool is_directory;
 	bool aio_write_behind;
-	bool lockdb_clean;
 	bool initial_delete_on_close; /* Only set at NTCreateX if file was created. */
 	bool delete_on_close;
 	bool posix_open;
diff --git a/source3/locking/brlock.c b/source3/locking/brlock.c
index 84849e6..078077f 100644
--- a/source3/locking/brlock.c
+++ b/source3/locking/brlock.c
@@ -418,6 +418,11 @@ NTSTATUS brl_lock_windows_default(struct byte_range_lock *br_lck,
 	for (i=0; i < br_lck->num_locks; i++) {
 		/* Do any Windows or POSIX locks conflict ? */
 		if (brl_conflict(&locks[i], plock)) {
+			if (!serverid_exists(&locks[i].context.pid)) {
+				locks[i].context.pid.pid = 0;
+				br_lck->modified = true;
+				continue;
+			}
 			/* Remember who blocked us. */
 			plock->context.smblctx = locks[i].context.smblctx;
 			return brl_lock_failed(fsp,plock,blocking_lock);
@@ -822,6 +827,11 @@ static NTSTATUS brl_lock_posix(struct messaging_context *msg_ctx,
 		if (curr_lock->lock_flav == WINDOWS_LOCK) {
 			/* Do any Windows flavour locks conflict ? */
 			if (brl_conflict(curr_lock, plock)) {
+				if (!serverid_exists(&curr_lock->context.pid)) {
+					curr_lock->context.pid.pid = 0;
+					br_lck->modified = true;
+					continue;
+				}
 				/* No games with error messages. */
 				TALLOC_FREE(tp);
 				/* Remember who blocked us. */
@@ -836,6 +846,11 @@ static NTSTATUS brl_lock_posix(struct messaging_context *msg_ctx,
 
 			/* POSIX conflict semantics are different. */
 			if (brl_conflict_posix(curr_lock, plock)) {
+				if (!serverid_exists(&curr_lock->context.pid)) {
+					curr_lock->context.pid.pid = 0;
+					br_lck->modified = true;
+					continue;
+				}
 				/* Can't block ourselves with POSIX locks. */
 				/* No games with error messages. */
 				TALLOC_FREE(tp);
@@ -1345,12 +1360,12 @@ bool brl_unlock(struct messaging_context *msg_ctx,
  Returns True if the region required is currently unlocked, False if locked.
 ****************************************************************************/
 
-bool brl_locktest(const struct byte_range_lock *br_lck,
+bool brl_locktest(struct byte_range_lock *br_lck,
 		  const struct lock_struct *rw_probe)
 {
 	bool ret = True;
 	unsigned int i;
-	const struct lock_struct *locks = br_lck->lock_data;
+	struct lock_struct *locks = br_lck->lock_data;
 	files_struct *fsp = br_lck->fsp;
 
 	/* Make sure existing locks don't conflict */
@@ -1359,6 +1374,17 @@ bool brl_locktest(const struct byte_range_lock *br_lck,
 		 * Our own locks don't conflict.
 		 */
 		if (brl_conflict_other(&locks[i], rw_probe)) {
+			if (br_lck->record == NULL) {
+				/* readonly */
+				return false;
+			}
+
+			if (!serverid_exists(&locks[i].context.pid)) {
+				locks[i].context.pid.pid = 0;
+				br_lck->modified = true;
+				continue;
+			}
+
 			return False;
 		}
 	}
@@ -1673,7 +1699,6 @@ bool brl_reconnect_disconnected(struct files_struct *fsp)
 	 * and thereby remove our own (disconnected) entries but reactivate
 	 * them instead.
 	 */
-	fsp->lockdb_clean = true;
 
 	br_lck = brl_get_locks(talloc_tos(), fsp);
 	if (br_lck == NULL) {
@@ -1910,11 +1935,29 @@ int brl_forall(void (*fn)(struct file_id id, struct server_id pid,
 static void byte_range_lock_flush(struct byte_range_lock *br_lck)
 {
 	size_t data_len;
+	unsigned i;
+	struct lock_struct *locks = br_lck->lock_data;
+
 	if (!br_lck->modified) {
 		DEBUG(10, ("br_lck not modified\n"));
 		goto done;
 	}
 
+	i = 0;
+
+	while (i < br_lck->num_locks) {
+		if (locks[i].context.pid.pid == 0) {
+			/*
+			 * Autocleanup, the process conflicted and does not
+			 * exist anymore.
+			 */
+			locks[i] = locks[br_lck->num_locks-1];
+			br_lck->num_locks -= 1;
+		} else {
+			i += 1;
+		}
+	}
+
 	data_len = br_lck->num_locks * sizeof(struct lock_struct);
 
 	if (br_lck->have_read_oplocks) {
@@ -2025,37 +2068,6 @@ struct byte_range_lock *brl_get_locks(TALLOC_CTX *mem_ctx, files_struct *fsp)
 		br_lck->have_read_oplocks = (data.dptr[data.dsize-1] == 1);
 	}
 
-	if (!fsp->lockdb_clean) {
-		int orig_num_locks = br_lck->num_locks;
-
-		/*
-		 * This is the first time we access the byte range lock
-		 * record with this fsp. Go through and ensure all entries
-		 * are valid - remove any that don't.
-		 * This makes the lockdb self cleaning at low cost.
-		 *
-		 * Note: Disconnected entries belong to disconnected
-		 * durable handles. So at this point, we have a new
-		 * handle on the file and the disconnected durable has
-		 * already been closed (we are not a durable reconnect).
-		 * So we need to clean the disconnected brl entry.
-		 */
-
-		if (!validate_lock_entries(&br_lck->num_locks,
-					   &br_lck->lock_data, false)) {
-			TALLOC_FREE(br_lck);
-			return NULL;
-		}
-
-		/* Ensure invalid locks are cleaned up in the destructor. */
-		if (orig_num_locks != br_lck->num_locks) {
-			br_lck->modified = True;
-		}
-
-		/* Mark the lockdb as "clean" as seen from this open file. */
-		fsp->lockdb_clean = True;
-	}
-
 	if (DEBUGLEVEL >= 10) {
 		unsigned int i;
 		struct lock_struct *locks = br_lck->lock_data;
@@ -2121,18 +2133,6 @@ struct byte_range_lock *brl_get_locks_readonly(files_struct *fsp)
 		return fsp->brlock_rec;
 	}
 
-	if (!fsp->lockdb_clean) {
-		/*
-		 * Fetch the record in R/W mode to give validate_lock_entries
-		 * a chance to kick in once.
-		 */
-		rw = brl_get_locks(talloc_tos(), fsp);
-		if (rw == NULL) {
-			return NULL;
-		}
-		fsp->lockdb_clean = true;
-	}
-
 	if (rw != NULL) {
 		size_t lock_data_size;
 
diff --git a/source3/locking/locking.c b/source3/locking/locking.c
index 0a99449..a320068 100644
--- a/source3/locking/locking.c
+++ b/source3/locking/locking.c
@@ -137,6 +137,16 @@ bool strict_lock_default(files_struct *fsp, struct lock_struct *plock)
 	}
 	ret = brl_locktest(br_lck, plock);
 
+	if (!ret) {
+		/*
+		 * We got a lock conflict. Retry with rw locks to enable
+		 * autocleanup. This is the slow path anyway.
+		 */
+		br_lck = brl_get_locks(talloc_tos(), fsp);
+		ret = brl_locktest(br_lck, plock);
+		TALLOC_FREE(br_lck);
+	}
+
 	DEBUG(10, ("strict_lock_default: flavour = %s brl start=%ju "
 		   "len=%ju %s for fnum %ju file %s\n",
 		   lock_flav_name(plock->lock_flav),
diff --git a/source3/locking/proto.h b/source3/locking/proto.h
index 722779f..46eec2a 100644
--- a/source3/locking/proto.h
+++ b/source3/locking/proto.h
@@ -58,7 +58,7 @@ bool brl_unlock(struct messaging_context *msg_ctx,
 bool brl_unlock_windows_default(struct messaging_context *msg_ctx,
 			       struct byte_range_lock *br_lck,
 			       const struct lock_struct *plock);
-bool brl_locktest(const struct byte_range_lock *br_lck,
+bool brl_locktest(struct byte_range_lock *br_lck,
 		  const struct lock_struct *rw_probe);
 NTSTATUS brl_lockquery(struct byte_range_lock *br_lck,
 		uint64_t *psmblctx,
-- 
1.7.9.5
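
One design point in the patch above is easy to miss:
strict_lock_default now probes with the cached read-only
record and escalates to a read/write fetch only when the
probe reports a conflict, giving brl_locktest a chance to
tombstone dead owners. A conflict is the slow path anyway,
so the extra fetch only costs where it already hurts. A
condensed restatement of the hunk follows (a sketch, not a
standalone program; the strict-locking configuration checks
and DEBUG output are elided, and the NULL check after
brl_get_locks is added here for clarity, the hunk itself
omits it):

bool strict_lock_default(files_struct *fsp, struct lock_struct *plock)
{
	struct byte_range_lock *br_lck;
	bool ret;

	/* Common case: probe against the cached read-only record. */
	br_lck = brl_get_locks_readonly(fsp);
	if (br_lck == NULL) {
		return true;
	}
	ret = brl_locktest(br_lck, plock);

	if (!ret) {
		/*
		 * Apparent conflict. Refetch the record read-write so
		 * that brl_locktest may tombstone dead owners; keep
		 * the conflict result if the refetch fails.
		 */
		br_lck = brl_get_locks(talloc_tos(), fsp);
		if (br_lck != NULL) {
			ret = brl_locktest(br_lck, plock);
			TALLOC_FREE(br_lck);
		}
	}
	return ret;
}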


From 37861080e693a8bc1d11f14e9089a18a7402af9b Mon Sep 17 00:00:00 2001
From: Volker Lendecke <vl at samba.org>
Date: Wed, 20 Aug 2014 09:07:14 +0000
Subject: [PATCH 2/4] brlock: Remove validate_lock_entries

This is now only called during brl_forall. It does not really hurt if we
list dead processes here; if the upper layers really care, they can
filter them out themselves. The real lock conflicts are now removed on
demand.

Signed-off-by: Volker Lendecke <vl at samba.org>
---
 source3/locking/brlock.c |   95 +---------------------------------------------
 1 file changed, 1 insertion(+), 94 deletions(-)

diff --git a/source3/locking/brlock.c b/source3/locking/brlock.c
index 078077f..295e147 100644
--- a/source3/locking/brlock.c
+++ b/source3/locking/brlock.c
@@ -1749,80 +1749,6 @@ bool brl_reconnect_disconnected(struct files_struct *fsp)
 	return true;
 }
 
-/****************************************************************************
- Ensure this set of lock entries is valid.
-****************************************************************************/
-static bool validate_lock_entries(unsigned int *pnum_entries, struct lock_struct **pplocks,
-				  bool keep_disconnected)
-{
-	unsigned int i;
-	struct lock_struct *locks = *pplocks;
-	unsigned int num_entries = *pnum_entries;
-	TALLOC_CTX *frame;
-	struct server_id *ids;
-	bool *exists;
-
-	if (num_entries == 0) {
-		return true;
-	}
-
-	frame = talloc_stackframe();
-
-	ids = talloc_array(frame, struct server_id, num_entries);
-	if (ids == NULL) {
-		DEBUG(0, ("validate_lock_entries: "
-			  "talloc_array(struct server_id, %u) failed\n",
-			  num_entries));
-		talloc_free(frame);
-		return false;
-	}
-
-	exists = talloc_array(frame, bool, num_entries);
-	if (exists == NULL) {
-		DEBUG(0, ("validate_lock_entries: "
-			  "talloc_array(bool, %u) failed\n",
-			  num_entries));
-		talloc_free(frame);
-		return false;
-	}
-
-	for (i = 0; i < num_entries; i++) {
-		ids[i] = locks[i].context.pid;
-	}
-
-	if (!serverids_exist(ids, num_entries, exists)) {
-		DEBUG(3, ("validate_lock_entries: serverids_exists failed\n"));
-		talloc_free(frame);
-		return false;
-	}
-
-	i = 0;
-
-	while (i < num_entries) {
-		if (exists[i]) {
-			i++;
-			continue;
-		}
-
-		if (keep_disconnected &&
-		    server_id_is_disconnected(&ids[i]))
-		{
-			i++;
-			continue;
-		}
-
-		/* This process no longer exists */
-
-		brl_delete_lock_struct(locks, num_entries, i);
-		num_entries -= 1;
-	}
-	TALLOC_FREE(frame);
-
-	*pnum_entries = num_entries;
-
-	return True;
-}
-
 struct brl_forall_cb {
 	void (*fn)(struct file_id id, struct server_id pid,
 		   enum brl_type lock_type,
@@ -1844,7 +1770,6 @@ static int brl_traverse_fn(struct db_record *rec, void *state)
 	struct file_id *key;
 	unsigned int i;
 	unsigned int num_locks = 0;
-	unsigned int orig_num_locks = 0;
 	TDB_DATA dbkey;
 	TDB_DATA value;
 
@@ -1861,25 +1786,7 @@ static int brl_traverse_fn(struct db_record *rec, void *state)
 	}
 
 	key = (struct file_id *)dbkey.dptr;
-	orig_num_locks = num_locks = value.dsize/sizeof(*locks);
-
-	/* Ensure the lock db is clean of entries from invalid processes. */
-
-	if (!validate_lock_entries(&num_locks, &locks, true)) {
-		TALLOC_FREE(locks);
-		return -1; /* Terminate traversal */
-	}
-
-	if (orig_num_locks != num_locks) {
-		if (num_locks) {
-			TDB_DATA data;
-			data.dptr = (uint8_t *)locks;
-			data.dsize = num_locks*sizeof(struct lock_struct);
-			dbwrap_record_store(rec, data, TDB_REPLACE);
-		} else {
-			dbwrap_record_delete(rec);
-		}
-	}
+	num_locks = value.dsize/sizeof(*locks);
 
 	if (cb->fn) {
 		for ( i=0; i<num_locks; i++) {
-- 
1.7.9.5
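
The commit message above says upper layers can filter out
dead owners themselves if they care. A hedged in-tree
sketch of such a filter (assuming the brl_forall callback
signature declared in source3/locking/proto.h and that
serverid_exists() is visible to the caller;
count_live_locks_fn is a hypothetical name, not existing
code):

static void count_live_locks_fn(struct file_id id, struct server_id pid,
				enum brl_type lock_type,
				enum brl_flavour lock_flav,
				br_off start, br_off size,
				void *private_data)
{
	unsigned *count = (unsigned *)private_data;

	if (!serverid_exists(&pid)) {
		/* Dead owner; a conflict will reap the entry eventually. */
		return;
	}
	*count += 1;
}

/* Usage: unsigned n = 0; brl_forall(count_live_locks_fn, &n); */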


From 35fdccb11afbca0dc35ed936596b454574f5dcda Mon Sep 17 00:00:00 2001
From: Volker Lendecke <vl at samba.org>
Date: Wed, 20 Aug 2014 09:52:39 +0000
Subject: [PATCH 3/4] torture: Run the cleanup2 test against 2 nodes

This enables testing the brlock cleanup across ctdb nodes.

Signed-off-by: Volker Lendecke <vl at samba.org>
---
 source3/torture/test_cleanup.c |    2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/source3/torture/test_cleanup.c b/source3/torture/test_cleanup.c
index e96a5de..c5c53c3 100644
--- a/source3/torture/test_cleanup.c
+++ b/source3/torture/test_cleanup.c
@@ -127,7 +127,7 @@ bool run_cleanup2(int dummy)
 	/*
 	 * Check the file is indeed locked
 	 */
-	if (!torture_open_connection(&cli2, 0)) {
+	if (!torture_open_connection(&cli2, 1)) {
 		return false;
 	}
 	status = cli_ntcreate(
-- 
1.7.9.5


From 932ba9f9a8fb89d1cf3f7b9971c1e42a3e522e98 Mon Sep 17 00:00:00 2001
From: Volker Lendecke <vl at samba.org>
Date: Wed, 20 Aug 2014 09:53:28 +0000
Subject: [PATCH 4/4] torture: Fix cleanup2 to utilize on-demand cleanup

Now we check the cleanup when conflicts happen, not when we first open
the file. This means we don't have to re-open the connection to trigger
the cleanup.

Signed-off-by: Volker Lendecke <vl at samba.org>
---
 source3/torture/test_cleanup.c |   19 +++----------------
 1 file changed, 3 insertions(+), 16 deletions(-)

diff --git a/source3/torture/test_cleanup.c b/source3/torture/test_cleanup.c
index c5c53c3..8efdf35 100644
--- a/source3/torture/test_cleanup.c
+++ b/source3/torture/test_cleanup.c
@@ -157,23 +157,10 @@ bool run_cleanup2(int dummy)
 	}
 
 	/*
-	 * Right now we don't clean up immediately. Re-open the 2nd connection.
+	 * Give the suicidal smbd a bit of time to really pass away
 	 */
-#if 1
-	cli_shutdown(cli2);
-	if (!torture_open_connection(&cli2, 0)) {
-		return false;
-	}
-	status = cli_ntcreate(
-		cli2, fname, 0, FILE_GENERIC_READ|FILE_GENERIC_WRITE,
-		FILE_ATTRIBUTE_NORMAL,
-		FILE_SHARE_READ|FILE_SHARE_WRITE|FILE_SHARE_DELETE,
-		FILE_OPEN, 0, 0, &fnum2, NULL);
-	if (!NT_STATUS_IS_OK(status)) {
-		printf("open of %s failed (%s)\n", fname, nt_errstr(status));
-		return false;
-	}
-#endif
+	smb_msleep(1000);
+
 	status = cli_smbwrite(cli2, fnum2, &buf, 0, 1, NULL);
 	if (!NT_STATUS_IS_OK(status)) {
 		printf("write failed: %s\n", nt_errstr(status));
-- 
1.7.9.5


