PATCHSET: Remove FAKE_LEVEL_II_OPLOCKs

Volker Lendecke Volker.Lendecke at SerNet.DE
Mon Oct 7 14:28:07 MDT 2013


Hi!

This has been submitted before, but now some preparatory
patches have gone in separately, and there has been a bit of
cleanup. The first two patches implement $SUBJECT, the rest
is some assorted fallout patches from removing the
FAKE_LEVEL_II_OPLOCK definition.

Please review & push!

Thanks,

Volker

-- 
SerNet GmbH, Bahnhofsallee 1b, 37081 Göttingen
phone: +49-551-370000-0, fax: +49-551-370000-9
AG Göttingen, HRB 2816, GF: Dr. Johannes Loxen
http://www.sernet.de, mailto:kontakt at sernet.de

*****************************************************************
visit us on it-sa:IT security exhibitions in Nürnberg, Germany
October 8th - 10th 2013, hall 12, booth 333
free tickets available via code 270691 on: www.it-sa.de/gutschein
******************************************************************
-------------- next part --------------
From ca0c5ed31221e19591ce4e140dfe3590ae44a7e1 Mon Sep 17 00:00:00 2001
From: Volker Lendecke <vl at samba.org>
Date: Wed, 11 Sep 2013 12:48:14 +0000
Subject: [PATCH 1/7] smbd: Put "have_read_oplocks" into brlock.tdb

This implements an idea by metze: Right now Samba does not grant level2
oplocks where it should: After an initial no-oplock open that has been
written to, we don't have the FAKE_LEVEL2_OPLOCK entry in locking.tdb
around anymore, this downgraded to NO_OPLOCK. Windows in this case will
grant level2 if being asked, we don't.  Part of the reason for this
is that we don't have a proper mechanism to communicate the fact that
level2 needs to be broken to other smbds. Metze's insight was that we
have to look into brlock.tdb for every write anyway, so this might be
the right place to store this information.

My first reaction was that this is really hackish, but on further thought
this is not. oplocks depend on brlocks anyway, and we have the proper
mechanisms in place for brlocks.

The format for this change is to add one byte to the end of the brlock.tdb
record with value 1 if we have level2 oplocks around. Thus this patch
effectively reverts 8f41142 which I discovered while writing this
change. We now legally have unaligned records.

We can certainly talk about the format, but I'm not yet convinced we
need an idl for this yet. This is a potentially very hot code path,
and ndr marshalling has a cost.

Signed-off-by: Volker Lendecke <vl at samba.org>
---
 source3/locking/brlock.c |   55 +++++++++++++++++++++++++++++++++++++---------
 source3/locking/proto.h  |    3 +++
 2 files changed, 48 insertions(+), 10 deletions(-)

diff --git a/source3/locking/brlock.c b/source3/locking/brlock.c
index 0d45501..67b1701 100644
--- a/source3/locking/brlock.c
+++ b/source3/locking/brlock.c
@@ -47,6 +47,7 @@ struct byte_range_lock {
 	struct files_struct *fsp;
 	unsigned int num_locks;
 	bool modified;
+	bool have_read_oplocks;
 	struct lock_struct *lock_data;
 	struct db_record *record;
 };
@@ -81,6 +82,19 @@ struct files_struct *brl_fsp(struct byte_range_lock *brl)
 	return brl->fsp;
 }
 
+bool brl_have_read_oplocks(const struct byte_range_lock *brl)
+{
+	return brl->have_read_oplocks;
+}
+
+void brl_set_have_read_oplocks(struct byte_range_lock *brl,
+			       bool have_read_oplocks)
+{
+	SMB_ASSERT(brl->record != NULL); /* otherwise we're readonly */
+	brl->have_read_oplocks = have_read_oplocks;
+	brl->modified = true;
+}
+
 /****************************************************************************
  See if two locking contexts are equal.
 ****************************************************************************/
@@ -1878,11 +1892,18 @@ int brl_forall(void (*fn)(struct file_id id, struct server_id pid,
 
 static void byte_range_lock_flush(struct byte_range_lock *br_lck)
 {
+	size_t data_len;
 	if (!br_lck->modified) {
 		goto done;
 	}
 
-	if (br_lck->num_locks == 0) {
+	data_len = br_lck->num_locks * sizeof(struct lock_struct);
+
+	if (br_lck->have_read_oplocks) {
+		data_len += 1;
+	}
+
+	if (data_len == 0) {
 		/* No locks - delete this entry. */
 		NTSTATUS status = dbwrap_record_delete(br_lck->record);
 		if (!NT_STATUS_IS_OK(status)) {
@@ -1894,10 +1915,19 @@ static void byte_range_lock_flush(struct byte_range_lock *br_lck)
 		TDB_DATA data;
 		NTSTATUS status;
 
-		data.dptr = (uint8 *)br_lck->lock_data;
-		data.dsize = br_lck->num_locks * sizeof(struct lock_struct);
+		data.dsize = data_len;
+		data.dptr = talloc_array(talloc_tos(), uint8_t, data_len);
+		SMB_ASSERT(data.dptr != NULL);
+
+		memcpy(data.dptr, br_lck->lock_data,
+		       br_lck->num_locks * sizeof(struct lock_struct));
+
+		if (br_lck->have_read_oplocks) {
+			data.dptr[data_len-1] = 1;
+		}
 
 		status = dbwrap_record_store(br_lck->record, data, TDB_REPLACE);
+		TALLOC_FREE(data.dptr);
 		if (!NT_STATUS_IS_OK(status)) {
 			DEBUG(0, ("store returned %s\n", nt_errstr(status)));
 			smb_panic("Could not store byte range mode entry");
@@ -1932,6 +1962,7 @@ struct byte_range_lock *brl_get_locks(TALLOC_CTX *mem_ctx, files_struct *fsp)
 
 	br_lck->fsp = fsp;
 	br_lck->num_locks = 0;
+	br_lck->have_read_oplocks = false;
 	br_lck->modified = False;
 
 	key.dptr = (uint8 *)&fsp->file_id;
@@ -1947,12 +1978,6 @@ struct byte_range_lock *brl_get_locks(TALLOC_CTX *mem_ctx, files_struct *fsp)
 
 	data = dbwrap_record_get_value(br_lck->record);
 
-	if ((data.dsize % sizeof(struct lock_struct)) != 0) {
-		DEBUG(3, ("Got invalid brlock data\n"));
-		TALLOC_FREE(br_lck);
-		return NULL;
-	}
-
 	br_lck->lock_data = NULL;
 
 	talloc_set_destructor(br_lck, byte_range_lock_destructor);
@@ -1968,7 +1993,12 @@ struct byte_range_lock *brl_get_locks(TALLOC_CTX *mem_ctx, files_struct *fsp)
 			return NULL;
 		}
 
-		memcpy(br_lck->lock_data, data.dptr, data.dsize);
+		memcpy(br_lck->lock_data, data.dptr,
+		       talloc_get_size(br_lck->lock_data));
+	}
+
+	if ((data.dsize % sizeof(struct lock_struct)) == 1) {
+		br_lck->have_read_oplocks = (data.dptr[data.dsize-1] == 1);
 	}
 
 	if (!fsp->lockdb_clean) {
@@ -2038,6 +2068,10 @@ static void brl_get_locks_readonly_parser(TDB_DATA key, TDB_DATA data,
 		br_lock, data.dptr, data.dsize);
 	br_lock->num_locks = data.dsize / sizeof(struct lock_struct);
 
+	if ((data.dsize % sizeof(struct lock_struct)) == 1) {
+		br_lock->have_read_oplocks = (data.dptr[data.dsize-1] == 1);
+	}
+
 	*state->br_lock = br_lock;
 }
 
@@ -2079,6 +2113,7 @@ struct byte_range_lock *brl_get_locks_readonly(files_struct *fsp)
 		if (br_lock == NULL) {
 			goto fail;
 		}
+		br_lock->have_read_oplocks = rw->have_read_oplocks;
 		br_lock->num_locks = rw->num_locks;
 		br_lock->lock_data = (struct lock_struct *)talloc_memdup(
 			br_lock, rw->lock_data, lock_data_size);
diff --git a/source3/locking/proto.h b/source3/locking/proto.h
index 1573f6b..b9c01e5 100644
--- a/source3/locking/proto.h
+++ b/source3/locking/proto.h
@@ -30,6 +30,9 @@ void brl_shutdown(void);
 
 unsigned int brl_num_locks(const struct byte_range_lock *brl);
 struct files_struct *brl_fsp(struct byte_range_lock *brl);
+bool brl_have_read_oplocks(const struct byte_range_lock *brl);
+void brl_set_have_read_oplocks(struct byte_range_lock *brl,
+			       bool have_read_oplocks);
 
 NTSTATUS brl_lock_windows_default(struct byte_range_lock *br_lck,
 		struct lock_struct *plock,
-- 
1.7.9.5


From 3f3317404b68e0784ad302fac52cb028933fae6f Mon Sep 17 00:00:00 2001
From: Volker Lendecke <vl at samba.org>
Date: Wed, 11 Sep 2013 16:07:33 +0000
Subject: [PATCH 2/7] smbd: Remove FAKE_LEVEL_II_OPLOCK

FAKE_LEVEL_II_OPLOCK was an indicator to break level2 oplock holders
on write.  This information is now being held in brlock.tdb, which makes
the FAKE_LEVEL_II_OPLOCK type unnecessary.

Signed-off-by: Volker Lendecke <vl at samba.org>
---
 source3/include/smb.h     |    5 +--
 source3/locking/locking.c |   14 +--------
 source3/smbd/files.c      |    3 +-
 source3/smbd/open.c       |   41 ++++++++++--------------
 source3/smbd/oplock.c     |   77 +++++++++++++++++++++++++++++++++++----------
 5 files changed, 81 insertions(+), 59 deletions(-)

diff --git a/source3/include/smb.h b/source3/include/smb.h
index 0d07f71..5c0dfdc 100644
--- a/source3/include/smb.h
+++ b/source3/include/smb.h
@@ -686,7 +686,8 @@ enum remote_arch_types {RA_UNKNOWN, RA_WFWG, RA_OS2, RA_WIN95, RA_WINNT,
 
 /* The following are Samba-private. */
 #define INTERNAL_OPEN_ONLY 		0x8
-#define FAKE_LEVEL_II_OPLOCK 		0x10	/* Client requested no_oplock, but we have to
+/* #define FAKE_LEVEL_II_OPLOCK 	0x10 */	  /* Not used anymore */
+				/* Client requested no_oplock, but we have to
 				 * inform potential level2 holders on
 				 * write. */
 /* #define DEFERRED_OPEN_ENTRY 		0x20 */   /* Not used anymore */
@@ -698,7 +699,7 @@ enum remote_arch_types {RA_UNKNOWN, RA_WFWG, RA_OS2, RA_WIN95, RA_WINNT,
 
 #define EXCLUSIVE_OPLOCK_TYPE(lck) ((lck) & ((unsigned int)EXCLUSIVE_OPLOCK|(unsigned int)BATCH_OPLOCK))
 #define BATCH_OPLOCK_TYPE(lck) ((lck) & (unsigned int)BATCH_OPLOCK)
-#define LEVEL_II_OPLOCK_TYPE(lck) ((lck) & ((unsigned int)LEVEL_II_OPLOCK|(unsigned int)FAKE_LEVEL_II_OPLOCK))
+#define LEVEL_II_OPLOCK_TYPE(lck) ((lck) & (unsigned int)LEVEL_II_OPLOCK)
 
 /* kernel_oplock_message definition.
 
diff --git a/source3/locking/locking.c b/source3/locking/locking.c
index d0b6eaf..bab49b5 100644
--- a/source3/locking/locking.c
+++ b/source3/locking/locking.c
@@ -847,19 +847,7 @@ bool remove_share_oplock(struct share_mode_lock *lck, files_struct *fsp)
 		return False;
 	}
 
-	if (EXCLUSIVE_OPLOCK_TYPE(e->op_type)) {
-		/*
-		 * Going from exclusive or batch,
- 		 * we always go through FAKE_LEVEL_II
- 		 * first.
- 		 */
-		if (!EXCLUSIVE_OPLOCK_TYPE(fsp->oplock_type)) {
-			smb_panic("remove_share_oplock: logic error");
-		}
-		e->op_type = FAKE_LEVEL_II_OPLOCK;
-	} else {
-		e->op_type = NO_OPLOCK;
-	}
+	e->op_type = NO_OPLOCK;
 	lck->data->modified = True;
 	return True;
 }
diff --git a/source3/smbd/files.c b/source3/smbd/files.c
index d94ee11..c64c841 100644
--- a/source3/smbd/files.c
+++ b/source3/smbd/files.c
@@ -318,8 +318,7 @@ files_struct *file_find_dif(struct smbd_server_connection *sconn,
 			}
 			/* Paranoia check. */
 			if ((fsp->fh->fd == -1) &&
-			    (fsp->oplock_type != NO_OPLOCK) &&
-			    (fsp->oplock_type != FAKE_LEVEL_II_OPLOCK)) {
+			    (fsp->oplock_type != NO_OPLOCK)) {
 				DEBUG(0,("file_find_dif: file %s file_id = "
 					 "%s, gen = %u oplock_type = %u is a "
 					 "stat open with oplock type !\n",
diff --git a/source3/smbd/open.c b/source3/smbd/open.c
index 858d2be..c96bbb5 100644
--- a/source3/smbd/open.c
+++ b/source3/smbd/open.c
@@ -1052,14 +1052,6 @@ static void validate_my_share_entries(struct smbd_server_connection *sconn,
 			  "share entry with an open file\n");
 	}
 
-	if ((share_entry->op_type == NO_OPLOCK) &&
-	    (fsp->oplock_type == FAKE_LEVEL_II_OPLOCK))
-	{
-		/* Someone has already written to it, but I haven't yet
-		 * noticed */
-		return;
-	}
-
 	if (((uint16)fsp->oplock_type) != share_entry->op_type) {
 		goto panic;
 	}
@@ -1409,24 +1401,10 @@ static void grant_fsp_oplock_type(files_struct *fsp,
  	 * what was found in the existing share modes.
  	 */
 
-	if (got_a_none_oplock) {
-		fsp->oplock_type = NO_OPLOCK;
-	} else if (got_level2_oplock) {
-		if (fsp->oplock_type == NO_OPLOCK ||
-				fsp->oplock_type == FAKE_LEVEL_II_OPLOCK) {
-			/* Store a level2 oplock, but don't tell the client */
-			fsp->oplock_type = FAKE_LEVEL_II_OPLOCK;
-		} else {
+	if (got_level2_oplock || got_a_none_oplock) {
+		if (EXCLUSIVE_OPLOCK_TYPE(fsp->oplock_type)) {
 			fsp->oplock_type = LEVEL_II_OPLOCK;
 		}
-	} else {
-		/* All share_mode_entries are placeholders or deferred.
-		 * Silently upgrade to fake levelII if the client didn't
-		 * ask for an oplock. */
-		if (fsp->oplock_type == NO_OPLOCK) {
-			/* Store a level2 oplock, but don't tell the client */
-			fsp->oplock_type = FAKE_LEVEL_II_OPLOCK;
-		}
 	}
 
 	/*
@@ -1434,7 +1412,20 @@ static void grant_fsp_oplock_type(files_struct *fsp,
 	 * or if we've turned them off.
 	 */
 	if (fsp->oplock_type == LEVEL_II_OPLOCK && !allow_level2) {
-		fsp->oplock_type = FAKE_LEVEL_II_OPLOCK;
+		fsp->oplock_type = NO_OPLOCK;
+	}
+
+	if (fsp->oplock_type == LEVEL_II_OPLOCK && !got_level2_oplock) {
+		/*
+		 * We're the first level2 oplock. Indicate that in brlock.tdb.
+		 */
+		struct byte_range_lock *brl;
+
+		brl = brl_get_locks(talloc_tos(), fsp);
+		if (brl != NULL) {
+			brl_set_have_read_oplocks(brl, true);
+			TALLOC_FREE(brl);
+		}
 	}
 
 	DEBUG(10,("grant_fsp_oplock_type: oplock type 0x%x on file %s\n",
diff --git a/source3/smbd/oplock.c b/source3/smbd/oplock.c
index 70f168e..f18ac65 100644
--- a/source3/smbd/oplock.c
+++ b/source3/smbd/oplock.c
@@ -66,7 +66,6 @@ NTSTATUS set_file_oplock(files_struct *fsp, int oplock_type)
 	}
 
 	if ((fsp->oplock_type != NO_OPLOCK) &&
-	    (fsp->oplock_type != FAKE_LEVEL_II_OPLOCK) &&
 	    use_kernel &&
 	    !koplocks->ops->set_oplock(koplocks, fsp, oplock_type))
 	{
@@ -100,7 +99,6 @@ void release_file_oplock(files_struct *fsp)
 	struct kernel_oplocks *koplocks = sconn->oplocks.kernel_ops;
 
 	if ((fsp->oplock_type != NO_OPLOCK) &&
-	    (fsp->oplock_type != FAKE_LEVEL_II_OPLOCK) &&
 	    koplocks) {
 		koplocks->ops->release_oplock(koplocks, fsp, NO_OPLOCK);
 	}
@@ -114,12 +112,7 @@ void release_file_oplock(files_struct *fsp)
 	SMB_ASSERT(sconn->oplocks.exclusive_open>=0);
 	SMB_ASSERT(sconn->oplocks.level_II_open>=0);
 
-	if (EXCLUSIVE_OPLOCK_TYPE(fsp->oplock_type)) {
-		/* This doesn't matter for close. */
-		fsp->oplock_type = FAKE_LEVEL_II_OPLOCK;
-	} else {
-		fsp->oplock_type = NO_OPLOCK;
-	}
+	fsp->oplock_type = NO_OPLOCK;
 	fsp->sent_oplock_break = NO_BREAK_SENT;
 
 	flush_write_cache(fsp, OPLOCK_RELEASE_FLUSH);
@@ -171,6 +164,45 @@ bool remove_oplock(files_struct *fsp)
 			 "file %s\n", fsp_str_dbg(fsp)));
 		return False;
 	}
+
+	if (fsp->oplock_type == LEVEL_II_OPLOCK) {
+
+		/*
+		 * If we're the only LEVEL_II holder, we have to remove the
+		 * have_read_oplocks from the brlock entry
+		 */
+
+		struct share_mode_data *data = lck->data;
+		uint32_t i, num_level2;
+
+		num_level2 = 0;
+		for (i=0; i<data->num_share_modes; i++) {
+			if (data->share_modes[i].op_type == LEVEL_II_OPLOCK) {
+				num_level2 += 1;
+			}
+			if (num_level2 > 1) {
+				/*
+				 * No need to count them all...
+				 */
+				break;
+			}
+		}
+
+		if (num_level2 == 1) {
+			/*
+			 * That's only us. We are dropping that level2 oplock,
+			 * so remove the brlock flag.
+			 */
+			struct byte_range_lock *brl;
+
+			brl = brl_get_locks(talloc_tos(), fsp);
+			if (brl) {
+				brl_set_have_read_oplocks(brl, false);
+				TALLOC_FREE(brl);
+			}
+		}
+	}
+
 	ret = remove_share_oplock(lck, fsp);
 	if (!ret) {
 		DEBUG(0,("remove_oplock: failed to remove share oplock for "
@@ -190,6 +222,7 @@ bool downgrade_oplock(files_struct *fsp)
 {
 	bool ret;
 	struct share_mode_lock *lck;
+	struct byte_range_lock *brl;
 
 	lck = get_existing_share_mode_lock(talloc_tos(), fsp->file_id);
 	if (lck == NULL) {
@@ -206,6 +239,13 @@ bool downgrade_oplock(files_struct *fsp)
 	}
 
 	downgrade_file_oplock(fsp);
+
+	brl = brl_get_locks(talloc_tos(), fsp);
+	if (brl != NULL) {
+		brl_set_have_read_oplocks(brl, true);
+		TALLOC_FREE(brl);
+	}
+
 	TALLOC_FREE(lck);
 	return ret;
 }
@@ -375,14 +415,6 @@ static void break_level2_to_none_async(files_struct *fsp)
 		return;
 	}
 
-	if (fsp->oplock_type == FAKE_LEVEL_II_OPLOCK) {
-		/* Don't tell the client, just downgrade. */
-		DEBUG(3, ("process_oplock_async_level2_break_message: "
-			  "downgrading fake level 2 oplock.\n"));
-		remove_oplock(fsp);
-		return;
-	}
-
 	/* Ensure we're really at level2 state. */
 	SMB_ASSERT(fsp->oplock_type == LEVEL_II_OPLOCK);
 
@@ -622,6 +654,7 @@ static void contend_level2_oplocks_begin_default(files_struct *fsp,
 	struct smbd_server_connection *sconn = fsp->conn->sconn;
 	struct tevent_immediate *im;
 	struct break_to_none_state *state;
+	struct byte_range_lock *brl;
 
 	/*
 	 * If this file is level II oplocked then we need
@@ -631,8 +664,18 @@ static void contend_level2_oplocks_begin_default(files_struct *fsp,
 	 * the shared memory area whilst doing this.
 	 */
 
-	if (!LEVEL_II_OPLOCK_TYPE(fsp->oplock_type))
+	if (EXCLUSIVE_OPLOCK_TYPE(fsp->oplock_type)) {
+		/*
+		 * There can't be any level2 oplocks, we're alone.
+		 */
 		return;
+	}
+
+	brl = brl_get_locks_readonly(fsp);
+	if ((brl != NULL) && !brl_have_read_oplocks(brl)) {
+		DEBUG(10, ("No read oplocks around\n"));
+		return;
+	}
 
 	/*
 	 * When we get here we might have a brlock entry locked. Also
-- 
1.7.9.5


From 78310cc59e63417bb5cbf07427b28b7b318c9d1b Mon Sep 17 00:00:00 2001
From: Volker Lendecke <vl at samba.org>
Date: Mon, 9 Sep 2013 18:53:15 +0000
Subject: [PATCH 3/7] torture: Extend raw.oplock.batch10

With FAKE_LEVEL_II_OPLOCKS around we did not grant LEVEL2 after
a NO_OPLOCK file got written to. Windows does grant LEVEL2 in this
case. With the have_level2_oplocks in brlocks.tdb we can now grant LEVEL2
in this case as well.

Signed-off-by: Volker Lendecke <vl at samba.org>
---
 source4/torture/raw/oplock.c |   12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/source4/torture/raw/oplock.c b/source4/torture/raw/oplock.c
index c2e086a..ef9f973 100644
--- a/source4/torture/raw/oplock.c
+++ b/source4/torture/raw/oplock.c
@@ -1696,6 +1696,18 @@ static bool test_raw_oplock_batch10(struct torture_context *tctx, struct smbcli_
 	CHECK_VAL(break_info.failures, 0);
 	CHECK_VAL(io.ntcreatex.out.oplock_level, 0);
 
+	{
+		union smb_write wr;
+		wr.write.level = RAW_WRITE_WRITE;
+		wr.write.in.file.fnum = fnum;
+		wr.write.in.count = 1;
+		wr.write.in.offset = 0;
+		wr.write.in.remaining = 0;
+		wr.write.in.data = (const uint8_t *)"x";
+		status = smb_raw_write(cli1->tree, &wr);
+		CHECK_STATUS(tctx, status, NT_STATUS_OK);
+	}
+
 	smbcli_oplock_handler(cli2->transport, oplock_handler_ack_to_given, cli2->tree);
 
 	io.ntcreatex.in.flags = NTCREATEX_FLAGS_EXTENDED |
-- 
1.7.9.5


From 4753b1f9351b683c98e14fd85c9656a666e67a04 Mon Sep 17 00:00:00 2001
From: Volker Lendecke <vl at samba.org>
Date: Fri, 13 Sep 2013 14:13:51 +0200
Subject: [PATCH 4/7] smbd: Add debugs to brlock.c

Signed-off-by: Volker Lendecke <vl at samba.org>
---
 source3/locking/brlock.c |   15 +++++++++++++++
 source3/smbd/oplock.c    |    6 ++++++
 2 files changed, 21 insertions(+)

diff --git a/source3/locking/brlock.c b/source3/locking/brlock.c
index 67b1701..b5eebc8 100644
--- a/source3/locking/brlock.c
+++ b/source3/locking/brlock.c
@@ -90,6 +90,8 @@ bool brl_have_read_oplocks(const struct byte_range_lock *brl)
 void brl_set_have_read_oplocks(struct byte_range_lock *brl,
 			       bool have_read_oplocks)
 {
+	DEBUG(10, ("Setting have_read_oplocks to %s\n",
+		   have_read_oplocks ? "true" : "false"));
 	SMB_ASSERT(brl->record != NULL); /* otherwise we're readonly */
 	brl->have_read_oplocks = have_read_oplocks;
 	brl->modified = true;
@@ -1894,6 +1896,7 @@ static void byte_range_lock_flush(struct byte_range_lock *br_lck)
 {
 	size_t data_len;
 	if (!br_lck->modified) {
+		DEBUG(10, ("br_lck not modified\n"));
 		goto done;
 	}
 
@@ -1903,6 +1906,8 @@ static void byte_range_lock_flush(struct byte_range_lock *br_lck)
 		data_len += 1;
 	}
 
+	DEBUG(10, ("data_len=%d\n", (int)data_len));
+
 	if (data_len == 0) {
 		/* No locks - delete this entry. */
 		NTSTATUS status = dbwrap_record_delete(br_lck->record);
@@ -1934,6 +1939,8 @@ static void byte_range_lock_flush(struct byte_range_lock *br_lck)
 		}
 	}
 
+	DEBUG(10, ("seqnum=%d\n", dbwrap_get_seqnum(brlock_db)));
+
  done:
 	br_lck->modified = false;
 	TALLOC_FREE(br_lck->record);
@@ -1997,6 +2004,8 @@ struct byte_range_lock *brl_get_locks(TALLOC_CTX *mem_ctx, files_struct *fsp)
 		       talloc_get_size(br_lck->lock_data));
 	}
 
+	DEBUG(10, ("data.dsize=%d\n", (int)data.dsize));
+
 	if ((data.dsize % sizeof(struct lock_struct)) == 1) {
 		br_lck->have_read_oplocks = (data.dptr[data.dsize-1] == 1);
 	}
@@ -2072,6 +2081,9 @@ static void brl_get_locks_readonly_parser(TDB_DATA key, TDB_DATA data,
 		br_lock->have_read_oplocks = (data.dptr[data.dsize-1] == 1);
 	}
 
+	DEBUG(10, ("Got %d bytes, have_read_oplocks: %s\n", (int)data.dsize,
+		   br_lock->have_read_oplocks ? "true" : "false"));
+
 	*state->br_lock = br_lock;
 }
 
@@ -2080,6 +2092,9 @@ struct byte_range_lock *brl_get_locks_readonly(files_struct *fsp)
 	struct byte_range_lock *br_lock = NULL;
 	struct byte_range_lock *rw = NULL;
 
+	DEBUG(10, ("seqnum=%d, fsp->brlock_seqnum=%d\n",
+		   dbwrap_get_seqnum(brlock_db), fsp->brlock_seqnum));
+
 	if ((fsp->brlock_rec != NULL)
 	    && (dbwrap_get_seqnum(brlock_db) == fsp->brlock_seqnum)) {
 		/*
diff --git a/source3/smbd/oplock.c b/source3/smbd/oplock.c
index f18ac65..fc6b0ef 100644
--- a/source3/smbd/oplock.c
+++ b/source3/smbd/oplock.c
@@ -157,6 +157,9 @@ bool remove_oplock(files_struct *fsp)
 	bool ret;
 	struct share_mode_lock *lck;
 
+	DEBUG(10, ("remove_oplock called for %s\n",
+		   fsp_str_dbg(fsp)));
+
 	/* Remove the oplock flag from the sharemode. */
 	lck = get_existing_share_mode_lock(talloc_tos(), fsp->file_id);
 	if (lck == NULL) {
@@ -224,6 +227,9 @@ bool downgrade_oplock(files_struct *fsp)
 	struct share_mode_lock *lck;
 	struct byte_range_lock *brl;
 
+	DEBUG(10, ("downgrade_oplock called for %s\n",
+		   fsp_str_dbg(fsp)));
+
 	lck = get_existing_share_mode_lock(talloc_tos(), fsp->file_id);
 	if (lck == NULL) {
 		DEBUG(0,("downgrade_oplock: failed to lock share entry for "
-- 
1.7.9.5


From a9443e235e7484ea2b96e75f0fdb851d60033d18 Mon Sep 17 00:00:00 2001
From: Volker Lendecke <vl at samba.org>
Date: Fri, 13 Sep 2013 15:18:15 +0200
Subject: [PATCH 5/7] smbd: Remove some FAKE_LEVEL_II comments

Signed-off-by: Volker Lendecke <vl at samba.org>
---
 source3/smbd/oplock.c      |    6 +++---
 source3/smbd/smb2_create.c |    5 -----
 2 files changed, 3 insertions(+), 8 deletions(-)

diff --git a/source3/smbd/oplock.c b/source3/smbd/oplock.c
index fc6b0ef..ffea57b 100644
--- a/source3/smbd/oplock.c
+++ b/source3/smbd/oplock.c
@@ -442,8 +442,8 @@ static void break_level2_to_none_async(files_struct *fsp)
 /*******************************************************************
  This handles the case of a write triggering a break to none
  message on a level2 oplock.
- When we get this message we may be in any of three states :
- NO_OPLOCK, LEVEL_II, FAKE_LEVEL2. We only send a message to
+ When we get this message we may be in any of two states :
+ NO_OPLOCK, LEVEL_II. We only send a message to
  the client for LEVEL2.
 *******************************************************************/
 
@@ -740,7 +740,7 @@ static void do_break_to_none(struct tevent_context *ctx,
 		 * As there could have been multiple writes waiting at the
 		 * lock_share_entry gate we may not be the first to
 		 * enter. Hence the state of the op_types in the share mode
-		 * entries may be partly NO_OPLOCK and partly LEVEL_II or FAKE_LEVEL_II
+		 * entries may be partly NO_OPLOCK and partly LEVEL_II
 		 * oplock. It will do no harm to re-send break messages to
 		 * those smbd's that are still waiting their turn to remove
 		 * their LEVEL_II state, and also no harm to ignore existing
diff --git a/source3/smbd/smb2_create.c b/source3/smbd/smb2_create.c
index 140c81b..f48c8ee 100644
--- a/source3/smbd/smb2_create.c
+++ b/source3/smbd/smb2_create.c
@@ -58,11 +58,6 @@ static uint8_t map_samba_oplock_levels_to_smb2(int oplock_type)
 	} else if (EXCLUSIVE_OPLOCK_TYPE(oplock_type)) {
 		return SMB2_OPLOCK_LEVEL_EXCLUSIVE;
 	} else if (oplock_type == LEVEL_II_OPLOCK) {
-		/*
-		 * Don't use LEVEL_II_OPLOCK_TYPE here as
-		 * this also includes FAKE_LEVEL_II_OPLOCKs
-		 * which are internal only.
-		 */
 		return SMB2_OPLOCK_LEVEL_II;
 	} else {
 		return SMB2_OPLOCK_LEVEL_NONE;
-- 
1.7.9.5


From d46390b4fdaaf92fa7e419c1481202f459135a7e Mon Sep 17 00:00:00 2001
From: Volker Lendecke <vl at samba.org>
Date: Fri, 4 Oct 2013 09:24:29 +0000
Subject: [PATCH 6/7] smbd: Remove a special case for level2 break

With the level2 indicator in brlock.tdb this race condition does not
exist anymore

Signed-off-by: Volker Lendecke <vl at samba.org>
---
 source3/smbd/oplock.c |   45 ++++++++++++---------------------------------
 1 file changed, 12 insertions(+), 33 deletions(-)

diff --git a/source3/smbd/oplock.c b/source3/smbd/oplock.c
index ffea57b..b0ab1b5 100644
--- a/source3/smbd/oplock.c
+++ b/source3/smbd/oplock.c
@@ -409,9 +409,10 @@ static void send_break_message_smb1(files_struct *fsp, int level)
 	}
 }
 
-static void break_level2_to_none_async(files_struct *fsp)
+static void break_level2_to_none_async(struct server_id src, files_struct *fsp)
 {
 	struct smbd_server_connection *sconn = fsp->conn->sconn;
+	struct server_id self = messaging_server_id(sconn->msg_ctx);
 
 	if (fsp->oplock_type == NO_OPLOCK) {
 		/* We already got a "break to none" message and we've handled
@@ -428,6 +429,12 @@ static void break_level2_to_none_async(files_struct *fsp)
 		  "to none message for %s, file %s\n", fsp_fnum_dbg(fsp),
 		  fsp_str_dbg(fsp)));
 
+	/* Need to wait before sending a break
+	   message if we sent ourselves this message. */
+	if (serverid_equal(&self, &src)) {
+		wait_before_sending_break();
+	}
+
 	/* Now send a break to none message to our client. */
 	if (sconn->using_smb2) {
 		send_break_message_smb2(fsp, OPLOCKLEVEL_NONE);
@@ -487,7 +494,7 @@ static void process_oplock_async_level2_break_message(struct messaging_context *
 		return;
 	}
 
-	break_level2_to_none_async(fsp);
+	break_level2_to_none_async(src, fsp);
 }
 
 /*******************************************************************
@@ -713,7 +720,6 @@ static void do_break_to_none(struct tevent_context *ctx,
 {
 	struct break_to_none_state *state = talloc_get_type_abort(
 		private_data, struct break_to_none_state);
-	struct server_id self = messaging_server_id(state->sconn->msg_ctx);
 	int i;
 	struct share_mode_lock *lck;
 
@@ -766,36 +772,9 @@ static void do_break_to_none(struct tevent_context *ctx,
 
 		share_mode_entry_to_message(msg, share_entry);
 
-		/*
-		 * Deal with a race condition when breaking level2
- 		 * oplocks. Don't send all the messages and release
- 		 * the lock, this allows someone else to come in and
- 		 * get a level2 lock before any of the messages are
- 		 * processed, and thus miss getting a break message.
- 		 * Ensure at least one entry (the one we're breaking)
- 		 * is processed immediately under the lock and becomes
- 		 * set as NO_OPLOCK to stop any waiter getting a level2.
- 		 * Bugid #5980.
- 		 */
-
-		if (serverid_equal(&self, &share_entry->pid)) {
-			struct files_struct *cur_fsp =
-				initial_break_processing(state->sconn,
-					share_entry->id,
-					share_entry->share_file_id);
-			if (cur_fsp != NULL) {
-				wait_before_sending_break();
-				break_level2_to_none_async(cur_fsp);
-			} else {
-				DEBUG(3, ("release_level_2_oplocks_on_change: "
-				"Did not find fsp, ignoring\n"));
-			}
-		} else {
-			messaging_send_buf(state->sconn->msg_ctx,
-					share_entry->pid,
-					MSG_SMB_ASYNC_LEVEL2_BREAK,
-					(uint8 *)msg, sizeof(msg));
-		}
+		messaging_send_buf(state->sconn->msg_ctx, share_entry->pid,
+				   MSG_SMB_ASYNC_LEVEL2_BREAK,
+				   (uint8 *)msg, sizeof(msg));
 	}
 
 	/* We let the message receivers handle removing the oplock state
-- 
1.7.9.5


From 28ba0602eef254a6697c4867c1a7398488b45cbc Mon Sep 17 00:00:00 2001
From: Volker Lendecke <vl at samba.org>
Date: Fri, 4 Oct 2013 10:11:38 +0000
Subject: [PATCH 7/7] smbd: Inline break_level2_to_none_async

With the special case for bug 5980 in do_break_to_none we only have
one caller: process_oplock_async_level2_break_message. The further
goal is to merge process_oplock_async_level2_break_message with
process_oplock_break_message.

Signed-off-by: Volker Lendecke <vl at samba.org>
---
 source3/smbd/oplock.c |   70 ++++++++++++++++++++++---------------------------
 1 file changed, 32 insertions(+), 38 deletions(-)

diff --git a/source3/smbd/oplock.c b/source3/smbd/oplock.c
index b0ab1b5..e2880c5 100644
--- a/source3/smbd/oplock.c
+++ b/source3/smbd/oplock.c
@@ -409,43 +409,6 @@ static void send_break_message_smb1(files_struct *fsp, int level)
 	}
 }
 
-static void break_level2_to_none_async(struct server_id src, files_struct *fsp)
-{
-	struct smbd_server_connection *sconn = fsp->conn->sconn;
-	struct server_id self = messaging_server_id(sconn->msg_ctx);
-
-	if (fsp->oplock_type == NO_OPLOCK) {
-		/* We already got a "break to none" message and we've handled
-		 * it.  just ignore. */
-		DEBUG(3, ("process_oplock_async_level2_break_message: already "
-			  "broken to none, ignoring.\n"));
-		return;
-	}
-
-	/* Ensure we're really at level2 state. */
-	SMB_ASSERT(fsp->oplock_type == LEVEL_II_OPLOCK);
-
-	DEBUG(10,("process_oplock_async_level2_break_message: sending break "
-		  "to none message for %s, file %s\n", fsp_fnum_dbg(fsp),
-		  fsp_str_dbg(fsp)));
-
-	/* Need to wait before sending a break
-	   message if we sent ourselves this message. */
-	if (serverid_equal(&self, &src)) {
-		wait_before_sending_break();
-	}
-
-	/* Now send a break to none message to our client. */
-	if (sconn->using_smb2) {
-		send_break_message_smb2(fsp, OPLOCKLEVEL_NONE);
-	} else {
-		send_break_message_smb1(fsp, OPLOCKLEVEL_NONE);
-	}
-
-	/* Async level2 request, don't send a reply, just remove the oplock. */
-	remove_oplock(fsp);
-}
-
 /*******************************************************************
  This handles the case of a write triggering a break to none
  message on a level2 oplock.
@@ -465,6 +428,7 @@ static void process_oplock_async_level2_break_message(struct messaging_context *
 	struct smbd_server_connection *sconn =
 		talloc_get_type_abort(private_data,
 		struct smbd_server_connection);
+	struct server_id self = messaging_server_id(sconn->msg_ctx);
 
 	if (data->data == NULL) {
 		DEBUG(0, ("Got NULL buffer\n"));
@@ -494,7 +458,37 @@ static void process_oplock_async_level2_break_message(struct messaging_context *
 		return;
 	}
 
-	break_level2_to_none_async(src, fsp);
+
+	if (fsp->oplock_type == NO_OPLOCK) {
+		/* We already got a "break to none" message and we've handled
+		 * it.  just ignore. */
+		DEBUG(3, ("process_oplock_async_level2_break_message: already "
+			  "broken to none, ignoring.\n"));
+		return;
+	}
+
+	/* Ensure we're really at level2 state. */
+	SMB_ASSERT(fsp->oplock_type == LEVEL_II_OPLOCK);
+
+	DEBUG(10,("process_oplock_async_level2_break_message: sending break "
+		  "to none message for %s, file %s\n", fsp_fnum_dbg(fsp),
+		  fsp_str_dbg(fsp)));
+
+	/* Need to wait before sending a break
+	   message if we sent ourselves this message. */
+	if (serverid_equal(&self, &src)) {
+		wait_before_sending_break();
+	}
+
+	/* Now send a break to none message to our client. */
+	if (sconn->using_smb2) {
+		send_break_message_smb2(fsp, OPLOCKLEVEL_NONE);
+	} else {
+		send_break_message_smb1(fsp, OPLOCKLEVEL_NONE);
+	}
+
+	/* Async level2 request, don't send a reply, just remove the oplock. */
+	remove_oplock(fsp);
 }
 
 /*******************************************************************
-- 
1.7.9.5



More information about the samba-technical mailing list