[PATCH] minor tdb fixes

Ralph Böhme slow at samba.org
Mon Jul 3 06:19:27 UTC 2017


Hi!

Attached is a small patchset that fixes the BUCKET() macro in tdb and the offset
calculations.

I stumbled upon this when looking at the `tdbtool DB list` output: the freelist
was showing the entries from another chain.

Turns out that the BUCKET(-1) macro returns wrong results because of sign
conversion, eg -1 is sign converted to an unsigned int 4294967295 and 4294967295
% 10 = 5. The expected result is -1, not 5.

This doesn't cause more severe issues because we consistently lock the wrong
chain when trying to lock the freelist. It doesn't deadlock because in tdb_store
we lock the freelist before the hashchain. And finally in freelist.c we use the
FREELIST_TOP define directly, not the TDB_HASH_TOP macro that uses BUCKET.

Thoughts?

Cheerio!
-slow
-------------- next part --------------
From 9ec7578322d4eba46c2a42ea6faa6e3b8ee66fb5 Mon Sep 17 00:00:00 2001
From: Ralph Boehme <slow at samba.org>
Date: Sun, 2 Jul 2017 07:46:17 +0200
Subject: [PATCH 1/4] tdb: rename struct tdb_traverse_lock::hash member to
 bucket

The member stores the hashtable bucket, not the hash.

No change in behaviour.

Signed-off-by: Ralph Boehme <slow at samba.org>
---
 lib/tdb/common/tdb_private.h |  2 +-
 lib/tdb/common/traverse.c    | 40 ++++++++++++++++++++--------------------
 2 files changed, 21 insertions(+), 21 deletions(-)

diff --git a/lib/tdb/common/tdb_private.h b/lib/tdb/common/tdb_private.h
index 7ff29aa..90b4193 100644
--- a/lib/tdb/common/tdb_private.h
+++ b/lib/tdb/common/tdb_private.h
@@ -178,7 +178,7 @@ struct tdb_lock_type {
 struct tdb_traverse_lock {
 	struct tdb_traverse_lock *next;
 	uint32_t off;
-	uint32_t hash;
+	uint32_t bucket;
 	int lock_rw;
 };
 
diff --git a/lib/tdb/common/traverse.c b/lib/tdb/common/traverse.c
index f33ef34..dc44b97 100644
--- a/lib/tdb/common/traverse.c
+++ b/lib/tdb/common/traverse.c
@@ -37,8 +37,8 @@ static tdb_off_t tdb_next_lock(struct tdb_context *tdb, struct tdb_traverse_lock
 	int want_next = (tlock->off != 0);
 
 	/* Lock each chain from the start one. */
-	for (; tlock->hash < tdb->hash_size; tlock->hash++) {
-		if (!tlock->off && tlock->hash != 0) {
+	for (; tlock->bucket < tdb->hash_size; tlock->bucket++) {
+		if (!tlock->off && tlock->bucket != 0) {
 			/* this is an optimisation for the common case where
 			   the hash chain is empty, which is particularly
 			   common for the use of tdb with ldb, where large
@@ -67,18 +67,18 @@ static tdb_off_t tdb_next_lock(struct tdb_context *tdb, struct tdb_traverse_lock
 			   factor of around 80 in speed on a linux 2.6.x
 			   system (testing using ldbtest).
 			*/
-			tdb->methods->next_hash_chain(tdb, &tlock->hash);
-			if (tlock->hash == tdb->hash_size) {
+			tdb->methods->next_hash_chain(tdb, &tlock->bucket);
+			if (tlock->bucket == tdb->hash_size) {
 				continue;
 			}
 		}
 
-		if (tdb_lock(tdb, tlock->hash, tlock->lock_rw) == -1)
+		if (tdb_lock(tdb, tlock->bucket, tlock->lock_rw) == -1)
 			return TDB_NEXT_LOCK_ERR;
 
 		/* No previous record?  Start at top of chain. */
 		if (!tlock->off) {
-			if (tdb_ofs_read(tdb, TDB_HASH_TOP(tlock->hash),
+			if (tdb_ofs_read(tdb, TDB_HASH_TOP(tlock->bucket),
 				     &tlock->off) == -1)
 				goto fail;
 		} else {
@@ -121,7 +121,7 @@ static tdb_off_t tdb_next_lock(struct tdb_context *tdb, struct tdb_traverse_lock
 			    tdb_do_delete(tdb, current, rec) != 0)
 				goto fail;
 		}
-		tdb_unlock(tdb, tlock->hash, tlock->lock_rw);
+		tdb_unlock(tdb, tlock->bucket, tlock->lock_rw);
 		want_next = 0;
 	}
 	/* We finished iteration without finding anything */
@@ -130,7 +130,7 @@ static tdb_off_t tdb_next_lock(struct tdb_context *tdb, struct tdb_traverse_lock
 
  fail:
 	tlock->off = 0;
-	if (tdb_unlock(tdb, tlock->hash, tlock->lock_rw) != 0)
+	if (tdb_unlock(tdb, tlock->bucket, tlock->lock_rw) != 0)
 		TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_next_lock: On error unlock failed!\n"));
 	return TDB_NEXT_LOCK_ERR;
 }
@@ -181,7 +181,7 @@ static int tdb_traverse_internal(struct tdb_context *tdb,
 
 			if (key.dptr == NULL) {
 				ret = -1;
-				if (tdb_unlock(tdb, tl->hash, tl->lock_rw)
+				if (tdb_unlock(tdb, tl->bucket, tl->lock_rw)
 				    != 0) {
 					goto out;
 				}
@@ -205,7 +205,7 @@ static int tdb_traverse_internal(struct tdb_context *tdb,
 					       key.dptr, full_len, 0);
 		if (nread == -1) {
 			ret = -1;
-			if (tdb_unlock(tdb, tl->hash, tl->lock_rw) != 0)
+			if (tdb_unlock(tdb, tl->bucket, tl->lock_rw) != 0)
 				goto out;
 			if (tdb_unlock_record(tdb, tl->off) != 0)
 				TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_traverse: key.dptr == NULL and unlock_record failed!\n"));
@@ -218,7 +218,7 @@ static int tdb_traverse_internal(struct tdb_context *tdb,
 		tdb_trace_1rec_retrec(tdb, "traverse", key, dbuf);
 
 		/* Drop chain lock, call out */
-		if (tdb_unlock(tdb, tl->hash, tl->lock_rw) != 0) {
+		if (tdb_unlock(tdb, tl->bucket, tl->lock_rw) != 0) {
 			ret = -1;
 			goto out;
 		}
@@ -322,7 +322,7 @@ _PUBLIC_ TDB_DATA tdb_firstkey(struct tdb_context *tdb)
 	/* release any old lock */
 	if (tdb_unlock_record(tdb, tdb->travlocks.off) != 0)
 		return tdb_null;
-	tdb->travlocks.off = tdb->travlocks.hash = 0;
+	tdb->travlocks.off = tdb->travlocks.bucket = 0;
 	tdb->travlocks.lock_rw = F_RDLCK;
 
 	/* Grab first record: locks chain and returned record. */
@@ -338,7 +338,7 @@ _PUBLIC_ TDB_DATA tdb_firstkey(struct tdb_context *tdb)
 	tdb_trace_retrec(tdb, "tdb_firstkey", key);
 
 	/* Unlock the hash chain of the record we just read. */
-	if (tdb_unlock(tdb, tdb->travlocks.hash, tdb->travlocks.lock_rw) != 0)
+	if (tdb_unlock(tdb, tdb->travlocks.bucket, tdb->travlocks.lock_rw) != 0)
 		TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_firstkey: error occurred while tdb_unlocking!\n"));
 	return key;
 }
@@ -346,7 +346,7 @@ _PUBLIC_ TDB_DATA tdb_firstkey(struct tdb_context *tdb)
 /* find the next entry in the database, returning its key */
 _PUBLIC_ TDB_DATA tdb_nextkey(struct tdb_context *tdb, TDB_DATA oldkey)
 {
-	uint32_t oldhash;
+	uint32_t oldbucket;
 	TDB_DATA key = tdb_null;
 	struct tdb_record rec;
 	unsigned char *k = NULL;
@@ -354,7 +354,7 @@ _PUBLIC_ TDB_DATA tdb_nextkey(struct tdb_context *tdb, TDB_DATA oldkey)
 
 	/* Is locked key the old key?  If so, traverse will be reliable. */
 	if (tdb->travlocks.off) {
-		if (tdb_lock(tdb,tdb->travlocks.hash,tdb->travlocks.lock_rw))
+		if (tdb_lock(tdb,tdb->travlocks.bucket,tdb->travlocks.lock_rw))
 			return tdb_null;
 		if (tdb_rec_read(tdb, tdb->travlocks.off, &rec) == -1
 		    || !(k = tdb_alloc_read(tdb,tdb->travlocks.off+sizeof(rec),
@@ -367,7 +367,7 @@ _PUBLIC_ TDB_DATA tdb_nextkey(struct tdb_context *tdb, TDB_DATA oldkey)
 				SAFE_FREE(k);
 				return tdb_null;
 			}
-			if (tdb_unlock(tdb, tdb->travlocks.hash, tdb->travlocks.lock_rw) != 0) {
+			if (tdb_unlock(tdb, tdb->travlocks.bucket, tdb->travlocks.lock_rw) != 0) {
 				SAFE_FREE(k);
 				return tdb_null;
 			}
@@ -384,13 +384,13 @@ _PUBLIC_ TDB_DATA tdb_nextkey(struct tdb_context *tdb, TDB_DATA oldkey)
 			tdb_trace_1rec_retrec(tdb, "tdb_nextkey", oldkey, tdb_null);
 			return tdb_null;
 		}
-		tdb->travlocks.hash = BUCKET(rec.full_hash);
+		tdb->travlocks.bucket = BUCKET(rec.full_hash);
 		if (tdb_lock_record(tdb, tdb->travlocks.off) != 0) {
 			TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_nextkey: lock_record failed (%s)!\n", strerror(errno)));
 			return tdb_null;
 		}
 	}
-	oldhash = tdb->travlocks.hash;
+	oldbucket = tdb->travlocks.bucket;
 
 	/* Grab next record: locks chain and returned record,
 	   unlocks old record */
@@ -400,11 +400,11 @@ _PUBLIC_ TDB_DATA tdb_nextkey(struct tdb_context *tdb, TDB_DATA oldkey)
 		key.dptr = tdb_alloc_read(tdb, tdb->travlocks.off+sizeof(rec),
 					  key.dsize);
 		/* Unlock the chain of this new record */
-		if (tdb_unlock(tdb, tdb->travlocks.hash, tdb->travlocks.lock_rw) != 0)
+		if (tdb_unlock(tdb, tdb->travlocks.bucket, tdb->travlocks.lock_rw) != 0)
 			TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_nextkey: WARNING tdb_unlock failed!\n"));
 	}
 	/* Unlock the chain of old record */
-	if (tdb_unlock(tdb, BUCKET(oldhash), tdb->travlocks.lock_rw) != 0)
+	if (tdb_unlock(tdb, oldbucket, tdb->travlocks.lock_rw) != 0)
 		TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_nextkey: WARNING tdb_unlock failed!\n"));
 	tdb_trace_1rec_retrec(tdb, "tdb_nextkey", oldkey, key);
 	return key;
-- 
2.9.4


From d0617a3be6900858fe76a9ec7806bc51d19db13d Mon Sep 17 00:00:00 2001
From: Ralph Boehme <slow at samba.org>
Date: Sun, 2 Jul 2017 14:56:11 +0200
Subject: [PATCH 2/4] tdb: avoid calling TDB_HASH_TOP in TDB_DATA_START

TDB_HASH_TOP uses tdb->hash_size which may still be set to 0 in
tdb_open_ex().

Signed-off-by: Ralph Boehme <slow at samba.org>
---
 lib/tdb/common/tdb_private.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/tdb/common/tdb_private.h b/lib/tdb/common/tdb_private.h
index 90b4193..d7f9879 100644
--- a/lib/tdb/common/tdb_private.h
+++ b/lib/tdb/common/tdb_private.h
@@ -63,7 +63,7 @@ typedef uint32_t tdb_off_t;
 #define TDB_BAD_MAGIC(r) ((r)->magic != TDB_MAGIC && !TDB_DEAD(r))
 #define TDB_HASH_TOP(hash) (FREELIST_TOP + (BUCKET(hash)+1)*sizeof(tdb_off_t))
 #define TDB_HASHTABLE_SIZE(tdb) ((tdb->hash_size+1)*sizeof(tdb_off_t))
-#define TDB_DATA_START(hash_size) (TDB_HASH_TOP(hash_size-1) + sizeof(tdb_off_t))
+#define TDB_DATA_START(hash_size) (FREELIST_TOP + (hash_size + 1) * sizeof(tdb_off_t))
 #define TDB_RECOVERY_HEAD offsetof(struct tdb_header, recovery_start)
 #define TDB_SEQNUM_OFS    offsetof(struct tdb_header, sequence_number)
 #define TDB_PAD_BYTE 0x42
-- 
2.9.4


From b6d90504631b0002e48ed3c45ea7afcf1c824cf7 Mon Sep 17 00:00:00 2001
From: Ralph Boehme <slow at samba.org>
Date: Sat, 1 Jul 2017 22:21:26 +0200
Subject: [PATCH 3/4] tdb: BUCKET(-1) returns wrong offset because
 tdb->hash_size is an unsigned int

The following C program demonstrates the issue:

  #include <stdio.h>
  #include <stdlib.h>
  #include <stdarg.h>
  #include <stdbool.h>
  #include <string.h>
  #include <unistd.h>
  #include <errno.h>
  #include <dirent.h>
  #include <sys/types.h>

  int main(int argc, char **argv)
  {
      int hash = -1;
      int tsize_signed = 10;
      unsigned int tsize_unsigned = 10;
      int bucket;

  #define BUCKET(hash, tsize) ((hash) % (tsize))

      bucket = BUCKET(hash, tsize_unsigned);
      printf("hash [%d] tsize [%d] bucket [%d]\n", hash, tsize_unsigned, bucket);

      bucket = BUCKET(hash, tsize_signed);
      printf("hash [%d] tsize [%d] bucket [%d]\n", hash, tsize_signed, bucket);

      return 0;
  }

Output:

$ ./tmp
hash [-1] tsize [10] bucket [5]
hash [-1] tsize [10] bucket [-1]

The first version is what the current BUCKET() macro does. As a result
we lock the hashtable chain in bucket 5, NOT the freelist.

-1 is sign converted to an unsigned int 4294967295 and

  4294967295 % 10 = 5.

As all callers will lock the same wrong list consistently locking is
still consistent.

Stumbled across this when looking at the output of `tdbtool DB list`
which always printed some other hashchain and not the freelist.

The freelist bucket offset computation doesn't use the BUCKET macro in
freelist.c (directly or indirectly) when working on the freelist, it
just directly uses the FREELIST_TOP define, so this problem only affects
tdbtool list.

Signed-off-by: Ralph Boehme <slow at samba.org>
---
 lib/tdb/common/tdb_private.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/tdb/common/tdb_private.h b/lib/tdb/common/tdb_private.h
index d7f9879..5387f6b 100644
--- a/lib/tdb/common/tdb_private.h
+++ b/lib/tdb/common/tdb_private.h
@@ -126,7 +126,7 @@ void tdb_trace_2rec_retrec(struct tdb_context *tdb, const char *op,
 #define SAFE_FREE(x) do { if ((x) != NULL) {free(x); (x)=NULL;} } while(0)
 #endif
 
-#define BUCKET(hash) ((hash) % tdb->hash_size)
+#define BUCKET(hash) ((hash) % ((ssize_t)tdb->hash_size))
 
 #define DOCONV() (tdb->flags & TDB_CONVERT)
 #define CONVERT(x) (DOCONV() ? tdb_convert(&x, sizeof(x)) : &x)
-- 
2.9.4


From 906aeea79701032b2c63c122d8b0b4ca8cb8da87 Mon Sep 17 00:00:00 2001
From: Ralph Boehme <slow at samba.org>
Date: Sun, 2 Jul 2017 11:41:43 +0200
Subject: [PATCH 4/4] tdb: fix hashtable bucket offset calculation

We consistently lock hash chains at the wrong offset because in lock.c
lock_offset() returns an offset 4 bytes below the real offset.

The hashchain array starting at FREELIST_TOP looks like this:

FREELIST_TOP + 0 = freelist
FREELIST_TOP + 4 = hashtable list 0
FREELIST_TOP + 8 = hashtable list 1
...

By replacing                          with

lock_offset(-1)                       => FREELIST_TOP
FREELIST_TOP                          => TDB_HASH_TOP(0)
lock_offset(hash)                     => TDB_HASH_TOP(hash)
lock_offset(BUCKET(tdb->hash_fn(&key))) => TDB_HASH_TOP(tdb->hash_fn(&key))
lock_offset(tdb->hash_size)           => TDB_DATA_START(tdb->hash_size)

we calculate the correct offset.

Finally remove lock_offset() as it's not used anymore.

Signed-off-by: Ralph Boehme <slow at samba.org>
---
 lib/tdb/common/lock.c        | 42 ++++++++++++++++++------------------------
 lib/tdb/common/mutex.c       | 13 ++-----------
 lib/tdb/test/lock-tracking.c |  4 ++--
 3 files changed, 22 insertions(+), 37 deletions(-)

diff --git a/lib/tdb/common/lock.c b/lib/tdb/common/lock.c
index e330201..1944d42 100644
--- a/lib/tdb/common/lock.c
+++ b/lib/tdb/common/lock.c
@@ -137,12 +137,6 @@ static int fcntl_unlock(struct tdb_context *tdb, int rw, off_t off, off_t len)
 	return fcntl(tdb->fd, F_SETLKW, &fl);
 }
 
-/* list -1 is the alloc list, otherwise a hash chain. */
-static tdb_off_t lock_offset(int list)
-{
-	return FREELIST_TOP + 4*list;
-}
-
 /* a byte range locking function - return 0 on success
    this functions locks/unlocks "len" byte at the specified offset.
 
@@ -273,13 +267,13 @@ int tdb_allrecord_upgrade(struct tdb_context *tdb)
 		if (ret == -1) {
 			goto fail;
 		}
-		ret = tdb_brlock_retry(tdb, F_WRLCK, lock_offset(tdb->hash_size),
+		ret = tdb_brlock_retry(tdb, F_WRLCK, TDB_DATA_START(tdb->hash_size),
 				       0, TDB_LOCK_WAIT|TDB_LOCK_PROBE);
 		if (ret == -1) {
 			tdb_mutex_allrecord_downgrade(tdb);
 		}
 	} else {
-		ret = tdb_brlock_retry(tdb, F_WRLCK, FREELIST_TOP, 0,
+		ret = tdb_brlock_retry(tdb, F_WRLCK, TDB_HASH_TOP(0), 0,
 				       TDB_LOCK_WAIT|TDB_LOCK_PROBE);
 	}
 
@@ -312,7 +306,7 @@ int tdb_nest_lock(struct tdb_context *tdb, uint32_t offset, int ltype,
 {
 	struct tdb_lock_type *new_lck;
 
-	if (offset >= lock_offset(tdb->hash_size)) {
+	if (offset >= TDB_DATA_START(tdb->hash_size)) {
 		tdb->ecode = TDB_ERR_LOCK;
 		TDB_LOG((tdb, TDB_DEBUG_ERROR,"tdb_lock: invalid offset %u for ltype=%d\n",
 			 offset, ltype));
@@ -380,19 +374,19 @@ static int tdb_lock_and_recover(struct tdb_context *tdb)
 	int ret;
 
 	/* We need to match locking order in transaction commit. */
-	if (tdb_brlock(tdb, F_WRLCK, FREELIST_TOP, 0, TDB_LOCK_WAIT)) {
+	if (tdb_brlock(tdb, F_WRLCK, TDB_HASH_TOP(0), 0, TDB_LOCK_WAIT)) {
 		return -1;
 	}
 
 	if (tdb_brlock(tdb, F_WRLCK, OPEN_LOCK, 1, TDB_LOCK_WAIT)) {
-		tdb_brunlock(tdb, F_WRLCK, FREELIST_TOP, 0);
+		tdb_brunlock(tdb, F_WRLCK, TDB_HASH_TOP(0), 0);
 		return -1;
 	}
 
 	ret = tdb_transaction_recover(tdb);
 
 	tdb_brunlock(tdb, F_WRLCK, OPEN_LOCK, 1);
-	tdb_brunlock(tdb, F_WRLCK, FREELIST_TOP, 0);
+	tdb_brunlock(tdb, F_WRLCK, TDB_HASH_TOP(0), 0);
 
 	return ret;
 }
@@ -402,7 +396,7 @@ static bool have_data_locks(const struct tdb_context *tdb)
 	int i;
 
 	for (i = 0; i < tdb->num_lockrecs; i++) {
-		if (tdb->lockrecs[i].off >= lock_offset(-1))
+		if (tdb->lockrecs[i].off >= FREELIST_TOP)
 			return true;
 	}
 	return false;
@@ -453,10 +447,10 @@ static int tdb_lock_list(struct tdb_context *tdb, int list, int ltype,
 	 * during a commit.
 	 */
 	check = !have_data_locks(tdb);
-	ret = tdb_nest_lock(tdb, lock_offset(list), ltype, waitflag);
+	ret = tdb_nest_lock(tdb, TDB_HASH_TOP(list), ltype, waitflag);
 
 	if (ret == 0 && check && tdb_needs_recovery(tdb)) {
-		tdb_nest_unlock(tdb, lock_offset(list), ltype, false);
+		tdb_nest_unlock(tdb, TDB_HASH_TOP(list), ltype, false);
 
 		if (tdb_lock_and_recover(tdb) == -1) {
 			return -1;
@@ -496,7 +490,7 @@ int tdb_nest_unlock(struct tdb_context *tdb, uint32_t offset, int ltype,
 		return 0;
 
 	/* Sanity checks */
-	if (offset >= lock_offset(tdb->hash_size)) {
+	if (offset >= TDB_DATA_START(tdb->hash_size)) {
 		TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_unlock: offset %u invalid (%d)\n", offset, tdb->hash_size));
 		return ret;
 	}
@@ -548,7 +542,7 @@ _PUBLIC_ int tdb_unlock(struct tdb_context *tdb, int list, int ltype)
 		return tdb_lock_covered_by_allrecord_lock(tdb, ltype);
 	}
 
-	return tdb_nest_unlock(tdb, lock_offset(list), ltype, false);
+	return tdb_nest_unlock(tdb, TDB_HASH_TOP(list), ltype, false);
 }
 
 /*
@@ -664,7 +658,7 @@ int tdb_allrecord_lock(struct tdb_context *tdb, int ltype,
 	if (tdb_have_mutexes(tdb)) {
 		ret = tdb_mutex_allrecord_lock(tdb, ltype, flags);
 	} else {
-		ret = tdb_chainlock_gradual(tdb, ltype, flags, FREELIST_TOP,
+		ret = tdb_chainlock_gradual(tdb, ltype, flags, TDB_HASH_TOP(0),
 					    tdb->hash_size * 4);
 	}
 
@@ -673,12 +667,12 @@ int tdb_allrecord_lock(struct tdb_context *tdb, int ltype,
 	}
 
 	/* Grab individual record locks. */
-	if (tdb_brlock(tdb, ltype, lock_offset(tdb->hash_size), 0,
+	if (tdb_brlock(tdb, ltype, TDB_DATA_START(tdb->hash_size), 0,
 		       flags) == -1) {
 		if (tdb_have_mutexes(tdb)) {
 			tdb_mutex_allrecord_unlock(tdb);
 		} else {
-			tdb_brunlock(tdb, ltype, FREELIST_TOP,
+			tdb_brunlock(tdb, ltype, TDB_HASH_TOP(0),
 				     tdb->hash_size * 4);
 		}
 		return -1;
@@ -743,11 +737,11 @@ int tdb_allrecord_unlock(struct tdb_context *tdb, int ltype, bool mark_lock)
 			ret = tdb_mutex_allrecord_unlock(tdb);
 			if (ret == 0) {
 				ret = tdb_brunlock(tdb, ltype,
-						   lock_offset(tdb->hash_size),
+						   TDB_DATA_START(tdb->hash_size),
 						   0);
 			}
 		} else {
-			ret = tdb_brunlock(tdb, ltype, FREELIST_TOP, 0);
+			ret = tdb_brunlock(tdb, ltype, TDB_HASH_TOP(0), 0);
 		}
 
 		if (ret != 0) {
@@ -843,7 +837,7 @@ _PUBLIC_ int tdb_chainlock_nonblock(struct tdb_context *tdb, TDB_DATA key)
 /* mark a chain as locked without actually locking it. Warning! use with great caution! */
 _PUBLIC_ int tdb_chainlock_mark(struct tdb_context *tdb, TDB_DATA key)
 {
-	int ret = tdb_nest_lock(tdb, lock_offset(BUCKET(tdb->hash_fn(&key))),
+	int ret = tdb_nest_lock(tdb, TDB_HASH_TOP(tdb->hash_fn(&key)),
 				F_WRLCK, TDB_LOCK_MARK_ONLY);
 	tdb_trace_1rec(tdb, "tdb_chainlock_mark", key);
 	return ret;
@@ -853,7 +847,7 @@ _PUBLIC_ int tdb_chainlock_mark(struct tdb_context *tdb, TDB_DATA key)
 _PUBLIC_ int tdb_chainlock_unmark(struct tdb_context *tdb, TDB_DATA key)
 {
 	tdb_trace_1rec(tdb, "tdb_chainlock_unmark", key);
-	return tdb_nest_unlock(tdb, lock_offset(BUCKET(tdb->hash_fn(&key))),
+	return tdb_nest_unlock(tdb, TDB_HASH_TOP(tdb->hash_fn(&key)),
 			       F_WRLCK, true);
 }
 
diff --git a/lib/tdb/common/mutex.c b/lib/tdb/common/mutex.c
index 8a122d5..896d80d 100644
--- a/lib/tdb/common/mutex.c
+++ b/lib/tdb/common/mutex.c
@@ -100,15 +100,6 @@ size_t tdb_mutex_size(struct tdb_context *tdb)
 static bool tdb_mutex_index(struct tdb_context *tdb, off_t off, off_t len,
 			    unsigned *idx)
 {
-	/*
-	 * Weird but true: We fcntl lock 1 byte at an offset 4 bytes before
-	 * the 4 bytes of the freelist start and the hash chain that is about
-	 * to be locked. See lock_offset() where the freelist is -1 vs the
-	 * "+1" in TDB_HASH_TOP(). Because the mutex array is represented in
-	 * the tdb file itself as data, we need to adjust the offset here.
-	 */
-	const off_t freelist_lock_ofs = FREELIST_TOP - sizeof(tdb_off_t);
-
 	if (!tdb_have_mutexes(tdb)) {
 		return false;
 	}
@@ -116,7 +107,7 @@ static bool tdb_mutex_index(struct tdb_context *tdb, off_t off, off_t len,
 		/* Possibly the allrecord lock */
 		return false;
 	}
-	if (off < freelist_lock_ofs) {
+	if (off < FREELIST_TOP) {
 		/* One of the special locks */
 		return false;
 	}
@@ -140,7 +131,7 @@ static bool tdb_mutex_index(struct tdb_context *tdb, off_t off, off_t len,
 	/*
 	 * Re-index the fcntl offset into an offset into the mutex array
 	 */
-	off -= freelist_lock_ofs; /* rebase to index 0 */
+	off -= FREELIST_TOP; /* rebase to index 0 */
 	off /= sizeof(tdb_off_t); /* 0 for freelist 1-n for hashchain */
 
 	*idx = off;
diff --git a/lib/tdb/test/lock-tracking.c b/lib/tdb/test/lock-tracking.c
index b2f092c..e41a7a1 100644
--- a/lib/tdb/test/lock-tracking.c
+++ b/lib/tdb/test/lock-tracking.c
@@ -112,8 +112,8 @@ int fcntl_with_lockcheck(int fd, int cmd, ... /* arg */ )
 		if (i) {
 			/* Special case: upgrade of allrecord lock. */
 			if (i->type == F_RDLCK && fl->l_type == F_WRLCK
-			    && i->off == FREELIST_TOP
-			    && fl->l_start == FREELIST_TOP
+			    && i->off == (FREELIST_TOP + sizeof(tdb_off_t))
+			    && fl->l_start == (FREELIST_TOP + sizeof(tdb_off_t))
 			    && i->len == 0
 			    && fl->l_len == 0) {
 				if (ret == 0)
-- 
2.9.4



More information about the samba-technical mailing list