[PATCH] Streamline gencache

Volker Lendecke Volker.Lendecke at SerNet.DE
Fri Nov 2 16:14:48 UTC 2018


Hi!

Attached find some significant improvements to gencache, in particular
for both large installations and small ones with a slow system disk.

Review appreciated!

Thanks, Volker

-- 
SerNet GmbH, Bahnhofsallee 1b, 37081 Göttingen
phone: +49-551-370000-0, fax: +49-551-370000-9
AG Göttingen, HRB 2816, GF: Dr. Johannes Loxen
http://www.sernet.de, mailto:kontakt at sernet.de
-------------- next part --------------
From 723597bd9b47735e79e9f7dcd1ed8948111ab798 Mon Sep 17 00:00:00 2001
From: Volker Lendecke <vl at samba.org>
Date: Sun, 28 Oct 2018 09:06:39 +0100
Subject: [PATCH 1/9] tdb: Add tdb_traverse_chain

This is a lightweight readonly traverse of a single chain, see the
comment in the header file.

Signed-off-by: Volker Lendecke <vl at samba.org>
---
 lib/tdb/common/traverse.c | 110 ++++++++++++++++++++++++++++++++++++++++++++++
 lib/tdb/include/tdb.h     |  67 ++++++++++++++++++++++++++++
 2 files changed, 177 insertions(+)

diff --git a/lib/tdb/common/traverse.c b/lib/tdb/common/traverse.c
index a9af1d4b824..54a69dc8d03 100644
--- a/lib/tdb/common/traverse.c
+++ b/lib/tdb/common/traverse.c
@@ -399,3 +399,113 @@ _PUBLIC_ TDB_DATA tdb_nextkey(struct tdb_context *tdb, TDB_DATA oldkey)
 	return key;
 }
 
+_PUBLIC_ int tdb_traverse_chain(struct tdb_context *tdb,
+				unsigned chain,
+				tdb_traverse_func fn,
+				void *private_data)
+{
+	tdb_off_t rec_ptr;
+	struct tdb_chainwalk_ctx chainwalk;
+	int count = 0;
+	int ret;
+
+	if (chain >= tdb->hash_size) {
+		tdb->ecode = TDB_ERR_EINVAL;
+		return -1;
+	}
+
+	if (tdb->traverse_read != 0) {
+		tdb->ecode = TDB_ERR_LOCK;
+		return -1;
+	}
+
+	ret = tdb_lock(tdb, chain, F_RDLCK);
+	if (ret == -1) {
+		return -1;
+	}
+
+	tdb->traverse_read += 1;
+
+	ret = tdb_ofs_read(tdb, TDB_HASH_TOP(chain), &rec_ptr);
+	if (ret == -1) {
+		goto fail;
+	}
+
+	tdb_chainwalk_init(&chainwalk, rec_ptr);
+
+	while (rec_ptr != 0) {
+		struct tdb_record rec;
+		bool ok;
+
+		ret = tdb_rec_read(tdb, rec_ptr, &rec);
+		if (ret == -1) {
+			goto fail;
+		}
+
+		if (!TDB_DEAD(&rec)) {
+			/* no overflow checks, tdb_rec_read checked it */
+			tdb_off_t key_ofs = rec_ptr + sizeof(rec);
+			size_t full_len = rec.key_len + rec.data_len;
+			uint8_t *buf = NULL;
+
+			TDB_DATA key = { .dsize = rec.key_len };
+			TDB_DATA data = { .dsize = rec.data_len };
+
+			if ((tdb->transaction == NULL) &&
+			    (tdb->map_ptr != NULL)) {
+				ret = tdb->methods->tdb_oob(
+					tdb, key_ofs, full_len, 0);
+				if (ret == -1) {
+					goto fail;
+				}
+				key.dptr = (uint8_t *)tdb->map_ptr + key_ofs;
+			} else {
+				buf = tdb_alloc_read(tdb, key_ofs, full_len);
+				if (buf == NULL) {
+					goto fail;
+				}
+				key.dptr = buf;
+			}
+			data.dptr = key.dptr + key.dsize;
+
+			ret = fn(tdb, key, data, private_data);
+			free(buf);
+
+			count += 1;
+
+			if (ret != 0) {
+				break;
+			}
+		}
+
+		rec_ptr = rec.next;
+
+		ok = tdb_chainwalk_check(tdb, &chainwalk, rec_ptr);
+		if (!ok) {
+			goto fail;
+		}
+	}
+	tdb->traverse_read -= 1;
+	tdb_unlock(tdb, chain, F_RDLCK);
+	return count;
+
+fail:
+	tdb->traverse_read -= 1;
+	tdb_unlock(tdb, chain, F_RDLCK);
+	return -1;
+}
+
+_PUBLIC_ int tdb_traverse_key_chain(struct tdb_context *tdb,
+				    TDB_DATA key,
+				    tdb_traverse_func fn,
+				    void *private_data)
+{
+	uint32_t hash, chain;
+	int ret;
+
+	hash = tdb->hash_fn(&key);
+	chain = BUCKET(hash);
+	ret = tdb_traverse_chain(tdb, chain, fn, private_data);
+
+	return ret;
+}
diff --git a/lib/tdb/include/tdb.h b/lib/tdb/include/tdb.h
index 535f07ac00a..445f48c1fd9 100644
--- a/lib/tdb/include/tdb.h
+++ b/lib/tdb/include/tdb.h
@@ -481,6 +481,73 @@ int tdb_traverse(struct tdb_context *tdb, tdb_traverse_func fn, void *private_da
 int tdb_traverse_read(struct tdb_context *tdb, tdb_traverse_func fn, void *private_data);
 
 /**
+ * @brief Traverse a single hash chain
+ *
+ * Traverse a single hash chain under a single lock operation. No
+ * database modification is possible in the callback.
+ *
+ * This exists for background cleanup of databases. In normal
+ * operations, traversing a complete database can be much too
+ * expensive. Databases can have many chains, which will all have to
+ * be looked at before tdb_traverse finishes. Also tdb_traverse does a
+ * lot of fcntl activity to protect against concurrent record deletes.
+ *
+ * With this you can walk a fraction of the whole tdb, collect the
+ * entries you want to prune, leave the traverse, and then modify or
+ * delete the records in a subsequent step.
+ *
+ * To walk the entire database, call this function tdb_hash_size()
+ * times, with 0<=chain<tdb_hash_size(tdb).
+ *
+ * @param[in]  tdb      The database to traverse.
+ *
+ * @param[in]  chain    The hash chain number to traverse.
+ *
+ * @param[in]  fn       The function to call on each entry.
+ *
+ * @param[in]  private_data The private data which should be passed to the
+ *                          traversing function.
+ *
+ * @return              The record count traversed, -1 on error.
+ */
+
+int tdb_traverse_chain(struct tdb_context *tdb,
+		       unsigned chain,
+		       tdb_traverse_func fn,
+		       void *private_data);
+
+/**
+ * @brief Traverse a single hash chain
+ *
+ * This is like tdb_traverse_chain(), but for all records that are in
+ * the same chain as the record corresponding to the key parameter.
+ *
+ * Use it for ongoing database maintenance under a lock. Whenever you
+ * are about to modify a database, you know which record you are going
+ * to write to. For that tdb_store(), an exclusive chainlock is taken
+ * behind the scenes. To utilize this exclusive lock for incremental
+ * database cleanup as well, tdb_chainlock() the key you are about to
+ * modify, then tdb_traverse_key_chain() to do the incremental
+ * maintenance, modify your record and tdb_chainunlock() the key
+ * again.
+ *
+ * @param[in]  tdb      The database to traverse.
+ *
+ * @param[in]  key      The record key to walk the chain for.
+ *
+ * @param[in]  fn       The function to call on each entry.
+ *
+ * @param[in]  private_data The private data which should be passed to the
+ *                          traversing function.
+ *
+ * @return              The record count traversed, -1 on error.
+ */
+
+int tdb_traverse_key_chain(struct tdb_context *tdb,
+			   TDB_DATA key,
+			   tdb_traverse_func fn,
+			   void *private_data);
+/**
  * @brief Check if an entry in the database exists.
  *
  * @note 1 is returned if the key is found and 0 is returned if not found this
-- 
2.11.0


From 4a83f569412e27260f0f3edcfbb6abf031841904 Mon Sep 17 00:00:00 2001
From: Volker Lendecke <vl at samba.org>
Date: Sun, 7 Oct 2018 22:03:09 +0200
Subject: [PATCH 2/9] tdb: Add test for tdb_traverse_chain

Signed-off-by: Volker Lendecke <vl at samba.org>
---
 lib/tdb/test/run-traverse-chain.c | 94 +++++++++++++++++++++++++++++++++++++++
 lib/tdb/wscript                   |  1 +
 2 files changed, 95 insertions(+)
 create mode 100644 lib/tdb/test/run-traverse-chain.c

diff --git a/lib/tdb/test/run-traverse-chain.c b/lib/tdb/test/run-traverse-chain.c
new file mode 100644
index 00000000000..2a25beca3f0
--- /dev/null
+++ b/lib/tdb/test/run-traverse-chain.c
@@ -0,0 +1,94 @@
+#include "../common/tdb_private.h"
+#include "../common/io.c"
+#include "../common/tdb.c"
+#include "../common/lock.c"
+#include "../common/freelist.c"
+#include "../common/traverse.c"
+#include "../common/transaction.c"
+#include "../common/error.c"
+#include "../common/open.c"
+#include "../common/check.c"
+#include "../common/hash.c"
+#include "../common/mutex.c"
+#include "tap-interface.h"
+#include <stdlib.h>
+#include "logging.h"
+
+static char keystr0[] = "x";
+static TDB_DATA key0 = { .dptr = (uint8_t *)keystr0,
+			 .dsize = sizeof(keystr0) };
+static char valuestr0[] = "y";
+static TDB_DATA value0 = { .dptr = (uint8_t *)valuestr0,
+			   .dsize = sizeof(valuestr0) };
+
+static char keystr1[] = "aaa";
+static TDB_DATA key1 = { .dptr = (uint8_t *)keystr1,
+			 .dsize = sizeof(keystr1) };
+static char valuestr1[] = "bbbbb";
+static TDB_DATA value1 = { .dptr = (uint8_t *)valuestr1,
+			   .dsize = sizeof(valuestr1) };
+
+static TDB_DATA *keys[] = { &key0, &key1 };
+static TDB_DATA *values[] = { &value0, &value1 };
+
+static bool tdb_data_same(TDB_DATA d1, TDB_DATA d2)
+{
+	if (d1.dsize != d2.dsize) {
+		return false;
+	}
+	return (memcmp(d1.dptr, d2.dptr, d1.dsize) == 0);
+}
+
+struct traverse_chain_state {
+	size_t idx;
+	bool ok;
+};
+
+static int traverse_chain_fn(struct tdb_context *tdb,
+			     TDB_DATA key,
+			     TDB_DATA data,
+			     void *private_data)
+{
+	struct traverse_chain_state *state = private_data;
+
+	state->ok &= tdb_data_same(key, *keys[state->idx]);
+	state->ok &= tdb_data_same(data, *values[state->idx]);
+	state->idx += 1;
+
+	return 0;
+}
+
+int main(int argc, char *argv[])
+{
+	struct tdb_context *tdb;
+	struct traverse_chain_state state = { .ok = true };
+	int ret;
+
+	plan_tests(4);
+
+	tdb = tdb_open_ex(
+		"traverse_chain.tdb",
+		1,
+		TDB_CLEAR_IF_FIRST,
+		O_RDWR|O_CREAT,
+		0600,
+		&taplogctx,
+		NULL);
+	ok1(tdb);
+
+	/* add in reverse order, tdb_store adds to the front of the list */
+	ret = tdb_store(tdb, key1, value1, TDB_INSERT);
+	ok1(ret == 0);
+	ret = tdb_store(tdb, key0, value0, TDB_INSERT);
+	ok1(ret == 0);
+
+	ret = tdb_traverse_key_chain(tdb, key0, traverse_chain_fn, &state);
+	ok1(ret == 2);
+	ok1(state.ok);
+
+	unlink(tdb_name(tdb));
+
+	tdb_close(tdb);
+
+	return exit_status();
+}
diff --git a/lib/tdb/wscript b/lib/tdb/wscript
index 3ab21fcebc1..483d8e0ebec 100644
--- a/lib/tdb/wscript
+++ b/lib/tdb/wscript
@@ -57,6 +57,7 @@ tdb1_unit_tests = [
     'run-mutex1',
     'run-circular-chain',
     'run-circular-freelist',
+    'run-traverse-chain',
 ]
 
 def options(opt):
-- 
2.11.0


From c15081209e60090f18a7b4b137079efbde14a867 Mon Sep 17 00:00:00 2001
From: Volker Lendecke <vl at samba.org>
Date: Mon, 29 Oct 2018 07:43:43 +0100
Subject: [PATCH 3/9] tdb: Version 1.3.17 for tdb_traverse_chain

Signed-off-by: Volker Lendecke <vl at samba.org>
---
 lib/tdb/ABI/tdb-1.3.17.sigs | 73 +++++++++++++++++++++++++++++++++++++++++++++
 lib/tdb/wscript             |  2 +-
 2 files changed, 74 insertions(+), 1 deletion(-)
 create mode 100644 lib/tdb/ABI/tdb-1.3.17.sigs

diff --git a/lib/tdb/ABI/tdb-1.3.17.sigs b/lib/tdb/ABI/tdb-1.3.17.sigs
new file mode 100644
index 00000000000..e2b0427c347
--- /dev/null
+++ b/lib/tdb/ABI/tdb-1.3.17.sigs
@@ -0,0 +1,73 @@
+tdb_add_flags: void (struct tdb_context *, unsigned int)
+tdb_append: int (struct tdb_context *, TDB_DATA, TDB_DATA)
+tdb_chainlock: int (struct tdb_context *, TDB_DATA)
+tdb_chainlock_mark: int (struct tdb_context *, TDB_DATA)
+tdb_chainlock_nonblock: int (struct tdb_context *, TDB_DATA)
+tdb_chainlock_read: int (struct tdb_context *, TDB_DATA)
+tdb_chainlock_read_nonblock: int (struct tdb_context *, TDB_DATA)
+tdb_chainlock_unmark: int (struct tdb_context *, TDB_DATA)
+tdb_chainunlock: int (struct tdb_context *, TDB_DATA)
+tdb_chainunlock_read: int (struct tdb_context *, TDB_DATA)
+tdb_check: int (struct tdb_context *, int (*)(TDB_DATA, TDB_DATA, void *), void *)
+tdb_close: int (struct tdb_context *)
+tdb_delete: int (struct tdb_context *, TDB_DATA)
+tdb_dump_all: void (struct tdb_context *)
+tdb_enable_seqnum: void (struct tdb_context *)
+tdb_error: enum TDB_ERROR (struct tdb_context *)
+tdb_errorstr: const char *(struct tdb_context *)
+tdb_exists: int (struct tdb_context *, TDB_DATA)
+tdb_fd: int (struct tdb_context *)
+tdb_fetch: TDB_DATA (struct tdb_context *, TDB_DATA)
+tdb_firstkey: TDB_DATA (struct tdb_context *)
+tdb_freelist_size: int (struct tdb_context *)
+tdb_get_flags: int (struct tdb_context *)
+tdb_get_logging_private: void *(struct tdb_context *)
+tdb_get_seqnum: int (struct tdb_context *)
+tdb_hash_size: int (struct tdb_context *)
+tdb_increment_seqnum_nonblock: void (struct tdb_context *)
+tdb_jenkins_hash: unsigned int (TDB_DATA *)
+tdb_lock_nonblock: int (struct tdb_context *, int, int)
+tdb_lockall: int (struct tdb_context *)
+tdb_lockall_mark: int (struct tdb_context *)
+tdb_lockall_nonblock: int (struct tdb_context *)
+tdb_lockall_read: int (struct tdb_context *)
+tdb_lockall_read_nonblock: int (struct tdb_context *)
+tdb_lockall_unmark: int (struct tdb_context *)
+tdb_log_fn: tdb_log_func (struct tdb_context *)
+tdb_map_size: size_t (struct tdb_context *)
+tdb_name: const char *(struct tdb_context *)
+tdb_nextkey: TDB_DATA (struct tdb_context *, TDB_DATA)
+tdb_null: dptr = 0xXXXX, dsize = 0
+tdb_open: struct tdb_context *(const char *, int, int, int, mode_t)
+tdb_open_ex: struct tdb_context *(const char *, int, int, int, mode_t, const struct tdb_logging_context *, tdb_hash_func)
+tdb_parse_record: int (struct tdb_context *, TDB_DATA, int (*)(TDB_DATA, TDB_DATA, void *), void *)
+tdb_printfreelist: int (struct tdb_context *)
+tdb_remove_flags: void (struct tdb_context *, unsigned int)
+tdb_reopen: int (struct tdb_context *)
+tdb_reopen_all: int (int)
+tdb_repack: int (struct tdb_context *)
+tdb_rescue: int (struct tdb_context *, void (*)(TDB_DATA, TDB_DATA, void *), void *)
+tdb_runtime_check_for_robust_mutexes: bool (void)
+tdb_set_logging_function: void (struct tdb_context *, const struct tdb_logging_context *)
+tdb_set_max_dead: void (struct tdb_context *, int)
+tdb_setalarm_sigptr: void (struct tdb_context *, volatile sig_atomic_t *)
+tdb_store: int (struct tdb_context *, TDB_DATA, TDB_DATA, int)
+tdb_storev: int (struct tdb_context *, TDB_DATA, const TDB_DATA *, int, int)
+tdb_summary: char *(struct tdb_context *)
+tdb_transaction_active: bool (struct tdb_context *)
+tdb_transaction_cancel: int (struct tdb_context *)
+tdb_transaction_commit: int (struct tdb_context *)
+tdb_transaction_prepare_commit: int (struct tdb_context *)
+tdb_transaction_start: int (struct tdb_context *)
+tdb_transaction_start_nonblock: int (struct tdb_context *)
+tdb_transaction_write_lock_mark: int (struct tdb_context *)
+tdb_transaction_write_lock_unmark: int (struct tdb_context *)
+tdb_traverse: int (struct tdb_context *, tdb_traverse_func, void *)
+tdb_traverse_chain: int (struct tdb_context *, unsigned int, tdb_traverse_func, void *)
+tdb_traverse_key_chain: int (struct tdb_context *, TDB_DATA, tdb_traverse_func, void *)
+tdb_traverse_read: int (struct tdb_context *, tdb_traverse_func, void *)
+tdb_unlock: int (struct tdb_context *, int, int)
+tdb_unlockall: int (struct tdb_context *)
+tdb_unlockall_read: int (struct tdb_context *)
+tdb_validate_freelist: int (struct tdb_context *, int *)
+tdb_wipe_all: int (struct tdb_context *)
diff --git a/lib/tdb/wscript b/lib/tdb/wscript
index 483d8e0ebec..6a6adabc4c1 100644
--- a/lib/tdb/wscript
+++ b/lib/tdb/wscript
@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 
 APPNAME = 'tdb'
-VERSION = '1.3.16'
+VERSION = '1.3.17'
 
 import sys, os
 
-- 
2.11.0


From 3488b6fc3324eceac763d3b0c156e91c52fbc4ce Mon Sep 17 00:00:00 2001
From: Volker Lendecke <vl at samba.org>
Date: Mon, 22 Oct 2018 08:57:00 +0200
Subject: [PATCH 4/9] tdb: Allow !CLEAR_IF_FIRST & MUTEX_LOCKING

This is a prerequisite to allow gencache to run on a non-transactioned
database with mutexes.

Signed-off-by: Volker Lendecke <vl at samba.org>
---
 lib/tdb/common/open.c               | 91 +++++++++++++++++--------------------
 lib/tdb/test/run-mutex-openflags2.c |  7 ---
 2 files changed, 42 insertions(+), 56 deletions(-)

diff --git a/lib/tdb/common/open.c b/lib/tdb/common/open.c
index 899a2fcaffb..be5f8075557 100644
--- a/lib/tdb/common/open.c
+++ b/lib/tdb/common/open.c
@@ -230,8 +230,6 @@ static bool check_header_hash(struct tdb_context *tdb,
 static bool tdb_mutex_open_ok(struct tdb_context *tdb,
 			      const struct tdb_header *header)
 {
-	int locked;
-
 	if (tdb->flags & TDB_NOLOCK) {
 		/*
 		 * We don't look at locks, so it does not matter to have a
@@ -240,37 +238,6 @@ static bool tdb_mutex_open_ok(struct tdb_context *tdb,
 		return true;
 	}
 
-	locked = tdb_nest_lock(tdb, ACTIVE_LOCK, F_WRLCK,
-			       TDB_LOCK_NOWAIT|TDB_LOCK_PROBE);
-
-	if ((locked == -1) && (tdb->ecode == TDB_ERR_LOCK)) {
-		/*
-		 * CLEAR_IF_FIRST still active. The tdb was created on this
-		 * host, so we can assume the mutex implementation is
-		 * compatible. Important for tools like tdbdump on a still
-		 * open locking.tdb.
-		 */
-		goto check_local_settings;
-	}
-
-	/*
-	 * We got the CLEAR_IF_FIRST lock. That means the database was
-	 * potentially copied from somewhere else. The mutex implementation
-	 * might be incompatible.
-	 */
-
-	if (tdb_nest_unlock(tdb, ACTIVE_LOCK, F_WRLCK, false) == -1) {
-		/*
-		 * Should not happen
-		 */
-		TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_mutex_open_ok: "
-			 "failed to release ACTIVE_LOCK on %s: %s\n",
-			 tdb->name, strerror(errno)));
-		return false;
-	}
-
-check_local_settings:
-
 	if (!(tdb->flags & TDB_MUTEX_LOCKING)) {
 		TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_mutex_open_ok[%s]: "
 			 "Can use mutexes only with "
@@ -416,14 +383,6 @@ _PUBLIC_ struct tdb_context *tdb_open_ex(const char *name, int hash_size, int td
 		 * the runtime check for existing tdb's comes later.
 		 */
 
-		if (!(tdb->flags & TDB_CLEAR_IF_FIRST)) {
-			TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_open_ex: "
-				"invalid flags for %s - TDB_MUTEX_LOCKING "
-				"requires TDB_CLEAR_IF_FIRST\n", name));
-			errno = EINVAL;
-			goto fail;
-		}
-
 		if (tdb->flags & TDB_INTERNAL) {
 			TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_open_ex: "
 				"invalid flags for %s - TDB_MUTEX_LOCKING and "
@@ -632,6 +591,30 @@ _PUBLIC_ struct tdb_context *tdb_open_ex(const char *name, int hash_size, int td
 		 * mutex locking.
 		 */
 		tdb->hdr_ofs = header.mutex_size;
+
+		if ((!(tdb_flags & TDB_CLEAR_IF_FIRST)) && (!tdb->read_only)) {
+			/*
+			 * Open an existing mutexed tdb, but without
+			 * CLEAR_IF_FIRST. We need to initialize the
+			 * mutex array and keep the CLEAR_IF_FIRST
+			 * lock locked.
+			 */
+			ret = tdb_nest_lock(tdb, ACTIVE_LOCK, F_WRLCK,
+					    TDB_LOCK_NOWAIT|TDB_LOCK_PROBE);
+			locked = (ret == 0);
+
+			if (locked) {
+				ret = tdb_mutex_init(tdb);
+				if (ret == -1) {
+					TDB_LOG((tdb,
+						 TDB_DEBUG_FATAL,
+						 "tdb_open_ex: tdb_mutex_init "
+						 "failed for ""%s: %s\n",
+						 name, strerror(errno)));
+					goto fail;
+				}
+			}
+		}
 	}
 
 	if ((header.magic1_hash == 0) && (header.magic2_hash == 0)) {
@@ -708,15 +691,19 @@ _PUBLIC_ struct tdb_context *tdb_open_ex(const char *name, int hash_size, int td
 			goto fail;
 		}
 
+
 	}
 
-	/* We always need to do this if the CLEAR_IF_FIRST flag is set, even if
-	   we didn't get the initial exclusive lock as we need to let all other
-	   users know we're using it. */
+	if (locked || (tdb_flags & TDB_CLEAR_IF_FIRST)) {
+		/*
+		 * We always need to do this if the CLEAR_IF_FIRST
+		 * flag is set, even if we didn't get the initial
+		 * exclusive lock as we need to let all other users
+		 * know we're using it.
+		 */
 
-	if (tdb_flags & TDB_CLEAR_IF_FIRST) {
-		/* leave this lock in place to indicate it's in use */
-		if (tdb_nest_lock(tdb, ACTIVE_LOCK, F_RDLCK, TDB_LOCK_WAIT) == -1) {
+		ret = tdb_nest_lock(tdb, ACTIVE_LOCK, F_RDLCK, TDB_LOCK_WAIT);
+		if (ret == -1) {
 			goto fail;
 		}
 	}
@@ -932,7 +919,10 @@ fail:
    seek pointer from our parent and to re-establish locks */
 _PUBLIC_ int tdb_reopen(struct tdb_context *tdb)
 {
-	return tdb_reopen_internal(tdb, tdb->flags & TDB_CLEAR_IF_FIRST);
+	bool active_lock;
+	active_lock = (tdb->flags & (TDB_CLEAR_IF_FIRST|TDB_MUTEX_LOCKING));
+
+	return tdb_reopen_internal(tdb, active_lock);
 }
 
 /* reopen all tdb's */
@@ -941,7 +931,10 @@ _PUBLIC_ int tdb_reopen_all(int parent_longlived)
 	struct tdb_context *tdb;
 
 	for (tdb=tdbs; tdb; tdb = tdb->next) {
-		bool active_lock = (tdb->flags & TDB_CLEAR_IF_FIRST);
+		bool active_lock;
+
+		active_lock =
+			(tdb->flags & (TDB_CLEAR_IF_FIRST|TDB_MUTEX_LOCKING));
 
 		/*
 		 * If the parent is longlived (ie. a
diff --git a/lib/tdb/test/run-mutex-openflags2.c b/lib/tdb/test/run-mutex-openflags2.c
index 6522ae42faa..89603e638c2 100644
--- a/lib/tdb/test/run-mutex-openflags2.c
+++ b/lib/tdb/test/run-mutex-openflags2.c
@@ -112,13 +112,6 @@ int main(int argc, char *argv[])
 	data.dsize = strlen("world");
 	data.dptr = discard_const_p(uint8_t, "world");
 
-	tdb = tdb_open_ex("mutex-openflags2.tdb", 0,
-			  TDB_INCOMPATIBLE_HASH|
-			  TDB_MUTEX_LOCKING,
-			  O_RDWR|O_CREAT, 0755, &nolog_ctx, NULL);
-	ok((tdb == NULL) && (errno == EINVAL), "TDB_MUTEX_LOCKING without "
-	   "TDB_CLEAR_IF_FIRST should fail with EINVAL - %d", errno);
-
 	if (!runtime_support) {
 		tdb = tdb_open_ex("mutex-openflags2.tdb", 0,
 				  TDB_CLEAR_IF_FIRST|
-- 
2.11.0


From 58a769b9a932c39c58c7f01cce9e34e5ed181c6f Mon Sep 17 00:00:00 2001
From: Volker Lendecke <vl at samba.org>
Date: Wed, 10 Oct 2018 16:12:28 +0200
Subject: [PATCH 5/9] gencache: Convert to a binary timestamp

Two reasons:

The ascii conversion shows up on profiles.

In a further commit we will get checksums for gencache entries to
protect at hidden corruption due to a crash on the non-transactioned
gencache.tdb. Next to the timestamp this is a second field that is
gencache metadata, and I don't want to deal with a second ascii number
when at least some of the gencache values are binary already.

Signed-off-by: Volker Lendecke <vl at samba.org>
---
 source3/lib/gencache.c | 37 ++++++-------------------------------
 1 file changed, 6 insertions(+), 31 deletions(-)

diff --git a/source3/lib/gencache.c b/source3/lib/gencache.c
index f566534a1a6..5aa52a06e7c 100644
--- a/source3/lib/gencache.c
+++ b/source3/lib/gencache.c
@@ -31,8 +31,6 @@
 #undef  DBGC_CLASS
 #define DBGC_CLASS DBGC_TDB
 
-#define CACHE_DATA_FMT	"%12u/"
-
 static struct tdb_wrap *cache;
 static struct tdb_wrap *cache_notrans;
 
@@ -269,8 +267,6 @@ bool gencache_set_data_blob(const char *keystr, DATA_BLOB blob,
 {
 	TDB_DATA key;
 	int ret;
-	fstring hdr;
-	int hdr_len;
 	time_t last_stabilize;
 	static int writecount;
 	TDB_DATA dbufs[2];
@@ -297,13 +293,8 @@ bool gencache_set_data_blob(const char *keystr, DATA_BLOB blob,
 		return true;
 	}
 
-	hdr_len = fstr_sprintf(hdr, CACHE_DATA_FMT, (int)timeout);
-
-	if (hdr_len == -1) {
-		return false;
-	}
-
-	dbufs[0] = (TDB_DATA) { .dptr = (uint8_t *)hdr, .dsize = hdr_len };
+	dbufs[0] = (TDB_DATA) { .dptr = (uint8_t *)&timeout,
+				.dsize = sizeof(time_t) };
 	dbufs[1] = (TDB_DATA) { .dptr = blob.data, .dsize = blob.length };
 
 	DEBUG(10, ("Adding cache entry with key=[%s] and timeout="
@@ -408,32 +399,16 @@ bool gencache_del(const char *keystr)
 
 static bool gencache_pull_timeout(TDB_DATA data, time_t *pres, DATA_BLOB *payload)
 {
-	time_t res;
-	char *slash = NULL;
-	char *endptr;
-
-	if (data.dptr == NULL) {
-		return false;
-	}
-	slash = memchr(data.dptr, '/', data.dsize);
-	if (slash == NULL) {
-		return false;
-	}
-
-	res = strtol((char *)data.dptr, &endptr, 10);
-
-	if ((endptr == NULL) || (*endptr != '/')) {
-		DBG_WARNING("Invalid gencache data format\n");
+	if ((data.dptr == NULL) || (data.dsize < sizeof(time_t))) {
 		return false;
 	}
 	if (pres != NULL) {
-		*pres = res;
+		memcpy(pres, data.dptr, sizeof(time_t));
 	}
 	if (payload != NULL) {
-		endptr += 1;
 		*payload = (DATA_BLOB) {
-			.data = discard_const_p(uint8_t, endptr),
-			.length = data.dsize - PTR_DIFF(endptr, data.dptr),
+			.data = data.dptr + sizeof(time_t),
+			.length = data.dsize - sizeof(time_t),
 		};
 	}
 	return true;
-- 
2.11.0


From ebb12be3cf534a6d6f039d88c709a78eff22d784 Mon Sep 17 00:00:00 2001
From: Volker Lendecke <vl at samba.org>
Date: Wed, 10 Oct 2018 16:53:10 +0200
Subject: [PATCH 6/9] gencache: Add crc check

This covers key, timestamp and data. This will detect silent
corruption of gencache data after a system crash

Signed-off-by: Volker Lendecke <vl at samba.org>
---
 source3/lib/gencache.c | 48 +++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 39 insertions(+), 9 deletions(-)

diff --git a/source3/lib/gencache.c b/source3/lib/gencache.c
index 5aa52a06e7c..ec7c397ad73 100644
--- a/source3/lib/gencache.c
+++ b/source3/lib/gencache.c
@@ -27,6 +27,7 @@
 #include "system/glob.h"
 #include "util_tdb.h"
 #include "tdb_wrap/tdb_wrap.h"
+#include "zlib.h"
 
 #undef  DBGC_CLASS
 #define DBGC_CLASS DBGC_TDB
@@ -269,7 +270,8 @@ bool gencache_set_data_blob(const char *keystr, DATA_BLOB blob,
 	int ret;
 	time_t last_stabilize;
 	static int writecount;
-	TDB_DATA dbufs[2];
+	TDB_DATA dbufs[3];
+	uint32_t crc;
 
 	if ((keystr == NULL) || (blob.data == NULL)) {
 		return false;
@@ -297,13 +299,21 @@ bool gencache_set_data_blob(const char *keystr, DATA_BLOB blob,
 				.dsize = sizeof(time_t) };
 	dbufs[1] = (TDB_DATA) { .dptr = blob.data, .dsize = blob.length };
 
+	crc = crc32(0, Z_NULL, 0);
+	crc = crc32(crc, key.dptr, key.dsize);
+	crc = crc32(crc, dbufs[0].dptr, dbufs[0].dsize);
+	crc = crc32(crc, dbufs[1].dptr, dbufs[1].dsize);
+
+	dbufs[2] = (TDB_DATA) { .dptr = (uint8_t *)&crc,
+				.dsize = sizeof(crc) };
+
 	DEBUG(10, ("Adding cache entry with key=[%s] and timeout="
 	           "[%s] (%d seconds %s)\n", keystr,
 		   timestring(talloc_tos(), timeout),
 		   (int)(timeout - time(NULL)), 
 		   timeout > time(NULL) ? "ahead" : "in the past"));
 
-	ret = tdb_storev(cache_notrans->tdb, key, dbufs, 2, 0);
+	ret = tdb_storev(cache_notrans->tdb, key, dbufs, ARRAY_SIZE(dbufs), 0);
 	if (ret != 0) {
 		return false;
 	}
@@ -397,18 +407,38 @@ bool gencache_del(const char *keystr)
 	return result;
 }
 
-static bool gencache_pull_timeout(TDB_DATA data, time_t *pres, DATA_BLOB *payload)
+static bool gencache_pull_timeout(TDB_DATA key,
+				  TDB_DATA data,
+				  time_t *pres,
+				  DATA_BLOB *payload)
 {
-	if ((data.dptr == NULL) || (data.dsize < sizeof(time_t))) {
+	size_t crc_ofs;
+	uint32_t crc, stored_crc;
+
+	if ((data.dptr == NULL) ||
+	    (data.dsize < (sizeof(time_t) + sizeof(uint32_t)))) {
+		return false;
+	}
+
+	crc_ofs = data.dsize - sizeof(uint32_t);
+
+	crc = crc32(0, Z_NULL, 0);
+	crc = crc32(crc, key.dptr, key.dsize);
+	crc = crc32(crc, data.dptr, crc_ofs);
+
+	memcpy(&stored_crc, data.dptr + crc_ofs, sizeof(uint32_t));
+
+	if (stored_crc != crc) {
 		return false;
 	}
+
 	if (pres != NULL) {
 		memcpy(pres, data.dptr, sizeof(time_t));
 	}
 	if (payload != NULL) {
 		*payload = (DATA_BLOB) {
-			.data = data.dptr + sizeof(time_t),
-			.length = data.dsize - sizeof(time_t),
+			.data = data.dptr+sizeof(time_t),
+			.length = data.dsize-sizeof(time_t)-sizeof(uint32_t),
 		};
 	}
 	return true;
@@ -429,7 +459,7 @@ static int gencache_parse_fn(TDB_DATA key, TDB_DATA data, void *private_data)
 	DATA_BLOB payload;
 	bool ret;
 
-	ret = gencache_pull_timeout(data, &t.timeout, &payload);
+	ret = gencache_pull_timeout(key, data, &t.timeout, &payload);
 	if (!ret) {
 		return -1;
 	}
@@ -692,7 +722,7 @@ static int stabilize_fn(struct tdb_context *tdb, TDB_DATA key, TDB_DATA val,
 		return 0;
 	}
 
-	if (!gencache_pull_timeout(val, &timeout, NULL)) {
+	if (!gencache_pull_timeout(key, val, &timeout, NULL)) {
 		DEBUG(10, ("Ignoring invalid entry\n"));
 		return 0;
 	}
@@ -817,7 +847,7 @@ static int gencache_iterate_blobs_fn(struct tdb_context *tdb, TDB_DATA key,
 		}
 	}
 
-	if (!gencache_pull_timeout(data, &timeout, &payload)) {
+	if (!gencache_pull_timeout(key, data, &timeout, &payload)) {
 		goto done;
 	}
 
-- 
2.11.0


From 37b513cee8d5927dd9de87c8baafb0504905d753 Mon Sep 17 00:00:00 2001
From: Volker Lendecke <vl at samba.org>
Date: Thu, 11 Oct 2018 12:52:40 +0200
Subject: [PATCH 7/9] gencache: Remove transaction-based tdb

At more than one large site I've seen significant problems due to
gencache_stabilize. gencache_stabilize was mainly introduced to
survive machine crashes with the cache still being in place. Given
that most installations crash rarely and this is still a cache, this
safety is overkill and causes real problems.

With the recent changes to tdb, we should be safe enough to run on
completely corrupted databases and properly detect errors. A further
commit will introduce code that wipes the gencache.tdb if such a
corruption is detected.

There is one kind of corruption that we don't properly handle:
Orphaned space in the database. I don't have a good idea yet how to
handle this in a graceful and efficient way during normal operations,
but maybe this idea pops up at some point.

Signed-off-by: Volker Lendecke <vl at samba.org>
---
 source3/lib/gencache.c               | 423 +----------------------------------
 source3/lib/gencache.h               |   1 -
 source3/nmbd/nmbd.c                  |   2 -
 source3/smbd/server_exit.c           |   1 -
 source3/torture/test_namemap_cache.c |   2 -
 source3/utils/net.c                  |   2 -
 source3/utils/net_cache.c            |  26 ---
 source3/winbindd/winbindd.c          |   2 -
 8 files changed, 9 insertions(+), 450 deletions(-)

diff --git a/source3/lib/gencache.c b/source3/lib/gencache.c
index ec7c397ad73..1d8349c5702 100644
--- a/source3/lib/gencache.c
+++ b/source3/lib/gencache.c
@@ -33,7 +33,6 @@
 #define DBGC_CLASS DBGC_TDB
 
 static struct tdb_wrap *cache;
-static struct tdb_wrap *cache_notrans;
 
 /**
  * @file gencache.c
@@ -72,7 +71,7 @@ static bool gencache_init(void)
 
 	hash_size = lp_parm_int(-1, "gencache", "hash_size", 10000);
 
-	cache_fname = cache_path(talloc_tos(), "gencache.tdb");
+	cache_fname = lock_path(talloc_tos(), "gencache.tdb");
 	if (cache_fname == NULL) {
 		return false;
 	}
@@ -80,44 +79,14 @@ static bool gencache_init(void)
 	DEBUG(5, ("Opening cache file at %s\n", cache_fname));
 
 	cache = tdb_wrap_open(NULL, cache_fname, hash_size,
-			      TDB_DEFAULT|TDB_INCOMPATIBLE_HASH,
+			      TDB_INCOMPATIBLE_HASH|
+			      TDB_NOSYNC|
+			      TDB_MUTEX_LOCKING,
 			      open_flags, 0644);
-
-	if (!cache && (errno == EACCES)) {
-		open_flags = O_RDONLY;
-		cache = tdb_wrap_open(NULL, cache_fname, hash_size,
-				      TDB_DEFAULT|TDB_INCOMPATIBLE_HASH,
-				      open_flags, 0644);
-		if (cache) {
-			DEBUG(5, ("gencache_init: Opening cache file %s read-only.\n", cache_fname));
-		}
-	}
-	TALLOC_FREE(cache_fname);
-
-	if (!cache) {
-		DEBUG(5, ("Attempt to open gencache.tdb has failed.\n"));
-		return false;
-	}
-
-	cache_fname = lock_path(talloc_tos(), "gencache_notrans.tdb");
-	if (cache_fname == NULL) {
-		TALLOC_FREE(cache);
-		return false;
-	}
-
-	DEBUG(5, ("Opening cache file at %s\n", cache_fname));
-
-	cache_notrans = tdb_wrap_open(NULL, cache_fname, hash_size,
-				      TDB_CLEAR_IF_FIRST|
-				      TDB_INCOMPATIBLE_HASH|
-				      TDB_NOSYNC|
-				      TDB_MUTEX_LOCKING,
-				      open_flags, 0644);
-	if (cache_notrans == NULL) {
+	if (cache == NULL) {
 		DEBUG(5, ("Opening %s failed: %s\n", cache_fname,
 			  strerror(errno)));
 		TALLOC_FREE(cache_fname);
-		TALLOC_FREE(cache);
 		return false;
 	}
 	TALLOC_FREE(cache_fname);
@@ -125,132 +94,6 @@ static bool gencache_init(void)
 	return true;
 }
 
-static TDB_DATA last_stabilize_key(void)
-{
-	const char key[] = "@LAST_STABILIZED";
-
-	return (TDB_DATA) {
-		.dptr = discard_const_p(uint8_t, key),
-		.dsize = sizeof(key),
-	};
-}
-
-struct gencache_have_val_state {
-	time_t new_timeout;
-	const DATA_BLOB *data;
-	bool gotit;
-};
-
-static void gencache_have_val_parser(const struct gencache_timeout *old_timeout,
-				     DATA_BLOB data,
-				     void *private_data)
-{
-	struct gencache_have_val_state *state =
-		(struct gencache_have_val_state *)private_data;
-	time_t now = time(NULL);
-	int cache_time_left, new_time_left, additional_time;
-
-	/*
-	 * Excuse the many variables, but these time calculations are
-	 * confusing to me. We do not want to write to gencache with a
-	 * possibly expensive transaction if we are about to write the same
-	 * value, just extending the remaining timeout by less than 10%.
-	 */
-
-	cache_time_left = old_timeout->timeout - now;
-	if (cache_time_left <= 0) {
-		/*
-		 * timed out, write new value
-		 */
-		return;
-	}
-
-	new_time_left = state->new_timeout - now;
-	if (new_time_left <= 0) {
-		/*
-		 * Huh -- no new timeout?? Write it.
-		 */
-		return;
-	}
-
-	if (new_time_left < cache_time_left) {
-		/*
-		 * Someone wants to shorten the timeout. Let it happen.
-		 */
-		return;
-	}
-
-	/*
-	 * By how much does the new timeout extend the remaining cache time?
-	 */
-	additional_time = new_time_left - cache_time_left;
-
-	if (additional_time * 10 < 0) {
-		/*
-		 * Integer overflow. We extend by so much that we have to write it.
-		 */
-		return;
-	}
-
-	/*
-	 * The comparison below is essentially equivalent to
-	 *
-	 *    new_time_left > cache_time_left * 1.10
-	 *
-	 * but without floating point calculations.
-	 */
-
-	if (additional_time * 10 > cache_time_left) {
-		/*
-		 * We extend the cache timeout by more than 10%. Do it.
-		 */
-		return;
-	}
-
-	/*
-	 * Now the more expensive data compare.
-	 */
-	if (data_blob_cmp(state->data, &data) != 0) {
-		/*
-		 * Write a new value. Certainly do it.
-		 */
-		return;
-	}
-
-	/*
-	 * Extending the timeout by less than 10% for the same cache value is
-	 * not worth the trouble writing a value into gencache under a
-	 * possibly expensive transaction.
-	 */
-	state->gotit = true;
-}
-
-static bool gencache_have_val(const char *keystr, const DATA_BLOB *data,
-			      time_t timeout)
-{
-	struct gencache_have_val_state state;
-
-	state.new_timeout = timeout;
-	state.data = data;
-	state.gotit = false;
-
-	if (!gencache_parse(keystr, gencache_have_val_parser, &state)) {
-		return false;
-	}
-	return state.gotit;
-}
-
-static int last_stabilize_parser(TDB_DATA key, TDB_DATA data,
-				 void *private_data)
-{
-	time_t *last_stabilize = private_data;
-
-	if ((data.dsize != 0) && (data.dptr[data.dsize-1] == '\0')) {
-		*last_stabilize = atoi((char *)data.dptr);
-	}
-	return 0;
-}
-
 /**
  * Set an entry in the cache file. If there's no such
  * one, then add it.
@@ -268,8 +111,6 @@ bool gencache_set_data_blob(const char *keystr, DATA_BLOB blob,
 {
 	TDB_DATA key;
 	int ret;
-	time_t last_stabilize;
-	static int writecount;
 	TDB_DATA dbufs[3];
 	uint32_t crc;
 
@@ -279,22 +120,10 @@ bool gencache_set_data_blob(const char *keystr, DATA_BLOB blob,
 
 	key = string_term_tdb_data(keystr);
 
-	ret = tdb_data_cmp(key, last_stabilize_key());
-	if (ret == 0) {
-		DEBUG(10, ("Can't store %s as a key\n", keystr));
-		return false;
-	}
-
 	if (!gencache_init()) {
 		return false;
 	}
 
-	if ((timeout != 0) && gencache_have_val(keystr, &blob, timeout)) {
-		DEBUG(10, ("Did not store value for %s, we already got it\n",
-			   keystr));
-		return true;
-	}
-
 	dbufs[0] = (TDB_DATA) { .dptr = (uint8_t *)&timeout,
 				.dsize = sizeof(time_t) };
 	dbufs[1] = (TDB_DATA) { .dptr = blob.data, .dsize = blob.length };
@@ -313,53 +142,11 @@ bool gencache_set_data_blob(const char *keystr, DATA_BLOB blob,
 		   (int)(timeout - time(NULL)), 
 		   timeout > time(NULL) ? "ahead" : "in the past"));
 
-	ret = tdb_storev(cache_notrans->tdb, key, dbufs, ARRAY_SIZE(dbufs), 0);
-	if (ret != 0) {
-		return false;
-	}
-
-	/*
-	 * Every 100 writes within a single process, stabilize the cache with
-	 * a transaction. This is done to prevent a single transaction to
-	 * become huge and chew lots of memory.
-	 */
-	writecount += 1;
-	if (writecount > lp_parm_int(-1, "gencache", "stabilize_count", 100)) {
-		gencache_stabilize();
-		writecount = 0;
-		goto done;
-	}
-
-	/*
-	 * Every 5 minutes, call gencache_stabilize() to not let grow
-	 * gencache_notrans.tdb too large.
-	 */
+	ret = tdb_storev(cache->tdb, key, dbufs, ARRAY_SIZE(dbufs), 0);
 
-	last_stabilize = 0;
-
-	tdb_parse_record(cache_notrans->tdb, last_stabilize_key(),
-			 last_stabilize_parser, &last_stabilize);
-
-	if ((last_stabilize
-	     + lp_parm_int(-1, "gencache", "stabilize_interval", 300))
-	    < time(NULL)) {
-		gencache_stabilize();
-	}
-
-done:
 	return ret == 0;
 }
 
-static void gencache_del_parser(const struct gencache_timeout *t,
-				DATA_BLOB blob,
-				void *private_data)
-{
-	if (t->timeout != 0) {
-		bool *exists = private_data;
-		*exists = true;
-	}
-}
-
 /**
  * Delete one entry from the cache file.
  *
@@ -372,8 +159,6 @@ static void gencache_del_parser(const struct gencache_timeout *t,
 bool gencache_del(const char *keystr)
 {
 	TDB_DATA key = string_term_tdb_data(keystr);
-	bool exists = false;
-	bool result = false;
 	int ret;
 
 	if (keystr == NULL) {
@@ -386,25 +171,9 @@ bool gencache_del(const char *keystr)
 
 	DEBUG(10, ("Deleting cache entry (key=[%s])\n", keystr));
 
-	ret = tdb_chainlock(cache_notrans->tdb, key);
-	if (ret == -1) {
-		return false;
-	}
-
-	gencache_parse(keystr, gencache_del_parser, &exists);
-
-	if (exists) {
-		/*
-		 * We delete an element by setting its timeout to
-		 * 0. This way we don't have to do a transaction on
-		 * gencache.tdb every time we delete an element.
-		 */
-		result = gencache_set(keystr, "", 0);
-	}
-
-	tdb_chainunlock(cache_notrans->tdb, key);
+	ret = tdb_delete(cache->tdb, key);
 
-	return result;
+	return (ret != -1);
 }
 
 static bool gencache_pull_timeout(TDB_DATA key,
@@ -449,7 +218,6 @@ struct gencache_parse_state {
 		       DATA_BLOB blob,
 		       void *private_data);
 	void *private_data;
-	bool copy_to_notrans;
 };
 
 static int gencache_parse_fn(TDB_DATA key, TDB_DATA data, void *private_data)
@@ -466,10 +234,6 @@ static int gencache_parse_fn(TDB_DATA key, TDB_DATA data, void *private_data)
 	state = (struct gencache_parse_state *)private_data;
 	state->parser(&t, payload, state->private_data);
 
-	if (state->copy_to_notrans) {
-		tdb_store(cache_notrans->tdb, key, data, 0);
-	}
-
 	return 0;
 }
 
@@ -486,44 +250,19 @@ bool gencache_parse(const char *keystr,
 	if (keystr == NULL) {
 		return false;
 	}
-	if (tdb_data_cmp(key, last_stabilize_key()) == 0) {
-		return false;
-	}
 	if (!gencache_init()) {
 		return false;
 	}
 
 	state.parser = parser;
 	state.private_data = private_data;
-	state.copy_to_notrans = false;
 
-	ret = tdb_chainlock(cache_notrans->tdb, key);
-	if (ret != 0) {
-		return false;
-	}
-
-	ret = tdb_parse_record(cache_notrans->tdb, key,
+	ret = tdb_parse_record(cache->tdb, key,
 			       gencache_parse_fn, &state);
 	if (ret == 0) {
-		tdb_chainunlock(cache_notrans->tdb, key);
 		return true;
 	}
 
-	state.copy_to_notrans = true;
-
-	ret = tdb_parse_record(cache->tdb, key, gencache_parse_fn, &state);
-
-	if ((ret == -1) && (tdb_error(cache->tdb) == TDB_ERR_NOEXIST)) {
-		/*
-		 * The record does not exist. Set a delete-marker in
-		 * gencache_notrans, so that we don't have to look at
-		 * the fcntl-based cache again.
-		 */
-		gencache_set(keystr, "", 0);
-	}
-
-	tdb_chainunlock(cache_notrans->tdb, key);
-
 	return (ret == 0);
 }
 
@@ -617,138 +356,6 @@ fail:
 	return false;
 } 
 
-struct stabilize_state {
-	bool written;
-};
-static int stabilize_fn(struct tdb_context *tdb, TDB_DATA key, TDB_DATA val,
-			void *priv);
-
-/**
- * Stabilize gencache
- *
- * Migrate the clear-if-first gencache data to the stable,
- * transaction-based gencache.tdb
- */
-
-bool gencache_stabilize(void)
-{
-	struct stabilize_state state;
-	int res;
-	char *now;
-
-	if (!gencache_init()) {
-		return false;
-	}
-
-	res = tdb_transaction_start_nonblock(cache->tdb);
-	if (res != 0) {
-		if (tdb_error(cache->tdb) == TDB_ERR_NOLOCK)
-		{
-			/*
-			 * Someone else already does the stabilize,
-			 * this does not have to be done twice
-			 */
-			return true;
-		}
-
-		DEBUG(10, ("Could not start transaction on gencache.tdb: "
-			   "%s\n", tdb_errorstr(cache->tdb)));
-		return false;
-	}
-
-	res = tdb_lockall_nonblock(cache_notrans->tdb);
-	if (res != 0) {
-		tdb_transaction_cancel(cache->tdb);
-		DEBUG(10, ("Could not get allrecord lock on "
-			   "gencache_notrans.tdb: %s\n",
-			   tdb_errorstr(cache_notrans->tdb)));
-		return false;
-	}
-
-	state.written = false;
-
-	res = tdb_traverse(cache_notrans->tdb, stabilize_fn, &state);
-	if (res < 0) {
-		tdb_unlockall(cache_notrans->tdb);
-		tdb_transaction_cancel(cache->tdb);
-		return false;
-	}
-
-	if (!state.written) {
-		tdb_unlockall(cache_notrans->tdb);
-		tdb_transaction_cancel(cache->tdb);
-		return true;
-	}
-
-	res = tdb_transaction_commit(cache->tdb);
-	if (res != 0) {
-		DEBUG(10, ("tdb_transaction_commit on gencache.tdb failed: "
-			   "%s\n", tdb_errorstr(cache->tdb)));
-		tdb_unlockall(cache_notrans->tdb);
-		return false;
-	}
-
-	res = tdb_wipe_all(cache_notrans->tdb);
-	if (res < 0) {
-		DBG_DEBUG("tdb_wipe_all on gencache_notrans.tdb failed: %s\n",
-			  tdb_errorstr(cache_notrans->tdb));
-	}
-
-	now = talloc_asprintf(talloc_tos(), "%d", (int)time(NULL));
-	if (now != NULL) {
-		tdb_store(cache_notrans->tdb, last_stabilize_key(),
-			  string_term_tdb_data(now), 0);
-		TALLOC_FREE(now);
-	}
-
-	res = tdb_unlockall(cache_notrans->tdb);
-	if (res != 0) {
-		DEBUG(10, ("tdb_unlockall on gencache.tdb failed: "
-			   "%s\n", tdb_errorstr(cache->tdb)));
-		return false;
-	}
-
-	return true;
-}
-
-static int stabilize_fn(struct tdb_context *tdb, TDB_DATA key, TDB_DATA val,
-			void *priv)
-{
-	struct stabilize_state *state = (struct stabilize_state *)priv;
-	int res;
-	time_t timeout;
-
-	if (tdb_data_cmp(key, last_stabilize_key()) == 0) {
-		return 0;
-	}
-
-	if (!gencache_pull_timeout(key, val, &timeout, NULL)) {
-		DEBUG(10, ("Ignoring invalid entry\n"));
-		return 0;
-	}
-	if ((timeout < time(NULL)) || (val.dsize == 0)) {
-		res = tdb_delete(cache->tdb, key);
-		if (res == 0) {
-			state->written = true;
-		} else if (tdb_error(cache->tdb) == TDB_ERR_NOEXIST) {
-			res = 0;
-		}
-	} else {
-		res = tdb_store(cache->tdb, key, val, 0);
-		if (res == 0) {
-			state->written = true;
-		}
-	}
-
-	if (res != 0) {
-		DEBUG(10, ("Transfer to gencache.tdb failed: %s\n",
-			   tdb_errorstr(cache->tdb)));
-		return -1;
-	}
-
-	return 0;
-}
-
 /**
  * Get existing entry from the cache file.
  *
@@ -816,7 +423,6 @@ struct gencache_iterate_blobs_state {
 		   time_t timeout, void *private_data);
 	const char *pattern;
 	void *private_data;
-	bool in_persistent;
 };
 
 static int gencache_iterate_blobs_fn(struct tdb_context *tdb, TDB_DATA key,
@@ -829,13 +435,6 @@ static int gencache_iterate_blobs_fn(struct tdb_context *tdb, TDB_DATA key,
 	time_t timeout;
 	DATA_BLOB payload;
 
-	if (tdb_data_cmp(key, last_stabilize_key()) == 0) {
-		return 0;
-	}
-	if (state->in_persistent && tdb_exists(cache_notrans->tdb, key)) {
-		return 0;
-	}
-
 	if (key.dptr[key.dsize-1] == '\0') {
 		keystr = (char *)key.dptr;
 	} else {
@@ -887,10 +486,6 @@ void gencache_iterate_blobs(void (*fn)(const char *key, DATA_BLOB value,
 	state.pattern = pattern;
 	state.private_data = private_data;
 
-	state.in_persistent = false;
-	tdb_traverse(cache_notrans->tdb, gencache_iterate_blobs_fn, &state);
-
-	state.in_persistent = true;
 	tdb_traverse(cache->tdb, gencache_iterate_blobs_fn, &state);
 }
 
diff --git a/source3/lib/gencache.h b/source3/lib/gencache.h
index 3d8ebe53872..ccf16e9d2c2 100644
--- a/source3/lib/gencache.h
+++ b/source3/lib/gencache.h
@@ -48,7 +48,6 @@ bool gencache_parse(const char *keystr,
 bool gencache_get_data_blob(const char *keystr, TALLOC_CTX *mem_ctx,
 			    DATA_BLOB *blob,
 			    time_t *timeout, bool *was_expired);
-bool gencache_stabilize(void);
 bool gencache_set_data_blob(const char *keystr, DATA_BLOB blob,
 			    time_t timeout);
 void gencache_iterate_blobs(void (*fn)(const char *key, DATA_BLOB value,
diff --git a/source3/nmbd/nmbd.c b/source3/nmbd/nmbd.c
index 85c3d7177f3..fa2bfbb287a 100644
--- a/source3/nmbd/nmbd.c
+++ b/source3/nmbd/nmbd.c
@@ -70,8 +70,6 @@ static void terminate(struct messaging_context *msg)
 	/* If there was an async dns child - kill it. */
 	kill_async_dns_child();
 
-	gencache_stabilize();
-
 	pidfile_unlink(lp_pid_directory(), "nmbd");
 
 	exit(0);
diff --git a/source3/smbd/server_exit.c b/source3/smbd/server_exit.c
index 2a34f067a24..2378c0c15ca 100644
--- a/source3/smbd/server_exit.c
+++ b/source3/smbd/server_exit.c
@@ -237,7 +237,6 @@ static void exit_server_common(enum server_exit_reason how,
 		if (am_parent) {
 			pidfile_unlink(lp_pid_directory(), "smbd");
 		}
-		gencache_stabilize();
 	}
 
 	exit(0);
diff --git a/source3/torture/test_namemap_cache.c b/source3/torture/test_namemap_cache.c
index 4b0ead1a25b..b30fe8743d4 100644
--- a/source3/torture/test_namemap_cache.c
+++ b/source3/torture/test_namemap_cache.c
@@ -266,7 +266,5 @@ bool run_local_namemap_cache1(int dummy)
 		return false;
 	}
 
-	gencache_stabilize();
-
 	return true;
 }
diff --git a/source3/utils/net.c b/source3/utils/net.c
index 1138d1984e6..e0776c8d82c 100644
--- a/source3/utils/net.c
+++ b/source3/utils/net.c
@@ -1107,8 +1107,6 @@ static struct functable net_func[] = {
 
 	DEBUG(2,("return code = %d\n", rc));
 
-	gencache_stabilize();
-
 	libnetapi_free(c->netapi_ctx);
 
 	poptFreeContext(pc);
diff --git a/source3/utils/net_cache.c b/source3/utils/net_cache.c
index 2d6989918f5..baa69f6554c 100644
--- a/source3/utils/net_cache.c
+++ b/source3/utils/net_cache.c
@@ -347,24 +347,6 @@ static int net_cache_flush(struct net_context *c, int argc, const char **argv)
 	return 0;
 }
 
-static int net_cache_stabilize(struct net_context *c, int argc,
-			       const char **argv)
-{
-	if (c->display_usage) {
-		d_printf(  "%s\n"
-			   "net cache stabilize\n"
-			   "    %s\n",
-			 _("Usage:"),
-			 _("Move transient cache content to stable storage"));
-		return 0;
-	}
-
-	if (!gencache_stabilize()) {
-		return -1;
-	}
-	return 0;
-}
-
 static int netsamlog_cache_for_all_cb(const char *sid_str,
 				      time_t when_cached,
 				      struct netr_SamInfo3 *info3,
@@ -655,14 +637,6 @@ int net_cache(struct net_context *c, int argc, const char **argv)
 			   "  Delete all cache entries")
 		},
 		{
-			"stabilize",
-			net_cache_stabilize,
-			NET_TRANSPORT_LOCAL,
-			N_("Move transient cache content to stable storage"),
-			N_("net cache stabilize\n"
-			   "  Move transient cache content to stable storage")
-		},
-		{
 			"samlogon",
 			net_cache_samlogon,
 			NET_TRANSPORT_LOCAL,
diff --git a/source3/winbindd/winbindd.c b/source3/winbindd/winbindd.c
index 237841881ac..07a81329358 100644
--- a/source3/winbindd/winbindd.c
+++ b/source3/winbindd/winbindd.c
@@ -208,8 +208,6 @@ static void terminate(bool is_parent)
 
 	idmap_close();
 
-	gencache_stabilize();
-
 	netlogon_creds_cli_close_global_db();
 
 #if 0
-- 
2.11.0


From 04680f43c4fedb002e3ea45129e3a6fc97370106 Mon Sep 17 00:00:00 2001
From: Volker Lendecke <vl at samba.org>
Date: Fri, 2 Nov 2018 16:58:53 +0100
Subject: [PATCH 8/9] gencache: Wipe corrupt databases

Signed-off-by: Volker Lendecke <vl at samba.org>
---
 source3/lib/gencache.c | 66 ++++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 53 insertions(+), 13 deletions(-)

diff --git a/source3/lib/gencache.c b/source3/lib/gencache.c
index 1d8349c5702..124715ed10d 100644
--- a/source3/lib/gencache.c
+++ b/source3/lib/gencache.c
@@ -144,7 +144,17 @@ bool gencache_set_data_blob(const char *keystr, DATA_BLOB blob,
 
 	ret = tdb_storev(cache->tdb, key, dbufs, ARRAY_SIZE(dbufs), 0);
 
-	return ret == 0;
+	if (ret == 0) {
+		return true;
+	}
+	if (tdb_error(cache->tdb) != TDB_ERR_CORRUPT) {
+		return false;
+	}
+
+	ret = tdb_wipe_all(cache->tdb);
+	SMB_ASSERT(ret == 0);
+
+	return false;
 }
 
 /**
@@ -173,7 +183,17 @@ bool gencache_del(const char *keystr)
 
 	ret = tdb_delete(cache->tdb, key);
 
-	return (ret != -1);
+	if (ret == 0) {
+		return true;
+	}
+	if (tdb_error(cache->tdb) != TDB_ERR_CORRUPT) {
+		return false;
+	}
+
+	ret = tdb_wipe_all(cache->tdb);
+	SMB_ASSERT(ret == 0);
+
+	return true;		/* We've deleted a bit more... */
 }
 
 static bool gencache_pull_timeout(TDB_DATA key,
@@ -218,20 +238,21 @@ struct gencache_parse_state {
 		       DATA_BLOB blob,
 		       void *private_data);
 	void *private_data;
+	bool format_error;
 };
 
 static int gencache_parse_fn(TDB_DATA key, TDB_DATA data, void *private_data)
 {
-	struct gencache_parse_state *state;
+	struct gencache_parse_state *state = private_data;
 	struct gencache_timeout t;
 	DATA_BLOB payload;
 	bool ret;
 
 	ret = gencache_pull_timeout(key, data, &t.timeout, &payload);
 	if (!ret) {
-		return -1;
+		state->format_error = true;
+		return 0;
 	}
-	state = (struct gencache_parse_state *)private_data;
 	state->parser(&t, payload, state->private_data);
 
 	return 0;
@@ -243,7 +264,9 @@ bool gencache_parse(const char *keystr,
 				   void *private_data),
 		    void *private_data)
 {
-	struct gencache_parse_state state;
+	struct gencache_parse_state state = {
+		.parser = parser, .private_data = private_data
+	};
 	TDB_DATA key = string_term_tdb_data(keystr);
 	int ret;
 
@@ -254,16 +277,27 @@ bool gencache_parse(const char *keystr,
 		return false;
 	}
 
-	state.parser = parser;
-	state.private_data = private_data;
-
 	ret = tdb_parse_record(cache->tdb, key,
 			       gencache_parse_fn, &state);
-	if (ret == 0) {
-		return true;
+	if ((ret == -1) && (tdb_error(cache->tdb) == TDB_ERR_CORRUPT)) {
+		goto wipe;
+	}
+	if (ret == -1) {
+		return false;
+	}
+	if (state.format_error) {
+		ret = tdb_delete(cache->tdb, key);
+		if (ret == -1) {
+			goto wipe;
+		}
+		return false;
 	}
+	return true;
 
-	return (ret == 0);
+wipe:
+	ret = tdb_wipe_all(cache->tdb);
+	SMB_ASSERT(ret == 0);
+	return false;
 }
 
 struct gencache_get_data_blob_state {
@@ -475,6 +509,7 @@ void gencache_iterate_blobs(void (*fn)(const char *key, DATA_BLOB value,
 			    void *private_data, const char *pattern)
 {
 	struct gencache_iterate_blobs_state state;
+	int ret;
 
 	if ((fn == NULL) || (pattern == NULL) || !gencache_init()) {
 		return;
@@ -486,7 +521,12 @@ void gencache_iterate_blobs(void (*fn)(const char *key, DATA_BLOB value,
 	state.pattern = pattern;
 	state.private_data = private_data;
 
-	tdb_traverse(cache->tdb, gencache_iterate_blobs_fn, &state);
+	ret = tdb_traverse(cache->tdb, gencache_iterate_blobs_fn, &state);
+
+	if ((ret == -1) && (tdb_error(cache->tdb) == TDB_ERR_CORRUPT)) {
+		ret = tdb_wipe_all(cache->tdb);
+		SMB_ASSERT(ret == 0);
+	}
 }
 
 /**
-- 
2.11.0


From 071dc1dbb44bb1e3097d8e0566f8f955334e4ba4 Mon Sep 17 00:00:00 2001
From: Volker Lendecke <vl at samba.org>
Date: Wed, 24 Oct 2018 10:51:40 +0200
Subject: [PATCH 9/9] gencache: Prune expired entries

This solves the problem that gencache never shrinks right
now. Whenever we write an entry, we now walk that entry's chain and
delete expired entries. This should be a good balance between
performance and cleanup actions: Reading is still unaffected, and
those who write pay a small penalty while keeping gencache size under
control.

Signed-off-by: Volker Lendecke <vl at samba.org>
---
 source3/lib/gencache.c | 88 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 88 insertions(+)

diff --git a/source3/lib/gencache.c b/source3/lib/gencache.c
index 124715ed10d..d7c3ad32921 100644
--- a/source3/lib/gencache.c
+++ b/source3/lib/gencache.c
@@ -28,6 +28,7 @@
 #include "util_tdb.h"
 #include "tdb_wrap/tdb_wrap.h"
 #include "zlib.h"
+#include "lib/util/strv.h"
 
 #undef  DBGC_CLASS
 #define DBGC_CLASS DBGC_TDB
@@ -41,6 +42,11 @@ static struct tdb_wrap *cache;
  *
  **/
 
+static bool gencache_pull_timeout(TDB_DATA key,
+				  TDB_DATA data,
+				  time_t *pres,
+				  DATA_BLOB *payload);
+
 struct gencache_timeout {
 	time_t timeout;
 };
@@ -94,6 +100,77 @@ static bool gencache_init(void)
 	return true;
 }
 
+/*
+ * Walk the hash chain for "key", deleting all expired entries for
+ * that hash chain
+ */
+struct gencache_prune_expired_state {
+	TALLOC_CTX *mem_ctx;
+	char *keys;
+};
+
+static int gencache_prune_expired_fn(struct tdb_context *tdb,
+				     TDB_DATA key,
+				     TDB_DATA data,
+				     void *private_data)
+{
+	struct gencache_prune_expired_state *state = private_data;
+	struct gencache_timeout t;
+	bool ok = false;
+	bool expired = false;
+
+	if ((key.dsize == 0) || (key.dptr[key.dsize-1] != '\0')) {
+		/* not a valid record, should never happen */
+		return 0;
+	}
+
+	ok = gencache_pull_timeout(key, data, &t.timeout, NULL);
+	if (ok) {
+		expired = gencache_timeout_expired(&t);
+	}
+
+	if (!ok || expired) {
+		/*
+		 * Ignore failure, this is "just" background cleanup
+		 */
+		strv_add(state->mem_ctx, &state->keys, (char *)key.dptr);
+	}
+
+	return 0;
+}
+
+static void gencache_prune_expired(struct tdb_context *tdb,
+				   TDB_DATA chain_key)
+{
+	struct gencache_prune_expired_state state = {
+		.mem_ctx = talloc_tos(),
+	};
+	char *keystr = NULL;
+	int ret;
+
+	ret = tdb_traverse_key_chain(
+		tdb, chain_key, gencache_prune_expired_fn, &state);
+	if (ret == -1) {
+		DBG_DEBUG("tdb_traverse_key_chain failed: %s\n",
+			  tdb_errorstr(tdb));
+		return;
+	}
+
+	while ((keystr = strv_next(state.keys, keystr)) != NULL) {
+		TDB_DATA key = string_term_tdb_data(keystr);
+
+		/*
+		 * We expect the hash chain of "chain_key" to be
+		 * locked. So between gencache_prune_expired_fn
+		 * figuring out "keystr" is expired and the
+		 * tdb_delete, nobody can have reset the timeout.
+		 */
+		tdb_delete(tdb, key);
+	}
+
+	TALLOC_FREE(state.keys);
+}
+
 /**
  * Set an entry in the cache file. If there's no such
  * one, then add it.
@@ -142,8 +219,19 @@ bool gencache_set_data_blob(const char *keystr, DATA_BLOB blob,
 		   (int)(timeout - time(NULL)), 
 		   timeout > time(NULL) ? "ahead" : "in the past"));
 
+	ret = tdb_chainlock(cache->tdb, key);
+	if (ret == -1) {
+		DBG_WARNING("tdb_chainlock for key [%s] failed: %s\n",
+			    keystr, tdb_errorstr(cache->tdb));
+		return false;
+	}
+
+	gencache_prune_expired(cache->tdb, key);
+
 	ret = tdb_storev(cache->tdb, key, dbufs, ARRAY_SIZE(dbufs), 0);
 
+	tdb_chainunlock(cache->tdb, key);
+
 	if (ret == 0) {
 		return true;
 	}
-- 
2.11.0



More information about the samba-technical mailing list