[PATCH] Streamline gencache
Jeremy Allison
jra at samba.org
Tue Nov 6 01:06:56 UTC 2018
On Fri, Nov 02, 2018 at 05:14:48PM +0100, Volker Lendecke via samba-technical wrote:
> Hi!
>
> Attached find some significant improvements to gencache, in particular
> for both large installations and small ones with a slow system disk.
>
> Review appreciated!
FYI, haven't forgotten, in the middle of reviewing this !
Jeremy.
> --
> SerNet GmbH, Bahnhofsallee 1b, 37081 Göttingen
> phone: +49-551-370000-0, fax: +49-551-370000-9
> AG Göttingen, HRB 2816, GF: Dr. Johannes Loxen
> http://www.sernet.de, mailto:kontakt at sernet.de
> From 723597bd9b47735e79e9f7dcd1ed8948111ab798 Mon Sep 17 00:00:00 2001
> From: Volker Lendecke <vl at samba.org>
> Date: Sun, 28 Oct 2018 09:06:39 +0100
> Subject: [PATCH 1/9] tdb: Add tdb_traverse_chain
>
> This is a lightweight readonly traverse of a single chain, see the
> comment in the header file.
>
> Signed-off-by: Volker Lendecke <vl at samba.org>
> ---
> lib/tdb/common/traverse.c | 110 ++++++++++++++++++++++++++++++++++++++++++++++
> lib/tdb/include/tdb.h | 67 ++++++++++++++++++++++++++++
> 2 files changed, 177 insertions(+)
>
> diff --git a/lib/tdb/common/traverse.c b/lib/tdb/common/traverse.c
> index a9af1d4b824..54a69dc8d03 100644
> --- a/lib/tdb/common/traverse.c
> +++ b/lib/tdb/common/traverse.c
> @@ -399,3 +399,113 @@ _PUBLIC_ TDB_DATA tdb_nextkey(struct tdb_context *tdb, TDB_DATA oldkey)
> return key;
> }
>
> +_PUBLIC_ int tdb_traverse_chain(struct tdb_context *tdb,
> + unsigned chain,
> + tdb_traverse_func fn,
> + void *private_data)
> +{
> + tdb_off_t rec_ptr;
> + struct tdb_chainwalk_ctx chainwalk;
> + int count = 0;
> + int ret;
> +
> + if (chain >= tdb->hash_size) {
> + tdb->ecode = TDB_ERR_EINVAL;
> + return -1;
> + }
> +
> + if (tdb->traverse_read != 0) {
> + tdb->ecode = TDB_ERR_LOCK;
> + return -1;
> + }
> +
> + ret = tdb_lock(tdb, chain, F_RDLCK);
> + if (ret == -1) {
> + return -1;
> + }
> +
> + tdb->traverse_read += 1;
> +
> + ret = tdb_ofs_read(tdb, TDB_HASH_TOP(chain), &rec_ptr);
> + if (ret == -1) {
> + goto fail;
> + }
> +
> + tdb_chainwalk_init(&chainwalk, rec_ptr);
> +
> + while (rec_ptr != 0) {
> + struct tdb_record rec;
> + bool ok;
> +
> + ret = tdb_rec_read(tdb, rec_ptr, &rec);
> + if (ret == -1) {
> + goto fail;
> + }
> +
> + if (!TDB_DEAD(&rec)) {
> + /* no overflow checks, tdb_rec_read checked it */
> + tdb_off_t key_ofs = rec_ptr + sizeof(rec);
> + size_t full_len = rec.key_len + rec.data_len;
> + uint8_t *buf = NULL;
> +
> + TDB_DATA key = { .dsize = rec.key_len };
> + TDB_DATA data = { .dsize = rec.data_len };
> +
> + if ((tdb->transaction == NULL) &&
> + (tdb->map_ptr != NULL)) {
> + ret = tdb->methods->tdb_oob(
> + tdb, key_ofs, full_len, 0);
> + if (ret == -1) {
> + goto fail;
> + }
> + key.dptr = (uint8_t *)tdb->map_ptr + key_ofs;
> + } else {
> + buf = tdb_alloc_read(tdb, key_ofs, full_len);
> + if (buf == NULL) {
> + goto fail;
> + }
> + key.dptr = buf;
> + }
> + data.dptr = key.dptr + key.dsize;
> +
> + ret = fn(tdb, key, data, private_data);
> + free(buf);
> +
> + count += 1;
> +
> + if (ret != 0) {
> + break;
> + }
> + }
> +
> + rec_ptr = rec.next;
> +
> + ok = tdb_chainwalk_check(tdb, &chainwalk, rec_ptr);
> + if (!ok) {
> + goto fail;
> + }
> + }
> + tdb->traverse_read -= 1;
> + tdb_unlock(tdb, chain, F_RDLCK);
> + return count;
> +
> +fail:
> + tdb->traverse_read -= 1;
> + tdb_unlock(tdb, chain, F_RDLCK);
> + return -1;
> +}
> +
> +_PUBLIC_ int tdb_traverse_key_chain(struct tdb_context *tdb,
> + TDB_DATA key,
> + tdb_traverse_func fn,
> + void *private_data)
> +{
> + uint32_t hash, chain;
> + int ret;
> +
> + hash = tdb->hash_fn(&key);
> + chain = BUCKET(hash);
> + ret = tdb_traverse_chain(tdb, chain, fn, private_data);
> +
> + return ret;
> +}
> diff --git a/lib/tdb/include/tdb.h b/lib/tdb/include/tdb.h
> index 535f07ac00a..445f48c1fd9 100644
> --- a/lib/tdb/include/tdb.h
> +++ b/lib/tdb/include/tdb.h
> @@ -481,6 +481,73 @@ int tdb_traverse(struct tdb_context *tdb, tdb_traverse_func fn, void *private_da
> int tdb_traverse_read(struct tdb_context *tdb, tdb_traverse_func fn, void *private_data);
>
> /**
> + * @brief Traverse a single hash chain
> + *
> + * Traverse a single hash chain under a single lock operation. No
> + * database modification is possible in the callback.
> + *
> + * This exists for background cleanup of databases. In normal
> + * operations, traversing a complete database can be much too
> + * expensive. Databases can have many chains, which will all have to
> + * be looked at before tdb_traverse finishes. Also tdb_traverse does a
> + * lot of fcntl activity to protect against concurrent record deletes.
> + *
> + * With this you can walk a fraction of the whole tdb, collect the
> + * entries you want to prune, leave the traverse, and then modify or
> + * delete the records in a subsequent step.
> + *
> + * To walk the entire database, call this function tdb_hash_size()
> + * times, with 0<=chain<tdb_hash_size(tdb).
> + *
> + * @param[in] tdb The database to traverse.
> + *
> + * @param[in] chain The hash chain number to traverse.
> + *
> + * @param[in] fn The function to call on each entry.
> + *
> + * @param[in] private_data The private data which should be passed to the
> + * traversing function.
> + *
> + * @return The record count traversed, -1 on error.
> + */
> +
> +int tdb_traverse_chain(struct tdb_context *tdb,
> + unsigned chain,
> + tdb_traverse_func fn,
> + void *private_data);
> +
> +/**
> + * @brief Traverse a single hash chain
> + *
> + * This is like tdb_traverse_chain(), but for all records that are in
> + * the same chain as the record corresponding to the key parameter.
> + *
> + * Use it for ongoing database maintenance under a lock. Whenever you
> + * are about to modify a database, you know which record you are going
> + * to write to. For that tdb_store(), an exclusive chainlock is taken
> + * behind the scenes. To utilize this exclusive lock for incremental
> + * database cleanup as well, tdb_chainlock() the key you are about to
> + * modify, then tdb_traverse_key_chain() to do the incremental
> + * maintenance, modify your record and tdb_chainunlock() the key
> + * again.
> + *
> + * @param[in] tdb The database to traverse.
> + *
> + * @param[in] key The record key to walk the chain for.
> + *
> + * @param[in] fn The function to call on each entry.
> + *
> + * @param[in] private_data The private data which should be passed to the
> + * traversing function.
> + *
> + * @return The record count traversed, -1 on error.
> + */
> +
> +int tdb_traverse_key_chain(struct tdb_context *tdb,
> + TDB_DATA key,
> + tdb_traverse_func fn,
> + void *private_data);
> +/**
> * @brief Check if an entry in the database exists.
> *
> * @note 1 is returned if the key is found and 0 is returned if not found this
> --
> 2.11.0
>
>
> From 4a83f569412e27260f0f3edcfbb6abf031841904 Mon Sep 17 00:00:00 2001
> From: Volker Lendecke <vl at samba.org>
> Date: Sun, 7 Oct 2018 22:03:09 +0200
> Subject: [PATCH 2/9] tdb: Add test for tdb_traverse_chain
>
> Signed-off-by: Volker Lendecke <vl at samba.org>
> ---
> lib/tdb/test/run-traverse-chain.c | 94 +++++++++++++++++++++++++++++++++++++++
> lib/tdb/wscript | 1 +
> 2 files changed, 95 insertions(+)
> create mode 100644 lib/tdb/test/run-traverse-chain.c
>
> diff --git a/lib/tdb/test/run-traverse-chain.c b/lib/tdb/test/run-traverse-chain.c
> new file mode 100644
> index 00000000000..2a25beca3f0
> --- /dev/null
> +++ b/lib/tdb/test/run-traverse-chain.c
> @@ -0,0 +1,94 @@
> +#include "../common/tdb_private.h"
> +#include "../common/io.c"
> +#include "../common/tdb.c"
> +#include "../common/lock.c"
> +#include "../common/freelist.c"
> +#include "../common/traverse.c"
> +#include "../common/transaction.c"
> +#include "../common/error.c"
> +#include "../common/open.c"
> +#include "../common/check.c"
> +#include "../common/hash.c"
> +#include "../common/mutex.c"
> +#include "tap-interface.h"
> +#include <stdlib.h>
> +#include "logging.h"
> +
> +static char keystr0[] = "x";
> +static TDB_DATA key0 = { .dptr = (uint8_t *)keystr0,
> + .dsize = sizeof(keystr0) };
> +static char valuestr0[] = "y";
> +static TDB_DATA value0 = { .dptr = (uint8_t *)valuestr0,
> + .dsize = sizeof(valuestr0) };
> +
> +static char keystr1[] = "aaa";
> +static TDB_DATA key1 = { .dptr = (uint8_t *)keystr1,
> + .dsize = sizeof(keystr1) };
> +static char valuestr1[] = "bbbbb";
> +static TDB_DATA value1 = { .dptr = (uint8_t *)valuestr1,
> + .dsize = sizeof(valuestr1) };
> +
> +static TDB_DATA *keys[] = { &key0, &key1 };
> +static TDB_DATA *values[] = { &value0, &value1 };
> +
> +static bool tdb_data_same(TDB_DATA d1, TDB_DATA d2)
> +{
> + if (d1.dsize != d2.dsize) {
> + return false;
> + }
> + return (memcmp(d1.dptr, d2.dptr, d1.dsize) == 0);
> +}
> +
> +struct traverse_chain_state {
> + size_t idx;
> + bool ok;
> +};
> +
> +static int traverse_chain_fn(struct tdb_context *tdb,
> + TDB_DATA key,
> + TDB_DATA data,
> + void *private_data)
> +{
> + struct traverse_chain_state *state = private_data;
> +
> + state->ok &= tdb_data_same(key, *keys[state->idx]);
> + state->ok &= tdb_data_same(data, *values[state->idx]);
> + state->idx += 1;
> +
> + return 0;
> +}
> +
> +int main(int argc, char *argv[])
> +{
> + struct tdb_context *tdb;
> + struct traverse_chain_state state = { .ok = true };
> + int ret;
> +
> + plan_tests(4);
> +
> + tdb = tdb_open_ex(
> + "traverse_chain.tdb",
> + 1,
> + TDB_CLEAR_IF_FIRST,
> + O_RDWR|O_CREAT,
> + 0600,
> + &taplogctx,
> + NULL);
> + ok1(tdb);
> +
> + /* add in reverse order, tdb_store adds to the front of the list */
> + ret = tdb_store(tdb, key1, value1, TDB_INSERT);
> + ok1(ret == 0);
> + ret = tdb_store(tdb, key0, value0, TDB_INSERT);
> + ok1(ret == 0);
> +
> + ret = tdb_traverse_key_chain(tdb, key0, traverse_chain_fn, &state);
> + ok1(ret == 2);
> + ok1(state.ok);
> +
> + unlink(tdb_name(tdb));
> +
> + tdb_close(tdb);
> +
> + return exit_status();
> +}
> diff --git a/lib/tdb/wscript b/lib/tdb/wscript
> index 3ab21fcebc1..483d8e0ebec 100644
> --- a/lib/tdb/wscript
> +++ b/lib/tdb/wscript
> @@ -57,6 +57,7 @@ tdb1_unit_tests = [
> 'run-mutex1',
> 'run-circular-chain',
> 'run-circular-freelist',
> + 'run-traverse-chain',
> ]
>
> def options(opt):
> --
> 2.11.0
>
>
> From c15081209e60090f18a7b4b137079efbde14a867 Mon Sep 17 00:00:00 2001
> From: Volker Lendecke <vl at samba.org>
> Date: Mon, 29 Oct 2018 07:43:43 +0100
> Subject: [PATCH 3/9] tdb: Version 1.3.17 for tdb_traverse_chain
>
> Signed-off-by: Volker Lendecke <vl at samba.org>
> ---
> lib/tdb/ABI/tdb-1.3.17.sigs | 73 +++++++++++++++++++++++++++++++++++++++++++++
> lib/tdb/wscript | 2 +-
> 2 files changed, 74 insertions(+), 1 deletion(-)
> create mode 100644 lib/tdb/ABI/tdb-1.3.17.sigs
>
> diff --git a/lib/tdb/ABI/tdb-1.3.17.sigs b/lib/tdb/ABI/tdb-1.3.17.sigs
> new file mode 100644
> index 00000000000..e2b0427c347
> --- /dev/null
> +++ b/lib/tdb/ABI/tdb-1.3.17.sigs
> @@ -0,0 +1,73 @@
> +tdb_add_flags: void (struct tdb_context *, unsigned int)
> +tdb_append: int (struct tdb_context *, TDB_DATA, TDB_DATA)
> +tdb_chainlock: int (struct tdb_context *, TDB_DATA)
> +tdb_chainlock_mark: int (struct tdb_context *, TDB_DATA)
> +tdb_chainlock_nonblock: int (struct tdb_context *, TDB_DATA)
> +tdb_chainlock_read: int (struct tdb_context *, TDB_DATA)
> +tdb_chainlock_read_nonblock: int (struct tdb_context *, TDB_DATA)
> +tdb_chainlock_unmark: int (struct tdb_context *, TDB_DATA)
> +tdb_chainunlock: int (struct tdb_context *, TDB_DATA)
> +tdb_chainunlock_read: int (struct tdb_context *, TDB_DATA)
> +tdb_check: int (struct tdb_context *, int (*)(TDB_DATA, TDB_DATA, void *), void *)
> +tdb_close: int (struct tdb_context *)
> +tdb_delete: int (struct tdb_context *, TDB_DATA)
> +tdb_dump_all: void (struct tdb_context *)
> +tdb_enable_seqnum: void (struct tdb_context *)
> +tdb_error: enum TDB_ERROR (struct tdb_context *)
> +tdb_errorstr: const char *(struct tdb_context *)
> +tdb_exists: int (struct tdb_context *, TDB_DATA)
> +tdb_fd: int (struct tdb_context *)
> +tdb_fetch: TDB_DATA (struct tdb_context *, TDB_DATA)
> +tdb_firstkey: TDB_DATA (struct tdb_context *)
> +tdb_freelist_size: int (struct tdb_context *)
> +tdb_get_flags: int (struct tdb_context *)
> +tdb_get_logging_private: void *(struct tdb_context *)
> +tdb_get_seqnum: int (struct tdb_context *)
> +tdb_hash_size: int (struct tdb_context *)
> +tdb_increment_seqnum_nonblock: void (struct tdb_context *)
> +tdb_jenkins_hash: unsigned int (TDB_DATA *)
> +tdb_lock_nonblock: int (struct tdb_context *, int, int)
> +tdb_lockall: int (struct tdb_context *)
> +tdb_lockall_mark: int (struct tdb_context *)
> +tdb_lockall_nonblock: int (struct tdb_context *)
> +tdb_lockall_read: int (struct tdb_context *)
> +tdb_lockall_read_nonblock: int (struct tdb_context *)
> +tdb_lockall_unmark: int (struct tdb_context *)
> +tdb_log_fn: tdb_log_func (struct tdb_context *)
> +tdb_map_size: size_t (struct tdb_context *)
> +tdb_name: const char *(struct tdb_context *)
> +tdb_nextkey: TDB_DATA (struct tdb_context *, TDB_DATA)
> +tdb_null: dptr = 0xXXXX, dsize = 0
> +tdb_open: struct tdb_context *(const char *, int, int, int, mode_t)
> +tdb_open_ex: struct tdb_context *(const char *, int, int, int, mode_t, const struct tdb_logging_context *, tdb_hash_func)
> +tdb_parse_record: int (struct tdb_context *, TDB_DATA, int (*)(TDB_DATA, TDB_DATA, void *), void *)
> +tdb_printfreelist: int (struct tdb_context *)
> +tdb_remove_flags: void (struct tdb_context *, unsigned int)
> +tdb_reopen: int (struct tdb_context *)
> +tdb_reopen_all: int (int)
> +tdb_repack: int (struct tdb_context *)
> +tdb_rescue: int (struct tdb_context *, void (*)(TDB_DATA, TDB_DATA, void *), void *)
> +tdb_runtime_check_for_robust_mutexes: bool (void)
> +tdb_set_logging_function: void (struct tdb_context *, const struct tdb_logging_context *)
> +tdb_set_max_dead: void (struct tdb_context *, int)
> +tdb_setalarm_sigptr: void (struct tdb_context *, volatile sig_atomic_t *)
> +tdb_store: int (struct tdb_context *, TDB_DATA, TDB_DATA, int)
> +tdb_storev: int (struct tdb_context *, TDB_DATA, const TDB_DATA *, int, int)
> +tdb_summary: char *(struct tdb_context *)
> +tdb_transaction_active: bool (struct tdb_context *)
> +tdb_transaction_cancel: int (struct tdb_context *)
> +tdb_transaction_commit: int (struct tdb_context *)
> +tdb_transaction_prepare_commit: int (struct tdb_context *)
> +tdb_transaction_start: int (struct tdb_context *)
> +tdb_transaction_start_nonblock: int (struct tdb_context *)
> +tdb_transaction_write_lock_mark: int (struct tdb_context *)
> +tdb_transaction_write_lock_unmark: int (struct tdb_context *)
> +tdb_traverse: int (struct tdb_context *, tdb_traverse_func, void *)
> +tdb_traverse_chain: int (struct tdb_context *, unsigned int, tdb_traverse_func, void *)
> +tdb_traverse_key_chain: int (struct tdb_context *, TDB_DATA, tdb_traverse_func, void *)
> +tdb_traverse_read: int (struct tdb_context *, tdb_traverse_func, void *)
> +tdb_unlock: int (struct tdb_context *, int, int)
> +tdb_unlockall: int (struct tdb_context *)
> +tdb_unlockall_read: int (struct tdb_context *)
> +tdb_validate_freelist: int (struct tdb_context *, int *)
> +tdb_wipe_all: int (struct tdb_context *)
> diff --git a/lib/tdb/wscript b/lib/tdb/wscript
> index 483d8e0ebec..6a6adabc4c1 100644
> --- a/lib/tdb/wscript
> +++ b/lib/tdb/wscript
> @@ -1,7 +1,7 @@
> #!/usr/bin/env python
>
> APPNAME = 'tdb'
> -VERSION = '1.3.16'
> +VERSION = '1.3.17'
>
> import sys, os
>
> --
> 2.11.0
>
>
> From 3488b6fc3324eceac763d3b0c156e91c52fbc4ce Mon Sep 17 00:00:00 2001
> From: Volker Lendecke <vl at samba.org>
> Date: Mon, 22 Oct 2018 08:57:00 +0200
> Subject: [PATCH 4/9] tdb: Allow !CLEAR_IF_FIRST & MUTEX_LOCKING
>
> This is a prerequisite to allow gencache to run on a non-transactioned
> database with mutexes.
>
> Signed-off-by: Volker Lendecke <vl at samba.org>
> ---
> lib/tdb/common/open.c | 91 +++++++++++++++++--------------------
> lib/tdb/test/run-mutex-openflags2.c | 7 ---
> 2 files changed, 42 insertions(+), 56 deletions(-)
>
> diff --git a/lib/tdb/common/open.c b/lib/tdb/common/open.c
> index 899a2fcaffb..be5f8075557 100644
> --- a/lib/tdb/common/open.c
> +++ b/lib/tdb/common/open.c
> @@ -230,8 +230,6 @@ static bool check_header_hash(struct tdb_context *tdb,
> static bool tdb_mutex_open_ok(struct tdb_context *tdb,
> const struct tdb_header *header)
> {
> - int locked;
> -
> if (tdb->flags & TDB_NOLOCK) {
> /*
> * We don't look at locks, so it does not matter to have a
> @@ -240,37 +238,6 @@ static bool tdb_mutex_open_ok(struct tdb_context *tdb,
> return true;
> }
>
> - locked = tdb_nest_lock(tdb, ACTIVE_LOCK, F_WRLCK,
> - TDB_LOCK_NOWAIT|TDB_LOCK_PROBE);
> -
> - if ((locked == -1) && (tdb->ecode == TDB_ERR_LOCK)) {
> - /*
> - * CLEAR_IF_FIRST still active. The tdb was created on this
> - * host, so we can assume the mutex implementation is
> - * compatible. Important for tools like tdbdump on a still
> - * open locking.tdb.
> - */
> - goto check_local_settings;
> - }
> -
> - /*
> - * We got the CLEAR_IF_FIRST lock. That means the database was
> - * potentially copied from somewhere else. The mutex implementation
> - * might be incompatible.
> - */
> -
> - if (tdb_nest_unlock(tdb, ACTIVE_LOCK, F_WRLCK, false) == -1) {
> - /*
> - * Should not happen
> - */
> - TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_mutex_open_ok: "
> - "failed to release ACTIVE_LOCK on %s: %s\n",
> - tdb->name, strerror(errno)));
> - return false;
> - }
> -
> -check_local_settings:
> -
> if (!(tdb->flags & TDB_MUTEX_LOCKING)) {
> TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_mutex_open_ok[%s]: "
> "Can use mutexes only with "
> @@ -416,14 +383,6 @@ _PUBLIC_ struct tdb_context *tdb_open_ex(const char *name, int hash_size, int td
> * the runtime check for existing tdb's comes later.
> */
>
> - if (!(tdb->flags & TDB_CLEAR_IF_FIRST)) {
> - TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_open_ex: "
> - "invalid flags for %s - TDB_MUTEX_LOCKING "
> - "requires TDB_CLEAR_IF_FIRST\n", name));
> - errno = EINVAL;
> - goto fail;
> - }
> -
> if (tdb->flags & TDB_INTERNAL) {
> TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_open_ex: "
> "invalid flags for %s - TDB_MUTEX_LOCKING and "
> @@ -632,6 +591,30 @@ _PUBLIC_ struct tdb_context *tdb_open_ex(const char *name, int hash_size, int td
> * mutex locking.
> */
> tdb->hdr_ofs = header.mutex_size;
> +
> + if ((!(tdb_flags & TDB_CLEAR_IF_FIRST)) && (!tdb->read_only)) {
> + /*
> + * Open an existing mutexed tdb, but without
> + * CLEAR_IF_FIRST. We need to initialize the
> + * mutex array and keep the CLEAR_IF_FIRST
> + * lock locked.
> + */
> + ret = tdb_nest_lock(tdb, ACTIVE_LOCK, F_WRLCK,
> + TDB_LOCK_NOWAIT|TDB_LOCK_PROBE);
> + locked = (ret == 0);
> +
> + if (locked) {
> + ret = tdb_mutex_init(tdb);
> + if (ret == -1) {
> + TDB_LOG((tdb,
> + TDB_DEBUG_FATAL,
> + "tdb_open_ex: tdb_mutex_init "
> + "failed for ""%s: %s\n",
> + name, strerror(errno)));
> + goto fail;
> + }
> + }
> + }
> }
>
> if ((header.magic1_hash == 0) && (header.magic2_hash == 0)) {
> @@ -708,15 +691,19 @@ _PUBLIC_ struct tdb_context *tdb_open_ex(const char *name, int hash_size, int td
> goto fail;
> }
>
> +
> }
>
> - /* We always need to do this if the CLEAR_IF_FIRST flag is set, even if
> - we didn't get the initial exclusive lock as we need to let all other
> - users know we're using it. */
> + if (locked || (tdb_flags & TDB_CLEAR_IF_FIRST)) {
> + /*
> + * We always need to do this if the CLEAR_IF_FIRST
> + * flag is set, even if we didn't get the initial
> + * exclusive lock as we need to let all other users
> + * know we're using it.
> + */
>
> - if (tdb_flags & TDB_CLEAR_IF_FIRST) {
> - /* leave this lock in place to indicate it's in use */
> - if (tdb_nest_lock(tdb, ACTIVE_LOCK, F_RDLCK, TDB_LOCK_WAIT) == -1) {
> + ret = tdb_nest_lock(tdb, ACTIVE_LOCK, F_RDLCK, TDB_LOCK_WAIT);
> + if (ret == -1) {
> goto fail;
> }
> }
> @@ -932,7 +919,10 @@ fail:
> seek pointer from our parent and to re-establish locks */
> _PUBLIC_ int tdb_reopen(struct tdb_context *tdb)
> {
> - return tdb_reopen_internal(tdb, tdb->flags & TDB_CLEAR_IF_FIRST);
> + bool active_lock;
> + active_lock = (tdb->flags & (TDB_CLEAR_IF_FIRST|TDB_MUTEX_LOCKING));
> +
> + return tdb_reopen_internal(tdb, active_lock);
> }
>
> /* reopen all tdb's */
> @@ -941,7 +931,10 @@ _PUBLIC_ int tdb_reopen_all(int parent_longlived)
> struct tdb_context *tdb;
>
> for (tdb=tdbs; tdb; tdb = tdb->next) {
> - bool active_lock = (tdb->flags & TDB_CLEAR_IF_FIRST);
> + bool active_lock;
> +
> + active_lock =
> + (tdb->flags & (TDB_CLEAR_IF_FIRST|TDB_MUTEX_LOCKING));
>
> /*
> * If the parent is longlived (ie. a
> diff --git a/lib/tdb/test/run-mutex-openflags2.c b/lib/tdb/test/run-mutex-openflags2.c
> index 6522ae42faa..89603e638c2 100644
> --- a/lib/tdb/test/run-mutex-openflags2.c
> +++ b/lib/tdb/test/run-mutex-openflags2.c
> @@ -112,13 +112,6 @@ int main(int argc, char *argv[])
> data.dsize = strlen("world");
> data.dptr = discard_const_p(uint8_t, "world");
>
> - tdb = tdb_open_ex("mutex-openflags2.tdb", 0,
> - TDB_INCOMPATIBLE_HASH|
> - TDB_MUTEX_LOCKING,
> - O_RDWR|O_CREAT, 0755, &nolog_ctx, NULL);
> - ok((tdb == NULL) && (errno == EINVAL), "TDB_MUTEX_LOCKING without "
> - "TDB_CLEAR_IF_FIRST should fail with EINVAL - %d", errno);
> -
> if (!runtime_support) {
> tdb = tdb_open_ex("mutex-openflags2.tdb", 0,
> TDB_CLEAR_IF_FIRST|
> --
> 2.11.0
>
>
> From 58a769b9a932c39c58c7f01cce9e34e5ed181c6f Mon Sep 17 00:00:00 2001
> From: Volker Lendecke <vl at samba.org>
> Date: Wed, 10 Oct 2018 16:12:28 +0200
> Subject: [PATCH 5/9] gencache: Convert to a binary timestamp
>
> Two reasons:
>
> The ascii conversion shows up on profiles.
>
> In a further commit we will get checksums for gencache entries to
> protect at hidden corruption due to a crash on the non-transactioned
> gencache.tdb. Next to the timestamp this is a second field that is
> gencache metadata, and I don't want to deal with a second ascii number
> when at least some of the gencache values are binary already.
>
> Signed-off-by: Volker Lendecke <vl at samba.org>
> ---
> source3/lib/gencache.c | 37 ++++++-------------------------------
> 1 file changed, 6 insertions(+), 31 deletions(-)
>
> diff --git a/source3/lib/gencache.c b/source3/lib/gencache.c
> index f566534a1a6..5aa52a06e7c 100644
> --- a/source3/lib/gencache.c
> +++ b/source3/lib/gencache.c
> @@ -31,8 +31,6 @@
> #undef DBGC_CLASS
> #define DBGC_CLASS DBGC_TDB
>
> -#define CACHE_DATA_FMT "%12u/"
> -
> static struct tdb_wrap *cache;
> static struct tdb_wrap *cache_notrans;
>
> @@ -269,8 +267,6 @@ bool gencache_set_data_blob(const char *keystr, DATA_BLOB blob,
> {
> TDB_DATA key;
> int ret;
> - fstring hdr;
> - int hdr_len;
> time_t last_stabilize;
> static int writecount;
> TDB_DATA dbufs[2];
> @@ -297,13 +293,8 @@ bool gencache_set_data_blob(const char *keystr, DATA_BLOB blob,
> return true;
> }
>
> - hdr_len = fstr_sprintf(hdr, CACHE_DATA_FMT, (int)timeout);
> -
> - if (hdr_len == -1) {
> - return false;
> - }
> -
> - dbufs[0] = (TDB_DATA) { .dptr = (uint8_t *)hdr, .dsize = hdr_len };
> + dbufs[0] = (TDB_DATA) { .dptr = (uint8_t *)&timeout,
> + .dsize = sizeof(time_t) };
> dbufs[1] = (TDB_DATA) { .dptr = blob.data, .dsize = blob.length };
>
> DEBUG(10, ("Adding cache entry with key=[%s] and timeout="
> @@ -408,32 +399,16 @@ bool gencache_del(const char *keystr)
>
> static bool gencache_pull_timeout(TDB_DATA data, time_t *pres, DATA_BLOB *payload)
> {
> - time_t res;
> - char *slash = NULL;
> - char *endptr;
> -
> - if (data.dptr == NULL) {
> - return false;
> - }
> - slash = memchr(data.dptr, '/', data.dsize);
> - if (slash == NULL) {
> - return false;
> - }
> -
> - res = strtol((char *)data.dptr, &endptr, 10);
> -
> - if ((endptr == NULL) || (*endptr != '/')) {
> - DBG_WARNING("Invalid gencache data format\n");
> + if ((data.dptr == NULL) || (data.dsize < sizeof(time_t))) {
> return false;
> }
> if (pres != NULL) {
> - *pres = res;
> + memcpy(pres, data.dptr, sizeof(time_t));
> }
> if (payload != NULL) {
> - endptr += 1;
> *payload = (DATA_BLOB) {
> - .data = discard_const_p(uint8_t, endptr),
> - .length = data.dsize - PTR_DIFF(endptr, data.dptr),
> + .data = data.dptr + sizeof(time_t),
> + .length = data.dsize - sizeof(time_t),
> };
> }
> return true;
> --
> 2.11.0
>
>
> From ebb12be3cf534a6d6f039d88c709a78eff22d784 Mon Sep 17 00:00:00 2001
> From: Volker Lendecke <vl at samba.org>
> Date: Wed, 10 Oct 2018 16:53:10 +0200
> Subject: [PATCH 6/9] gencache: Add crc check
>
> This covers key, timestamp and data. This will detect silent
> corruption of gencache data after a system crash
>
> Signed-off-by: Volker Lendecke <vl at samba.org>
> ---
> source3/lib/gencache.c | 48 +++++++++++++++++++++++++++++++++++++++---------
> 1 file changed, 39 insertions(+), 9 deletions(-)
>
> diff --git a/source3/lib/gencache.c b/source3/lib/gencache.c
> index 5aa52a06e7c..ec7c397ad73 100644
> --- a/source3/lib/gencache.c
> +++ b/source3/lib/gencache.c
> @@ -27,6 +27,7 @@
> #include "system/glob.h"
> #include "util_tdb.h"
> #include "tdb_wrap/tdb_wrap.h"
> +#include "zlib.h"
>
> #undef DBGC_CLASS
> #define DBGC_CLASS DBGC_TDB
> @@ -269,7 +270,8 @@ bool gencache_set_data_blob(const char *keystr, DATA_BLOB blob,
> int ret;
> time_t last_stabilize;
> static int writecount;
> - TDB_DATA dbufs[2];
> + TDB_DATA dbufs[3];
> + uint32_t crc;
>
> if ((keystr == NULL) || (blob.data == NULL)) {
> return false;
> @@ -297,13 +299,21 @@ bool gencache_set_data_blob(const char *keystr, DATA_BLOB blob,
> .dsize = sizeof(time_t) };
> dbufs[1] = (TDB_DATA) { .dptr = blob.data, .dsize = blob.length };
>
> + crc = crc32(0, Z_NULL, 0);
> + crc = crc32(crc, key.dptr, key.dsize);
> + crc = crc32(crc, dbufs[0].dptr, dbufs[0].dsize);
> + crc = crc32(crc, dbufs[1].dptr, dbufs[1].dsize);
> +
> + dbufs[2] = (TDB_DATA) { .dptr = (uint8_t *)&crc,
> + .dsize = sizeof(crc) };
> +
> DEBUG(10, ("Adding cache entry with key=[%s] and timeout="
> "[%s] (%d seconds %s)\n", keystr,
> timestring(talloc_tos(), timeout),
> (int)(timeout - time(NULL)),
> timeout > time(NULL) ? "ahead" : "in the past"));
>
> - ret = tdb_storev(cache_notrans->tdb, key, dbufs, 2, 0);
> + ret = tdb_storev(cache_notrans->tdb, key, dbufs, ARRAY_SIZE(dbufs), 0);
> if (ret != 0) {
> return false;
> }
> @@ -397,18 +407,38 @@ bool gencache_del(const char *keystr)
> return result;
> }
>
> -static bool gencache_pull_timeout(TDB_DATA data, time_t *pres, DATA_BLOB *payload)
> +static bool gencache_pull_timeout(TDB_DATA key,
> + TDB_DATA data,
> + time_t *pres,
> + DATA_BLOB *payload)
> {
> - if ((data.dptr == NULL) || (data.dsize < sizeof(time_t))) {
> + size_t crc_ofs;
> + uint32_t crc, stored_crc;
> +
> + if ((data.dptr == NULL) ||
> + (data.dsize < (sizeof(time_t) + sizeof(uint32_t)))) {
> + return false;
> + }
> +
> + crc_ofs = data.dsize - sizeof(uint32_t);
> +
> + crc = crc32(0, Z_NULL, 0);
> + crc = crc32(crc, key.dptr, key.dsize);
> + crc = crc32(crc, data.dptr, crc_ofs);
> +
> + memcpy(&stored_crc, data.dptr + crc_ofs, sizeof(uint32_t));
> +
> + if (stored_crc != crc) {
> return false;
> }
> +
> if (pres != NULL) {
> memcpy(pres, data.dptr, sizeof(time_t));
> }
> if (payload != NULL) {
> *payload = (DATA_BLOB) {
> - .data = data.dptr + sizeof(time_t),
> - .length = data.dsize - sizeof(time_t),
> + .data = data.dptr+sizeof(time_t),
> + .length = data.dsize-sizeof(time_t)-sizeof(uint32_t),
> };
> }
> return true;
> @@ -429,7 +459,7 @@ static int gencache_parse_fn(TDB_DATA key, TDB_DATA data, void *private_data)
> DATA_BLOB payload;
> bool ret;
>
> - ret = gencache_pull_timeout(data, &t.timeout, &payload);
> + ret = gencache_pull_timeout(key, data, &t.timeout, &payload);
> if (!ret) {
> return -1;
> }
> @@ -692,7 +722,7 @@ static int stabilize_fn(struct tdb_context *tdb, TDB_DATA key, TDB_DATA val,
> return 0;
> }
>
> - if (!gencache_pull_timeout(val, &timeout, NULL)) {
> + if (!gencache_pull_timeout(key, val, &timeout, NULL)) {
> DEBUG(10, ("Ignoring invalid entry\n"));
> return 0;
> }
> @@ -817,7 +847,7 @@ static int gencache_iterate_blobs_fn(struct tdb_context *tdb, TDB_DATA key,
> }
> }
>
> - if (!gencache_pull_timeout(data, &timeout, &payload)) {
> + if (!gencache_pull_timeout(key, data, &timeout, &payload)) {
> goto done;
> }
>
> --
> 2.11.0
>
>
> From 37b513cee8d5927dd9de87c8baafb0504905d753 Mon Sep 17 00:00:00 2001
> From: Volker Lendecke <vl at samba.org>
> Date: Thu, 11 Oct 2018 12:52:40 +0200
> Subject: [PATCH 7/9] gencache: Remove transaction-based tdb
>
> At more than one large site I've seen significant problems due to
> gencache_stabilize. gencache_stabilize was mainly introduced to
> survive machine crashes with the cache still being in place. Given
> that most installations crash rarely and this is still a cache, this
> safety is overkill and causes real problems.
>
> With the recent changes to tdb, we should be safe enough to run on
> completely corrupted databases and properly detect errors. A further
> commit will introduce code that wipes the gencache.tdb if such a
> corruption is detected.
>
> There is one kind of corruption that we don't properly handle:
> Orphaned space in the database. I don't have a good idea yet how to
> handle this in a graceful and efficient way during normal operations,
> but maybe this idea pops up at some point.
>
> Signed-off-by: Volker Lendecke <vl at samba.org>
> ---
> source3/lib/gencache.c | 423 +----------------------------------
> source3/lib/gencache.h | 1 -
> source3/nmbd/nmbd.c | 2 -
> source3/smbd/server_exit.c | 1 -
> source3/torture/test_namemap_cache.c | 2 -
> source3/utils/net.c | 2 -
> source3/utils/net_cache.c | 26 ---
> source3/winbindd/winbindd.c | 2 -
> 8 files changed, 9 insertions(+), 450 deletions(-)
>
> diff --git a/source3/lib/gencache.c b/source3/lib/gencache.c
> index ec7c397ad73..1d8349c5702 100644
> --- a/source3/lib/gencache.c
> +++ b/source3/lib/gencache.c
> @@ -33,7 +33,6 @@
> #define DBGC_CLASS DBGC_TDB
>
> static struct tdb_wrap *cache;
> -static struct tdb_wrap *cache_notrans;
>
> /**
> * @file gencache.c
> @@ -72,7 +71,7 @@ static bool gencache_init(void)
>
> hash_size = lp_parm_int(-1, "gencache", "hash_size", 10000);
>
> - cache_fname = cache_path(talloc_tos(), "gencache.tdb");
> + cache_fname = lock_path(talloc_tos(), "gencache.tdb");
> if (cache_fname == NULL) {
> return false;
> }
> @@ -80,44 +79,14 @@ static bool gencache_init(void)
> DEBUG(5, ("Opening cache file at %s\n", cache_fname));
>
> cache = tdb_wrap_open(NULL, cache_fname, hash_size,
> - TDB_DEFAULT|TDB_INCOMPATIBLE_HASH,
> + TDB_INCOMPATIBLE_HASH|
> + TDB_NOSYNC|
> + TDB_MUTEX_LOCKING,
> open_flags, 0644);
> -
> - if (!cache && (errno == EACCES)) {
> - open_flags = O_RDONLY;
> - cache = tdb_wrap_open(NULL, cache_fname, hash_size,
> - TDB_DEFAULT|TDB_INCOMPATIBLE_HASH,
> - open_flags, 0644);
> - if (cache) {
> - DEBUG(5, ("gencache_init: Opening cache file %s read-only.\n", cache_fname));
> - }
> - }
> - TALLOC_FREE(cache_fname);
> -
> - if (!cache) {
> - DEBUG(5, ("Attempt to open gencache.tdb has failed.\n"));
> - return false;
> - }
> -
> - cache_fname = lock_path(talloc_tos(), "gencache_notrans.tdb");
> - if (cache_fname == NULL) {
> - TALLOC_FREE(cache);
> - return false;
> - }
> -
> - DEBUG(5, ("Opening cache file at %s\n", cache_fname));
> -
> - cache_notrans = tdb_wrap_open(NULL, cache_fname, hash_size,
> - TDB_CLEAR_IF_FIRST|
> - TDB_INCOMPATIBLE_HASH|
> - TDB_NOSYNC|
> - TDB_MUTEX_LOCKING,
> - open_flags, 0644);
> - if (cache_notrans == NULL) {
> + if (cache == NULL) {
> DEBUG(5, ("Opening %s failed: %s\n", cache_fname,
> strerror(errno)));
> TALLOC_FREE(cache_fname);
> - TALLOC_FREE(cache);
> return false;
> }
> TALLOC_FREE(cache_fname);
> @@ -125,132 +94,6 @@ static bool gencache_init(void)
> return true;
> }
>
> -static TDB_DATA last_stabilize_key(void)
> -{
> - const char key[] = "@LAST_STABILIZED";
> -
> - return (TDB_DATA) {
> - .dptr = discard_const_p(uint8_t, key),
> - .dsize = sizeof(key),
> - };
> -}
> -
> -struct gencache_have_val_state {
> - time_t new_timeout;
> - const DATA_BLOB *data;
> - bool gotit;
> -};
> -
> -static void gencache_have_val_parser(const struct gencache_timeout *old_timeout,
> - DATA_BLOB data,
> - void *private_data)
> -{
> - struct gencache_have_val_state *state =
> - (struct gencache_have_val_state *)private_data;
> - time_t now = time(NULL);
> - int cache_time_left, new_time_left, additional_time;
> -
> - /*
> - * Excuse the many variables, but these time calculations are
> - * confusing to me. We do not want to write to gencache with a
> - * possibly expensive transaction if we are about to write the same
> - * value, just extending the remaining timeout by less than 10%.
> - */
> -
> - cache_time_left = old_timeout->timeout - now;
> - if (cache_time_left <= 0) {
> - /*
> - * timed out, write new value
> - */
> - return;
> - }
> -
> - new_time_left = state->new_timeout - now;
> - if (new_time_left <= 0) {
> - /*
> - * Huh -- no new timeout?? Write it.
> - */
> - return;
> - }
> -
> - if (new_time_left < cache_time_left) {
> - /*
> - * Someone wants to shorten the timeout. Let it happen.
> - */
> - return;
> - }
> -
> - /*
> - * By how much does the new timeout extend the remaining cache time?
> - */
> - additional_time = new_time_left - cache_time_left;
> -
> - if (additional_time * 10 < 0) {
> - /*
> - * Integer overflow. We extend by so much that we have to write it.
> - */
> - return;
> - }
> -
> - /*
> - * The comparison below is essentially equivalent to
> - *
> - * new_time_left > cache_time_left * 1.10
> - *
> - * but without floating point calculations.
> - */
> -
> - if (additional_time * 10 > cache_time_left) {
> - /*
> - * We extend the cache timeout by more than 10%. Do it.
> - */
> - return;
> - }
> -
> - /*
> - * Now the more expensive data compare.
> - */
> - if (data_blob_cmp(state->data, &data) != 0) {
> - /*
> - * Write a new value. Certainly do it.
> - */
> - return;
> - }
> -
> - /*
> - * Extending the timeout by less than 10% for the same cache value is
> - * not worth the trouble writing a value into gencache under a
> - * possibly expensive transaction.
> - */
> - state->gotit = true;
> -}
> -
> -static bool gencache_have_val(const char *keystr, const DATA_BLOB *data,
> - time_t timeout)
> -{
> - struct gencache_have_val_state state;
> -
> - state.new_timeout = timeout;
> - state.data = data;
> - state.gotit = false;
> -
> - if (!gencache_parse(keystr, gencache_have_val_parser, &state)) {
> - return false;
> - }
> - return state.gotit;
> -}
> -
> -static int last_stabilize_parser(TDB_DATA key, TDB_DATA data,
> - void *private_data)
> -{
> - time_t *last_stabilize = private_data;
> -
> - if ((data.dsize != 0) && (data.dptr[data.dsize-1] == '\0')) {
> - *last_stabilize = atoi((char *)data.dptr);
> - }
> - return 0;
> -}
> -
> /**
> * Set an entry in the cache file. If there's no such
> * one, then add it.
> @@ -268,8 +111,6 @@ bool gencache_set_data_blob(const char *keystr, DATA_BLOB blob,
> {
> TDB_DATA key;
> int ret;
> - time_t last_stabilize;
> - static int writecount;
> TDB_DATA dbufs[3];
> uint32_t crc;
>
> @@ -279,22 +120,10 @@ bool gencache_set_data_blob(const char *keystr, DATA_BLOB blob,
>
> key = string_term_tdb_data(keystr);
>
> - ret = tdb_data_cmp(key, last_stabilize_key());
> - if (ret == 0) {
> - DEBUG(10, ("Can't store %s as a key\n", keystr));
> - return false;
> - }
> -
> if (!gencache_init()) {
> return false;
> }
>
> - if ((timeout != 0) && gencache_have_val(keystr, &blob, timeout)) {
> - DEBUG(10, ("Did not store value for %s, we already got it\n",
> - keystr));
> - return true;
> - }
> -
> dbufs[0] = (TDB_DATA) { .dptr = (uint8_t *)&timeout,
> .dsize = sizeof(time_t) };
> dbufs[1] = (TDB_DATA) { .dptr = blob.data, .dsize = blob.length };
> @@ -313,53 +142,11 @@ bool gencache_set_data_blob(const char *keystr, DATA_BLOB blob,
> (int)(timeout - time(NULL)),
> timeout > time(NULL) ? "ahead" : "in the past"));
>
> - ret = tdb_storev(cache_notrans->tdb, key, dbufs, ARRAY_SIZE(dbufs), 0);
> - if (ret != 0) {
> - return false;
> - }
> -
> - /*
> - * Every 100 writes within a single process, stabilize the cache with
> - * a transaction. This is done to prevent a single transaction to
> - * become huge and chew lots of memory.
> - */
> - writecount += 1;
> - if (writecount > lp_parm_int(-1, "gencache", "stabilize_count", 100)) {
> - gencache_stabilize();
> - writecount = 0;
> - goto done;
> - }
> -
> - /*
> - * Every 5 minutes, call gencache_stabilize() to not let grow
> - * gencache_notrans.tdb too large.
> - */
> + ret = tdb_storev(cache->tdb, key, dbufs, ARRAY_SIZE(dbufs), 0);
>
> - last_stabilize = 0;
> -
> - tdb_parse_record(cache_notrans->tdb, last_stabilize_key(),
> - last_stabilize_parser, &last_stabilize);
> -
> - if ((last_stabilize
> - + lp_parm_int(-1, "gencache", "stabilize_interval", 300))
> - < time(NULL)) {
> - gencache_stabilize();
> - }
> -
> -done:
> return ret == 0;
> }
>
> -static void gencache_del_parser(const struct gencache_timeout *t,
> - DATA_BLOB blob,
> - void *private_data)
> -{
> - if (t->timeout != 0) {
> - bool *exists = private_data;
> - *exists = true;
> - }
> -}
> -
> /**
> * Delete one entry from the cache file.
> *
> @@ -372,8 +159,6 @@ static void gencache_del_parser(const struct gencache_timeout *t,
> bool gencache_del(const char *keystr)
> {
> TDB_DATA key = string_term_tdb_data(keystr);
> - bool exists = false;
> - bool result = false;
> int ret;
>
> if (keystr == NULL) {
> @@ -386,25 +171,9 @@ bool gencache_del(const char *keystr)
>
> DEBUG(10, ("Deleting cache entry (key=[%s])\n", keystr));
>
> - ret = tdb_chainlock(cache_notrans->tdb, key);
> - if (ret == -1) {
> - return false;
> - }
> -
> - gencache_parse(keystr, gencache_del_parser, &exists);
> -
> - if (exists) {
> - /*
> - * We delete an element by setting its timeout to
> - * 0. This way we don't have to do a transaction on
> - * gencache.tdb every time we delete an element.
> - */
> - result = gencache_set(keystr, "", 0);
> - }
> -
> - tdb_chainunlock(cache_notrans->tdb, key);
> + ret = tdb_delete(cache->tdb, key);
>
> - return result;
> + return (ret != -1);
> }
>
> static bool gencache_pull_timeout(TDB_DATA key,
> @@ -449,7 +218,6 @@ struct gencache_parse_state {
> DATA_BLOB blob,
> void *private_data);
> void *private_data;
> - bool copy_to_notrans;
> };
>
> static int gencache_parse_fn(TDB_DATA key, TDB_DATA data, void *private_data)
> @@ -466,10 +234,6 @@ static int gencache_parse_fn(TDB_DATA key, TDB_DATA data, void *private_data)
> state = (struct gencache_parse_state *)private_data;
> state->parser(&t, payload, state->private_data);
>
> - if (state->copy_to_notrans) {
> - tdb_store(cache_notrans->tdb, key, data, 0);
> - }
> -
> return 0;
> }
>
> @@ -486,44 +250,19 @@ bool gencache_parse(const char *keystr,
> if (keystr == NULL) {
> return false;
> }
> - if (tdb_data_cmp(key, last_stabilize_key()) == 0) {
> - return false;
> - }
> if (!gencache_init()) {
> return false;
> }
>
> state.parser = parser;
> state.private_data = private_data;
> - state.copy_to_notrans = false;
>
> - ret = tdb_chainlock(cache_notrans->tdb, key);
> - if (ret != 0) {
> - return false;
> - }
> -
> - ret = tdb_parse_record(cache_notrans->tdb, key,
> + ret = tdb_parse_record(cache->tdb, key,
> gencache_parse_fn, &state);
> if (ret == 0) {
> - tdb_chainunlock(cache_notrans->tdb, key);
> return true;
> }
>
> - state.copy_to_notrans = true;
> -
> - ret = tdb_parse_record(cache->tdb, key, gencache_parse_fn, &state);
> -
> - if ((ret == -1) && (tdb_error(cache->tdb) == TDB_ERR_NOEXIST)) {
> - /*
> - * The record does not exist. Set a delete-marker in
> - * gencache_notrans, so that we don't have to look at
> - * the fcntl-based cache again.
> - */
> - gencache_set(keystr, "", 0);
> - }
> -
> - tdb_chainunlock(cache_notrans->tdb, key);
> -
> return (ret == 0);
> }
>
> @@ -617,138 +356,6 @@ fail:
> return false;
> }
>
> -struct stabilize_state {
> - bool written;
> -};
> -static int stabilize_fn(struct tdb_context *tdb, TDB_DATA key, TDB_DATA val,
> - void *priv);
> -
> -/**
> - * Stabilize gencache
> - *
> - * Migrate the clear-if-first gencache data to the stable,
> - * transaction-based gencache.tdb
> - */
> -
> -bool gencache_stabilize(void)
> -{
> - struct stabilize_state state;
> - int res;
> - char *now;
> -
> - if (!gencache_init()) {
> - return false;
> - }
> -
> - res = tdb_transaction_start_nonblock(cache->tdb);
> - if (res != 0) {
> - if (tdb_error(cache->tdb) == TDB_ERR_NOLOCK)
> - {
> - /*
> - * Someone else already does the stabilize,
> - * this does not have to be done twice
> - */
> - return true;
> - }
> -
> - DEBUG(10, ("Could not start transaction on gencache.tdb: "
> - "%s\n", tdb_errorstr(cache->tdb)));
> - return false;
> - }
> -
> - res = tdb_lockall_nonblock(cache_notrans->tdb);
> - if (res != 0) {
> - tdb_transaction_cancel(cache->tdb);
> - DEBUG(10, ("Could not get allrecord lock on "
> - "gencache_notrans.tdb: %s\n",
> - tdb_errorstr(cache_notrans->tdb)));
> - return false;
> - }
> -
> - state.written = false;
> -
> - res = tdb_traverse(cache_notrans->tdb, stabilize_fn, &state);
> - if (res < 0) {
> - tdb_unlockall(cache_notrans->tdb);
> - tdb_transaction_cancel(cache->tdb);
> - return false;
> - }
> -
> - if (!state.written) {
> - tdb_unlockall(cache_notrans->tdb);
> - tdb_transaction_cancel(cache->tdb);
> - return true;
> - }
> -
> - res = tdb_transaction_commit(cache->tdb);
> - if (res != 0) {
> - DEBUG(10, ("tdb_transaction_commit on gencache.tdb failed: "
> - "%s\n", tdb_errorstr(cache->tdb)));
> - tdb_unlockall(cache_notrans->tdb);
> - return false;
> - }
> -
> - res = tdb_wipe_all(cache_notrans->tdb);
> - if (res < 0) {
> - DBG_DEBUG("tdb_wipe_all on gencache_notrans.tdb failed: %s\n",
> - tdb_errorstr(cache_notrans->tdb));
> - }
> -
> - now = talloc_asprintf(talloc_tos(), "%d", (int)time(NULL));
> - if (now != NULL) {
> - tdb_store(cache_notrans->tdb, last_stabilize_key(),
> - string_term_tdb_data(now), 0);
> - TALLOC_FREE(now);
> - }
> -
> - res = tdb_unlockall(cache_notrans->tdb);
> - if (res != 0) {
> - DEBUG(10, ("tdb_unlockall on gencache.tdb failed: "
> - "%s\n", tdb_errorstr(cache->tdb)));
> - return false;
> - }
> -
> - return true;
> -}
> -
> -static int stabilize_fn(struct tdb_context *tdb, TDB_DATA key, TDB_DATA val,
> - void *priv)
> -{
> - struct stabilize_state *state = (struct stabilize_state *)priv;
> - int res;
> - time_t timeout;
> -
> - if (tdb_data_cmp(key, last_stabilize_key()) == 0) {
> - return 0;
> - }
> -
> - if (!gencache_pull_timeout(key, val, &timeout, NULL)) {
> - DEBUG(10, ("Ignoring invalid entry\n"));
> - return 0;
> - }
> - if ((timeout < time(NULL)) || (val.dsize == 0)) {
> - res = tdb_delete(cache->tdb, key);
> - if (res == 0) {
> - state->written = true;
> - } else if (tdb_error(cache->tdb) == TDB_ERR_NOEXIST) {
> - res = 0;
> - }
> - } else {
> - res = tdb_store(cache->tdb, key, val, 0);
> - if (res == 0) {
> - state->written = true;
> - }
> - }
> -
> - if (res != 0) {
> - DEBUG(10, ("Transfer to gencache.tdb failed: %s\n",
> - tdb_errorstr(cache->tdb)));
> - return -1;
> - }
> -
> - return 0;
> -}
> -
> /**
> * Get existing entry from the cache file.
> *
> @@ -816,7 +423,6 @@ struct gencache_iterate_blobs_state {
> time_t timeout, void *private_data);
> const char *pattern;
> void *private_data;
> - bool in_persistent;
> };
>
> static int gencache_iterate_blobs_fn(struct tdb_context *tdb, TDB_DATA key,
> @@ -829,13 +435,6 @@ static int gencache_iterate_blobs_fn(struct tdb_context *tdb, TDB_DATA key,
> time_t timeout;
> DATA_BLOB payload;
>
> - if (tdb_data_cmp(key, last_stabilize_key()) == 0) {
> - return 0;
> - }
> - if (state->in_persistent && tdb_exists(cache_notrans->tdb, key)) {
> - return 0;
> - }
> -
> if (key.dptr[key.dsize-1] == '\0') {
> keystr = (char *)key.dptr;
> } else {
> @@ -887,10 +486,6 @@ void gencache_iterate_blobs(void (*fn)(const char *key, DATA_BLOB value,
> state.pattern = pattern;
> state.private_data = private_data;
>
> - state.in_persistent = false;
> - tdb_traverse(cache_notrans->tdb, gencache_iterate_blobs_fn, &state);
> -
> - state.in_persistent = true;
> tdb_traverse(cache->tdb, gencache_iterate_blobs_fn, &state);
> }
>
> diff --git a/source3/lib/gencache.h b/source3/lib/gencache.h
> index 3d8ebe53872..ccf16e9d2c2 100644
> --- a/source3/lib/gencache.h
> +++ b/source3/lib/gencache.h
> @@ -48,7 +48,6 @@ bool gencache_parse(const char *keystr,
> bool gencache_get_data_blob(const char *keystr, TALLOC_CTX *mem_ctx,
> DATA_BLOB *blob,
> time_t *timeout, bool *was_expired);
> -bool gencache_stabilize(void);
> bool gencache_set_data_blob(const char *keystr, DATA_BLOB blob,
> time_t timeout);
> void gencache_iterate_blobs(void (*fn)(const char *key, DATA_BLOB value,
> diff --git a/source3/nmbd/nmbd.c b/source3/nmbd/nmbd.c
> index 85c3d7177f3..fa2bfbb287a 100644
> --- a/source3/nmbd/nmbd.c
> +++ b/source3/nmbd/nmbd.c
> @@ -70,8 +70,6 @@ static void terminate(struct messaging_context *msg)
> /* If there was an async dns child - kill it. */
> kill_async_dns_child();
>
> - gencache_stabilize();
> -
> pidfile_unlink(lp_pid_directory(), "nmbd");
>
> exit(0);
> diff --git a/source3/smbd/server_exit.c b/source3/smbd/server_exit.c
> index 2a34f067a24..2378c0c15ca 100644
> --- a/source3/smbd/server_exit.c
> +++ b/source3/smbd/server_exit.c
> @@ -237,7 +237,6 @@ static void exit_server_common(enum server_exit_reason how,
> if (am_parent) {
> pidfile_unlink(lp_pid_directory(), "smbd");
> }
> - gencache_stabilize();
> }
>
> exit(0);
> diff --git a/source3/torture/test_namemap_cache.c b/source3/torture/test_namemap_cache.c
> index 4b0ead1a25b..b30fe8743d4 100644
> --- a/source3/torture/test_namemap_cache.c
> +++ b/source3/torture/test_namemap_cache.c
> @@ -266,7 +266,5 @@ bool run_local_namemap_cache1(int dummy)
> return false;
> }
>
> - gencache_stabilize();
> -
> return true;
> }
> diff --git a/source3/utils/net.c b/source3/utils/net.c
> index 1138d1984e6..e0776c8d82c 100644
> --- a/source3/utils/net.c
> +++ b/source3/utils/net.c
> @@ -1107,8 +1107,6 @@ static struct functable net_func[] = {
>
> DEBUG(2,("return code = %d\n", rc));
>
> - gencache_stabilize();
> -
> libnetapi_free(c->netapi_ctx);
>
> poptFreeContext(pc);
> diff --git a/source3/utils/net_cache.c b/source3/utils/net_cache.c
> index 2d6989918f5..baa69f6554c 100644
> --- a/source3/utils/net_cache.c
> +++ b/source3/utils/net_cache.c
> @@ -347,24 +347,6 @@ static int net_cache_flush(struct net_context *c, int argc, const char **argv)
> return 0;
> }
>
> -static int net_cache_stabilize(struct net_context *c, int argc,
> - const char **argv)
> -{
> - if (c->display_usage) {
> - d_printf( "%s\n"
> - "net cache stabilize\n"
> - " %s\n",
> - _("Usage:"),
> - _("Move transient cache content to stable storage"));
> - return 0;
> - }
> -
> - if (!gencache_stabilize()) {
> - return -1;
> - }
> - return 0;
> -}
> -
> static int netsamlog_cache_for_all_cb(const char *sid_str,
> time_t when_cached,
> struct netr_SamInfo3 *info3,
> @@ -655,14 +637,6 @@ int net_cache(struct net_context *c, int argc, const char **argv)
> " Delete all cache entries")
> },
> {
> - "stabilize",
> - net_cache_stabilize,
> - NET_TRANSPORT_LOCAL,
> - N_("Move transient cache content to stable storage"),
> - N_("net cache stabilize\n"
> - " Move transient cache content to stable storage")
> - },
> - {
> "samlogon",
> net_cache_samlogon,
> NET_TRANSPORT_LOCAL,
> diff --git a/source3/winbindd/winbindd.c b/source3/winbindd/winbindd.c
> index 237841881ac..07a81329358 100644
> --- a/source3/winbindd/winbindd.c
> +++ b/source3/winbindd/winbindd.c
> @@ -208,8 +208,6 @@ static void terminate(bool is_parent)
>
> idmap_close();
>
> - gencache_stabilize();
> -
> netlogon_creds_cli_close_global_db();
>
> #if 0
> --
> 2.11.0
>
>
> From 04680f43c4fedb002e3ea45129e3a6fc97370106 Mon Sep 17 00:00:00 2001
> From: Volker Lendecke <vl at samba.org>
> Date: Fri, 2 Nov 2018 16:58:53 +0100
> Subject: [PATCH 8/9] gencache: Wipe corrupt databases
>
> Signed-off-by: Volker Lendecke <vl at samba.org>
> ---
> source3/lib/gencache.c | 66 ++++++++++++++++++++++++++++++++++++++++----------
> 1 file changed, 53 insertions(+), 13 deletions(-)
>
> diff --git a/source3/lib/gencache.c b/source3/lib/gencache.c
> index 1d8349c5702..124715ed10d 100644
> --- a/source3/lib/gencache.c
> +++ b/source3/lib/gencache.c
> @@ -144,7 +144,17 @@ bool gencache_set_data_blob(const char *keystr, DATA_BLOB blob,
>
> ret = tdb_storev(cache->tdb, key, dbufs, ARRAY_SIZE(dbufs), 0);
>
> - return ret == 0;
> + if (ret == 0) {
> + return true;
> + }
> + if (tdb_error(cache->tdb) != TDB_ERR_CORRUPT) {
> + return false;
> + }
> +
> + ret = tdb_wipe_all(cache->tdb);
> + SMB_ASSERT(ret == 0);
> +
> + return false;
> }
>
> /**
> @@ -173,7 +183,17 @@ bool gencache_del(const char *keystr)
>
> ret = tdb_delete(cache->tdb, key);
>
> - return (ret != -1);
> + if (ret == 0) {
> + return true;
> + }
> + if (tdb_error(cache->tdb) != TDB_ERR_CORRUPT) {
> + return false;
> + }
> +
> + ret = tdb_wipe_all(cache->tdb);
> + SMB_ASSERT(ret == 0);
> +
> + return true; /* We've deleted a bit more... */
> }
>
> static bool gencache_pull_timeout(TDB_DATA key,
> @@ -218,20 +238,21 @@ struct gencache_parse_state {
> DATA_BLOB blob,
> void *private_data);
> void *private_data;
> + bool format_error;
> };
>
> static int gencache_parse_fn(TDB_DATA key, TDB_DATA data, void *private_data)
> {
> - struct gencache_parse_state *state;
> + struct gencache_parse_state *state = private_data;
> struct gencache_timeout t;
> DATA_BLOB payload;
> bool ret;
>
> ret = gencache_pull_timeout(key, data, &t.timeout, &payload);
> if (!ret) {
> - return -1;
> + state->format_error = true;
> + return 0;
> }
> - state = (struct gencache_parse_state *)private_data;
> state->parser(&t, payload, state->private_data);
>
> return 0;
> @@ -243,7 +264,9 @@ bool gencache_parse(const char *keystr,
> void *private_data),
> void *private_data)
> {
> - struct gencache_parse_state state;
> + struct gencache_parse_state state = {
> + .parser = parser, .private_data = private_data
> + };
> TDB_DATA key = string_term_tdb_data(keystr);
> int ret;
>
> @@ -254,16 +277,27 @@ bool gencache_parse(const char *keystr,
> return false;
> }
>
> - state.parser = parser;
> - state.private_data = private_data;
> -
> ret = tdb_parse_record(cache->tdb, key,
> gencache_parse_fn, &state);
> - if (ret == 0) {
> - return true;
> + if ((ret == -1) && (tdb_error(cache->tdb) == TDB_ERR_CORRUPT)) {
> + goto wipe;
> + }
> + if (ret == -1) {
> + return false;
> + }
> + if (state.format_error) {
> + ret = tdb_delete(cache->tdb, key);
> + if (ret == -1) {
> + goto wipe;
> + }
> + return false;
> }
> + return true;
>
> - return (ret == 0);
> +wipe:
> + ret = tdb_wipe_all(cache->tdb);
> + SMB_ASSERT(ret == 0);
> + return false;
> }
>
> struct gencache_get_data_blob_state {
> @@ -475,6 +509,7 @@ void gencache_iterate_blobs(void (*fn)(const char *key, DATA_BLOB value,
> void *private_data, const char *pattern)
> {
> struct gencache_iterate_blobs_state state;
> + int ret;
>
> if ((fn == NULL) || (pattern == NULL) || !gencache_init()) {
> return;
> @@ -486,7 +521,12 @@ void gencache_iterate_blobs(void (*fn)(const char *key, DATA_BLOB value,
> state.pattern = pattern;
> state.private_data = private_data;
>
> - tdb_traverse(cache->tdb, gencache_iterate_blobs_fn, &state);
> + ret = tdb_traverse(cache->tdb, gencache_iterate_blobs_fn, &state);
> +
> + if ((ret == -1) && (tdb_error(cache->tdb) == TDB_ERR_CORRUPT)) {
> + ret = tdb_wipe_all(cache->tdb);
> + SMB_ASSERT(ret == 0);
> + }
> }
>
> /**
> --
> 2.11.0
>
>
> From 071dc1dbb44bb1e3097d8e0566f8f955334e4ba4 Mon Sep 17 00:00:00 2001
> From: Volker Lendecke <vl at samba.org>
> Date: Wed, 24 Oct 2018 10:51:40 +0200
> Subject: [PATCH 9/9] gencache: Prune expired entries
>
> This solves the problem that gencache never shrinks right
> now. Whenever we write an entry, we now walk that entry's chain and
> delete expired entries. This should be a good balance between
> performance and cleanup actions: Reading is still unaffected, and
> those who write pay a small penalty while keeping gencache size under
> control.
>
> Signed-off-by: Volker Lendecke <vl at samba.org>
> ---
> source3/lib/gencache.c | 88 ++++++++++++++++++++++++++++++++++++++++++++++++++
> 1 file changed, 88 insertions(+)
>
> diff --git a/source3/lib/gencache.c b/source3/lib/gencache.c
> index 124715ed10d..d7c3ad32921 100644
> --- a/source3/lib/gencache.c
> +++ b/source3/lib/gencache.c
> @@ -28,6 +28,7 @@
> #include "util_tdb.h"
> #include "tdb_wrap/tdb_wrap.h"
> #include "zlib.h"
> +#include "lib/util/strv.h"
>
> #undef DBGC_CLASS
> #define DBGC_CLASS DBGC_TDB
> @@ -41,6 +42,11 @@ static struct tdb_wrap *cache;
> *
> **/
>
> +static bool gencache_pull_timeout(TDB_DATA key,
> + TDB_DATA data,
> + time_t *pres,
> + DATA_BLOB *payload);
> +
> struct gencache_timeout {
> time_t timeout;
> };
> @@ -94,6 +100,77 @@ static bool gencache_init(void)
> return true;
> }
>
> +/*
> + * Walk the hash chain for "key", deleting all expired entries for
> + * that hash chain
> + */
> +struct gencache_prune_expired_state {
> + TALLOC_CTX *mem_ctx;
> + char *keys;
> +};
> +
> +static int gencache_prune_expired_fn(struct tdb_context *tdb,
> + TDB_DATA key,
> + TDB_DATA data,
> + void *private_data)
> +{
> + struct gencache_prune_expired_state *state = private_data;
> + struct gencache_timeout t;
> + bool ok = false;
> + bool expired = false;
> +
> + if ((key.dsize == 0) || (key.dptr[key.dsize-1] != '\0')) {
> + /* not a valid record, should never happen */
> + return 0;
> + }
> +
> + ok = gencache_pull_timeout(key, data, &t.timeout, NULL);
> + if (ok) {
> + expired = gencache_timeout_expired(&t);
> + }
> +
> + if (!ok || expired) {
> + /*
> + * Ignore failure, this is "just" background cleanup
> + */
> + strv_add(state->mem_ctx, &state->keys, (char *)key.dptr);
> + }
> +
> + return 0;
> +}
> +
> +static void gencache_prune_expired(struct tdb_context *tdb,
> + TDB_DATA chain_key)
> +{
> + struct gencache_prune_expired_state state = {
> + .mem_ctx = talloc_tos(),
> + };
> + char *keystr = NULL;
> + int ret;
> +
> + ret = tdb_traverse_key_chain(
> + tdb, chain_key, gencache_prune_expired_fn, &state);
> + if (ret == -1) {
> + DBG_DEBUG("tdb_traverse_key_chain failed: %s\n",
> + tdb_errorstr(tdb));
> + return;
> + }
> +
> + while ((keystr = strv_next(state.keys, keystr)) != NULL) {
> + TDB_DATA key = string_term_tdb_data(keystr);
> +
> + /*
> + * We expect the hash chain of "chain_key" to be
> + * locked. So between gencache_prune_expired_fn
> + * figuring out "keystr" is expired and the
> + * tdb_delete, nobody can have reset the timeout.
> + */
> + tdb_delete(tdb, key);
> + }
> +
> + TALLOC_FREE(state.keys);
> +}
> +
> /**
> * Set an entry in the cache file. If there's no such
> * one, then add it.
> @@ -142,8 +219,19 @@ bool gencache_set_data_blob(const char *keystr, DATA_BLOB blob,
> (int)(timeout - time(NULL)),
> timeout > time(NULL) ? "ahead" : "in the past"));
>
> + ret = tdb_chainlock(cache->tdb, key);
> + if (ret == -1) {
> + DBG_WARNING("tdb_chainlock for key [%s] failed: %s\n",
> + keystr, tdb_errorstr(cache->tdb));
> + return false;
> + }
> +
> + gencache_prune_expired(cache->tdb, key);
> +
> ret = tdb_storev(cache->tdb, key, dbufs, ARRAY_SIZE(dbufs), 0);
>
> + tdb_chainunlock(cache->tdb, key);
> +
> if (ret == 0) {
> return true;
> }
> --
> 2.11.0
>
More information about the samba-technical
mailing list