Tune gencache a bit

Volker Lendecke Volker.Lendecke at SerNet.DE
Tue Nov 27 06:18:24 MST 2012


On Fri, Nov 23, 2012 at 03:26:20PM +0100, Volker Lendecke wrote:
> Hi!
> 
> Recently I came across a workload where we were writing the
> same value into gencache over and over again. This led to a
> significant slowdown due to the frequent transaction_commit
> calls doing msync and fsync syscalls.
> 
> The attached patchset avoids these writes: if the new value equals
> the one already cached and would extend the remaining cache time by
> less than 10%, we skip the write entirely.
> 
> Comments?

Attached is a new patchset that takes care of some comments from
Christian Ambach.

Thanks,

Volker

-- 
SerNet GmbH, Bahnhofsallee 1b, 37081 Göttingen
phone: +49-551-370000-0, fax: +49-551-370000-9
AG Göttingen, HRB 2816, GF: Dr. Johannes Loxen
http://www.sernet.de, mailto:kontakt at sernet.de
-------------- next part --------------
From b0566c8b58bf9c91a6edf25d1b62ad08781552de Mon Sep 17 00:00:00 2001
From: Volker Lendecke <vl at samba.org>
Date: Tue, 20 Nov 2012 09:50:57 +0100
Subject: [PATCH 1/2] s3: Avoid some transaction_commit on gencache.tdb

Commits are expensive, and in some scenarios we are overwriting existing values
again and again.
---
 source3/lib/gencache.c |  110 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 110 insertions(+), 0 deletions(-)

diff --git a/source3/lib/gencache.c b/source3/lib/gencache.c
index 95b4811..a64677d 100644
--- a/source3/lib/gencache.c
+++ b/source3/lib/gencache.c
@@ -127,6 +127,110 @@ static TDB_DATA last_stabilize_key(void)
 	return result;
 }
 
+struct gencache_have_val_state {
+	time_t new_timeout;
+	const DATA_BLOB *data;
+	bool gotit;
+};
+
+static void gencache_have_val_parser(time_t old_timeout, DATA_BLOB data,
+				     void *private_data)
+{
+	struct gencache_have_val_state *state =
+		(struct gencache_have_val_state *)private_data;
+	time_t now = time(NULL);
+	int cache_time_left, new_time_left, additional_time;
+
+	/*
+	 * Excuse the many variables, but these time calculations are
+	 * confusing to me. We do not want to write to gencache with a
+	 * possibly expensive transaction if we are about to write the same
+	 * value, just extending the remaining timeout by less than 10%.
+	 */
+
+	cache_time_left = old_timeout - now;
+	if (cache_time_left <= 0) {
+		/*
+		 * timed out, write new value
+		 */
+		return;
+	}
+
+	new_time_left = state->new_timeout - now;
+	if (new_time_left <= 0) {
+		/*
+		 * Huh -- no new timeout?? Write it.
+		 */
+		return;
+	}
+
+	if (new_time_left < cache_time_left) {
+		/*
+		 * Someone wants to shorten the timeout. Let it happen.
+		 */
+		return;
+	}
+
+	/*
+	 * By how much does the new timeout extend the remaining cache time?
+	 */
+	additional_time = new_time_left - cache_time_left;
+
+	if (additional_time * 10 < 0) {
+		/*
+		 * Integer overflow. We extend by so much that we have to write it.
+		 */
+		return;
+	}
+
+	/*
+	 * The comparison below is essentially equivalent to
+	 *
+	 *    new_time_left > cache_time_left * 1.10
+	 *
+	 * but without floating point calculations.
+	 */
+
+	if (additional_time * 10 > cache_time_left) {
+		/*
+		 * We extend the cache timeout by more than 10%. Do it.
+		 */
+		return;
+	}
+
+	/*
+	 * Now the more expensive data compare.
+	 */
+	if (data_blob_cmp(state->data, &data) != 0) {
+		/*
+		 * Write a new value. Certainly do it.
+		 */
+		return;
+	}
+
+	/*
+	 * Extending the timeout by less than 10% for the same cache value is
+	 * not worth the trouble writing a value into gencache under a
+	 * possibly expensive transaction.
+	 */
+	state->gotit = true;
+}
+
+static bool gencache_have_val(const char *keystr, const DATA_BLOB *data,
+			      time_t timeout)
+{
+	struct gencache_have_val_state state;
+
+	state.new_timeout = timeout;
+	state.data = data;
+	state.gotit = false;
+
+	if (!gencache_parse(keystr, gencache_have_val_parser, &state)) {
+		return false;
+	}
+	return state.gotit;
+}
+
 /**
  * Set an entry in the cache file. If there's no such
  * one, then add it.
@@ -160,6 +264,12 @@ bool gencache_set_data_blob(const char *keystr, const DATA_BLOB *blob,
 
 	if (!gencache_init()) return False;
 
+	if (gencache_have_val(keystr, blob, timeout)) {
+		DEBUG(10, ("Did not store value for %s, we already got it\n",
+			   keystr));
+		return true;
+	}
+
 	val = talloc_asprintf(talloc_tos(), CACHE_DATA_FMT, (int)timeout);
 	if (val == NULL) {
 		return False;
-- 
1.7.3.4


From eb6c8433ed85bb815c5b913e0b58503b0cdac0cb Mon Sep 17 00:00:00 2001
From: Volker Lendecke <vl at samba.org>
Date: Tue, 20 Nov 2012 10:02:07 +0100
Subject: [PATCH 2/2] s3: Open gencache_notrans with TDB_NOSYNC

We are doing CLEAR_IF_FIRST here, and we are doing the transactions only to
protect gencache_stabilize against concurrent writers. tdb's transaction.c
says:

  - if TDB_NOSYNC is passed to flags in tdb_open then transactions are
    still available, but no fsync/msync calls are made.  This means we
    are still proof against a process dying during transaction commit,
    but not against machine reboot.

For gencache_notrans.tdb this is exactly what we want and avoids some expensive
disk syncs.
---
 source3/lib/gencache.c |    5 ++++-
 1 files changed, 4 insertions(+), 1 deletions(-)

diff --git a/source3/lib/gencache.c b/source3/lib/gencache.c
index a64677d..0b24c0c 100644
--- a/source3/lib/gencache.c
+++ b/source3/lib/gencache.c
@@ -106,7 +106,10 @@ again:
 
 	DEBUG(5, ("Opening cache file at %s\n", cache_fname));
 
-	cache_notrans = tdb_open_log(cache_fname, 0, TDB_CLEAR_IF_FIRST|TDB_INCOMPATIBLE_HASH,
+	cache_notrans = tdb_open_log(cache_fname, 0,
+				     TDB_CLEAR_IF_FIRST|
+				     TDB_INCOMPATIBLE_HASH|
+				     TDB_NOSYNC,
 				     open_flags, 0644);
 	if (cache_notrans == NULL) {
 		DEBUG(5, ("Opening %s failed: %s\n", cache_fname,
-- 
1.7.3.4


