[PATCH] speed up tdbbackup

Volker Lendecke Volker.Lendecke at SerNet.DE
Wed Jul 22 10:07:55 UTC 2015


Hi!

Attached find patches that speed up tdb measurably. I don't
get really reliable results with "time", but I've seen up to
25% improvement on a 10-million small record tdb file on
/dev/shm/. If I run a 1-million record tdbbackup, callgrind
goes down from 6,916,115,142 to 4,888,697,132 instructions
run. This pretty repeatable.

Now guess what patch has the larger effect... It's the
tdb_oob one. I can only guess why this is the case. Probably
pointer chasing is really expensive and the CPU caches are
better utilized if we can call a common function.

Review&push appreciated!

Thanks,

Volker

-- 
SerNet GmbH, Bahnhofsallee 1b, 37081 Göttingen
phone: +49-551-370000-0, fax: +49-551-370000-9
AG Göttingen, HRB 2816, GF: Dr. Johannes Loxen
http://www.sernet.de, mailto:kontakt at sernet.de
-------------- next part --------------
From f9a8d2e1d29b872b57aa918824a4a28d32b5e59a Mon Sep 17 00:00:00 2001
From: Volker Lendecke <vl at samba.org>
Date: Tue, 21 Jul 2015 13:30:35 +0200
Subject: [PATCH 1/3] tdb: rename tdb_oob->tdb_notrans_oob

Signed-off-by: Volker Lendecke <vl at samba.org>
---
 lib/tdb/common/io.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/lib/tdb/common/io.c b/lib/tdb/common/io.c
index fe47d18..9d87dc1 100644
--- a/lib/tdb/common/io.c
+++ b/lib/tdb/common/io.c
@@ -97,8 +97,8 @@ static int tdb_fstat(struct tdb_context *tdb, struct stat *buf)
    see if the database has been expanded by someone else and expand
    if necessary
 */
-static int tdb_oob(struct tdb_context *tdb, tdb_off_t off, tdb_len_t len,
-		   int probe)
+static int tdb_notrans_oob(struct tdb_context *tdb, tdb_off_t off,
+			   tdb_len_t len, int probe)
 {
 	struct stat st;
 	if (len + off < len) {
@@ -663,7 +663,7 @@ static const struct tdb_methods io_methods = {
 	tdb_read,
 	tdb_write,
 	tdb_next_hash_chain,
-	tdb_oob,
+	tdb_notrans_oob,
 	tdb_expand_file,
 };
 
-- 
1.9.1


From ac301783a3a3bc56ce5ca7577a01ebca86eec32a Mon Sep 17 00:00:00 2001
From: Volker Lendecke <vl at samba.org>
Date: Wed, 22 Jul 2015 11:19:08 +0200
Subject: [PATCH 2/3] tdb: Don't malloc for every record in traverse

This gains a few percent in tdbbackup

Signed-off-by: Volker Lendecke <vl at samba.org>
---
 lib/tdb/common/traverse.c | 45 +++++++++++++++++++++++++++++++++++++++------
 1 file changed, 39 insertions(+), 6 deletions(-)

diff --git a/lib/tdb/common/traverse.c b/lib/tdb/common/traverse.c
index e18e3c3..fcb632d 100644
--- a/lib/tdb/common/traverse.c
+++ b/lib/tdb/common/traverse.c
@@ -148,6 +148,13 @@ static int tdb_traverse_internal(struct tdb_context *tdb,
 	struct tdb_record rec;
 	int ret = 0, count = 0;
 	tdb_off_t off;
+	size_t recbuf_len;
+
+	recbuf_len = 4096;
+	key.dptr = malloc(recbuf_len);
+	if (key.dptr == NULL) {
+		return -1;
+	}
 
 	/* This was in the initialization, above, but the IRIX compiler
 	 * did not like it.  crh
@@ -159,15 +166,43 @@ static int tdb_traverse_internal(struct tdb_context *tdb,
 
 	/* tdb_next_lock places locks on the record returned, and its chain */
 	while ((off = tdb_next_lock(tdb, tl, &rec)) != 0) {
+		tdb_len_t full_len = rec.key_len + rec.data_len;
+
+		if (full_len > recbuf_len) {
+			recbuf_len = full_len;
+
+			/*
+			 * No realloc, we don't need the old data and thus can
+			 * do without the memcpy
+			 */
+			free(key.dptr);
+			key.dptr = malloc(recbuf_len);
+
+			if (key.dptr == NULL) {
+				ret = -1;
+				if (tdb_unlock(tdb, tl->hash, tl->lock_rw)
+				    != 0) {
+					goto out;
+				}
+				if (tdb_unlock_record(tdb, tl->off) != 0) {
+					TDB_LOG((tdb, TDB_DEBUG_FATAL,
+						 "tdb_traverse: malloc "
+						 "failed and unlock_record "
+						 "failed!\n"));
+				}
+				goto out;
+			}
+		}
+
 		if (off == TDB_NEXT_LOCK_ERR) {
 			ret = -1;
 			goto out;
 		}
 		count++;
 		/* now read the full record */
-		key.dptr = tdb_alloc_read(tdb, tl->off + sizeof(rec),
-					  rec.key_len + rec.data_len);
-		if (!key.dptr) {
+		if (tdb->methods->tdb_read(
+			    tdb, tl->off + sizeof(rec), key.dptr, full_len,
+			    DOCONV()) == -1) {
 			ret = -1;
 			if (tdb_unlock(tdb, tl->hash, tl->lock_rw) != 0)
 				goto out;
@@ -184,7 +219,6 @@ static int tdb_traverse_internal(struct tdb_context *tdb,
 		/* Drop chain lock, call out */
 		if (tdb_unlock(tdb, tl->hash, tl->lock_rw) != 0) {
 			ret = -1;
-			SAFE_FREE(key.dptr);
 			goto out;
 		}
 		if (fn && fn(tdb, key, dbuf, private_data)) {
@@ -194,13 +228,12 @@ static int tdb_traverse_internal(struct tdb_context *tdb,
 				TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_traverse: unlock_record failed!\n"));;
 				ret = -1;
 			}
-			SAFE_FREE(key.dptr);
 			goto out;
 		}
-		SAFE_FREE(key.dptr);
 	}
 	tdb_trace(tdb, "tdb_traverse_end");
 out:
+	SAFE_FREE(key.dptr);
 	tdb->travlocks.next = tl->next;
 	if (ret < 0)
 		return -1;
-- 
1.9.1


From 490fb35f690e138390178d6d7116adb49f49d313 Mon Sep 17 00:00:00 2001
From: Volker Lendecke <vl at samba.org>
Date: Tue, 21 Jul 2015 13:58:40 +0200
Subject: [PATCH 3/3] tdb: Factor out basic checks to tdb_oob

Speeds up tdbbackup measurably

Signed-off-by: Volker Lendecke <vl at samba.org>
---
 lib/tdb/common/check.c       |  6 +++---
 lib/tdb/common/freelist.c    |  2 +-
 lib/tdb/common/io.c          | 20 +++++++++++++++-----
 lib/tdb/common/open.c        |  4 ++--
 lib/tdb/common/rescue.c      |  4 ++--
 lib/tdb/common/tdb_private.h |  1 +
 lib/tdb/common/transaction.c |  2 +-
 7 files changed, 25 insertions(+), 14 deletions(-)

diff --git a/lib/tdb/common/check.c b/lib/tdb/common/check.c
index e632af5..94b3778 100644
--- a/lib/tdb/common/check.c
+++ b/lib/tdb/common/check.c
@@ -94,7 +94,7 @@ static bool tdb_check_record(struct tdb_context *tdb,
 			 off, rec->next));
 		goto corrupt;
 	}
-	if (tdb->methods->tdb_oob(tdb, rec->next, sizeof(*rec), 0))
+	if (tdb_oob(tdb, rec->next, sizeof(*rec), 0))
 		goto corrupt;
 
 	/* Check rec_len: similar to rec->next, implies next record. */
@@ -112,7 +112,7 @@ static bool tdb_check_record(struct tdb_context *tdb,
 		goto corrupt;
 	}
 	/* OOB allows "right at the end" access, so this works for last rec. */
-	if (tdb->methods->tdb_oob(tdb, off, sizeof(*rec)+rec->rec_len, 0))
+	if (tdb_oob(tdb, off, sizeof(*rec)+rec->rec_len, 0))
 		goto corrupt;
 
 	/* Check tailer. */
@@ -347,7 +347,7 @@ _PUBLIC_ int tdb_check(struct tdb_context *tdb,
 	}
 
 	/* Make sure we know true size of the underlying file. */
-	tdb->methods->tdb_oob(tdb, tdb->map_size, 1, 1);
+	tdb_oob(tdb, tdb->map_size, 1, 1);
 
 	/* Header must be OK: also gets us the recovery ptr, if any. */
 	if (!tdb_check_header(tdb, &recovery_start))
diff --git a/lib/tdb/common/freelist.c b/lib/tdb/common/freelist.c
index 86fac2f..cb256de 100644
--- a/lib/tdb/common/freelist.c
+++ b/lib/tdb/common/freelist.c
@@ -56,7 +56,7 @@ int tdb_rec_free_read(struct tdb_context *tdb, tdb_off_t off, struct tdb_record
 			   rec->magic, off));
 		return -1;
 	}
-	if (tdb->methods->tdb_oob(tdb, rec->next, sizeof(*rec), 0) != 0)
+	if (tdb_oob(tdb, rec->next, sizeof(*rec), 0) != 0)
 		return -1;
 	return 0;
 }
diff --git a/lib/tdb/common/io.c b/lib/tdb/common/io.c
index 9d87dc1..4fba22b 100644
--- a/lib/tdb/common/io.c
+++ b/lib/tdb/common/io.c
@@ -93,6 +93,16 @@ static int tdb_fstat(struct tdb_context *tdb, struct stat *buf)
 	return ret;
 }
 
+int tdb_oob(struct tdb_context *tdb, tdb_off_t off, tdb_len_t len, int probe)
+{
+	tdb_off_t new_off = off+len;
+
+	if ((new_off >= len) && (new_off <= tdb->map_size)) {
+		return 0;
+	}
+	return tdb->methods->tdb_oob(tdb, off, len, probe);
+}
+
 /* check for an out of bounds access - if it is out of bounds then
    see if the database has been expanded by someone else and expand
    if necessary
@@ -177,7 +187,7 @@ static int tdb_write(struct tdb_context *tdb, tdb_off_t off,
 		return -1;
 	}
 
-	if (tdb->methods->tdb_oob(tdb, off, len, 0) != 0)
+	if (tdb_oob(tdb, off, len, 0) != 0)
 		return -1;
 
 	if (tdb->map_ptr) {
@@ -232,7 +242,7 @@ void *tdb_convert(void *buf, uint32_t size)
 static int tdb_read(struct tdb_context *tdb, tdb_off_t off, void *buf,
 		    tdb_len_t len, int cv)
 {
-	if (tdb->methods->tdb_oob(tdb, off, len, 0) != 0) {
+	if (tdb_oob(tdb, off, len, 0) != 0) {
 		return -1;
 	}
 
@@ -505,7 +515,7 @@ int tdb_expand(struct tdb_context *tdb, tdb_off_t size)
 	}
 
 	/* must know about any previous expansions by another process */
-	tdb->methods->tdb_oob(tdb, tdb->map_size, 1, 1);
+	tdb_oob(tdb, tdb->map_size, 1, 1);
 
 	/*
 	 * Note: that we don't care about tdb->hdr_ofs != 0 here
@@ -623,7 +633,7 @@ int tdb_parse_data(struct tdb_context *tdb, TDB_DATA key,
 		 * Optimize by avoiding the malloc/memcpy/free, point the
 		 * parser directly at the mmap area.
 		 */
-		if (tdb->methods->tdb_oob(tdb, offset, len, 0) != 0) {
+		if (tdb_oob(tdb, offset, len, 0) != 0) {
 			return -1;
 		}
 		data.dptr = offset + (unsigned char *)tdb->map_ptr;
@@ -650,7 +660,7 @@ int tdb_rec_read(struct tdb_context *tdb, tdb_off_t offset, struct tdb_record *r
 		TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_rec_read bad magic 0x%x at offset=%u\n", rec->magic, offset));
 		return -1;
 	}
-	return tdb->methods->tdb_oob(tdb, rec->next, sizeof(*rec), 0);
+	return tdb_oob(tdb, rec->next, sizeof(*rec), 0);
 }
 
 int tdb_rec_write(struct tdb_context *tdb, tdb_off_t offset, struct tdb_record *rec)
diff --git a/lib/tdb/common/open.c b/lib/tdb/common/open.c
index 3b53fa7..252f0d2 100644
--- a/lib/tdb/common/open.c
+++ b/lib/tdb/common/open.c
@@ -659,7 +659,7 @@ _PUBLIC_ struct tdb_context *tdb_open_ex(const char *name, int hash_size, int td
 	 * As this skips tdb->hdr_ofs.
 	 */
 	tdb->map_size = 0;
-	ret = tdb->methods->tdb_oob(tdb, 0, 1, 0);
+	ret = tdb_oob(tdb, 0, 1, 0);
 	if (ret == -1) {
 		errno = EIO;
 		goto fail;
@@ -880,7 +880,7 @@ static int tdb_reopen_internal(struct tdb_context *tdb, bool active_lock)
 	 * As this skips tdb->hdr_ofs.
 	 */
 	tdb->map_size = 0;
-	if (tdb->methods->tdb_oob(tdb, 0, 1, 0) != 0) {
+	if (tdb_oob(tdb, 0, 1, 0) != 0) {
 		goto fail;
 	}
 #endif /* fake pread or pwrite */
diff --git a/lib/tdb/common/rescue.c b/lib/tdb/common/rescue.c
index 17e7ed8..7deab7f 100644
--- a/lib/tdb/common/rescue.c
+++ b/lib/tdb/common/rescue.c
@@ -60,7 +60,7 @@ static bool looks_like_valid_record(struct tdb_context *tdb,
 	if (rec->next > 0 && rec->next < TDB_DATA_START(tdb->hash_size))
 		return false;
 
-	if (tdb->methods->tdb_oob(tdb, rec->next, sizeof(*rec), 1))
+	if (tdb_oob(tdb, rec->next, sizeof(*rec), 1))
 		return false;
 
 	key->dsize = rec->key_len;
@@ -228,7 +228,7 @@ _PUBLIC_ int tdb_rescue(struct tdb_context *tdb,
 	}
 
 	/* Make sure we know true size of the underlying file. */
-	tdb->methods->tdb_oob(tdb, tdb->map_size, 1, 1);
+	tdb_oob(tdb, tdb->map_size, 1, 1);
 
 	/* Suppress logging, since we anticipate errors. */
 	tdb->log.log_fn = logging_suppressed;
diff --git a/lib/tdb/common/tdb_private.h b/lib/tdb/common/tdb_private.h
index de8d9e6..a22a950 100644
--- a/lib/tdb/common/tdb_private.h
+++ b/lib/tdb/common/tdb_private.h
@@ -323,5 +323,6 @@ int tdb_mutex_allrecord_lock(struct tdb_context *tdb, int ltype,
 int tdb_mutex_allrecord_unlock(struct tdb_context *tdb);
 int tdb_mutex_allrecord_upgrade(struct tdb_context *tdb);
 void tdb_mutex_allrecord_downgrade(struct tdb_context *tdb);
+int tdb_oob(struct tdb_context *tdb, tdb_off_t off, tdb_len_t len, int probe);
 
 #endif /* TDB_PRIVATE_H */
diff --git a/lib/tdb/common/transaction.c b/lib/tdb/common/transaction.c
index 0dd057b..800fee1 100644
--- a/lib/tdb/common/transaction.c
+++ b/lib/tdb/common/transaction.c
@@ -504,7 +504,7 @@ static int _tdb_transaction_start(struct tdb_context *tdb,
 
 	/* make sure we know about any file expansions already done by
 	   anyone else */
-	tdb->methods->tdb_oob(tdb, tdb->map_size, 1, 1);
+	tdb_oob(tdb, tdb->map_size, 1, 1);
 	tdb->transaction->old_map_size = tdb->map_size;
 
 	/* finally hook the io methods, replacing them with
-- 
1.9.1



More information about the samba-technical mailing list