[PATCH] Discard unusable hash table entries

Michael Chapman mike at very.puzzling.org
Sun Jul 28 21:15:23 MDT 2013


During an in-place update, a block can not be matched if its offset is
prior to the sender offset, if SUMFLG_SAME_OFFSET is not set. As the
sender offset is only ever incremented, and SUMFLG_SAME_OFFSET is never
cleared on a block, a block that fails this test will never pass it in
the future. It can be removed from the hash table chain.

For hash tables with extremely long chains (i.e. where the receiver has
multiple blocks with the same checksum), this can be a significant
performance win.
---
 match.c | 25 ++++++++++++++++---------
 1 file changed, 16 insertions(+), 9 deletions(-)

diff --git a/match.c b/match.c
index bafab9f..987836e 100644
--- a/match.c
+++ b/match.c
@@ -178,7 +178,8 @@ static void hash_search(int f,struct sum_struct *s,
 
 	do {
 		int done_csum2 = 0;
-		int32 i;
+		uint32 hash_entry;
+		int32 i, *prev;
 
 		if (DEBUG_GTE(DELTASUM, 4)) {
 			rprintf(FINFO, "offset=%s sum=%04x%04x\n",
@@ -186,19 +187,31 @@ static void hash_search(int f,struct sum_struct *s,
 		}
 
 		if (tablesize == TRADITIONAL_TABLESIZE) {
-			if ((i = hash_table[SUM2HASH2(s1,s2)]) < 0)
+			hash_entry = SUM2HASH2(s1,s2);
+			if ((i = hash_table[hash_entry]) < 0)
 				goto null_hash;
 			sum = (s1 & 0xffff) | (s2 << 16);
 		} else {
 			sum = (s1 & 0xffff) | (s2 << 16);
-			if ((i = hash_table[BIG_SUM2HASH(sum)]) < 0)
+			hash_entry = BIG_SUM2HASH(sum);
+			if ((i = hash_table[hash_entry]) < 0)
 				goto null_hash;
 		}
+		prev = &hash_table[hash_entry];
 
 		hash_hits++;
 		do {
 			int32 l;
 
+			/* in-place: ensure chunk's offset is either >= our
+			 * offset or that the data didn't move. */
+			if (updating_basis_file && s->sums[i].offset < offset
+			    && !(s->sums[i].flags & SUMFLG_SAME_OFFSET)) {
+				*prev = s->sums[i].chain;
+				continue;
+			}
+			prev = &s->sums[i].chain;
+
 			if (sum != s->sums[i].sum1)
 				continue;
 
@@ -207,12 +220,6 @@ static void hash_search(int f,struct sum_struct *s,
 			if (l != s->sums[i].len)
 				continue;
 
-			/* in-place: ensure chunk's offset is either >= our
-			 * offset or that the data didn't move. */
-			if (updating_basis_file && s->sums[i].offset < offset
-			    && !(s->sums[i].flags & SUMFLG_SAME_OFFSET))
-				continue;
-
 			if (DEBUG_GTE(DELTASUM, 3)) {
 				rprintf(FINFO,
 					"potential match at %s i=%ld sum=%08x\n",
-- 
1.8.3.1



More information about the rsync mailing list