[SCM] The rsync repository. - branch master updated

Rsync CVS commit messages rsync-cvs at lists.samba.org
Sat Aug 3 11:02:17 MDT 2013


The branch, master has been updated
       via  de94193 Remove bypassed checksums in --inplace to improve speed.
      from  05fce65 Preparing for release of 3.1.0pre1

http://gitweb.samba.org/?p=rsync.git;a=shortlog;h=master


- Log -----------------------------------------------------------------
commit de94193353864221280be9fbb6193d92eb133000
Author: Wayne Davison <wayned at samba.org>
Date:   Sat Aug 3 09:44:13 2013 -0700

    Remove bypassed checksums in --inplace to improve speed.
    
    When checking a checksum that refers to a part of an --inplace file that
    has been overwritten w/o getting SUMFLG_SAME_OFFSET set, we remove the
    checksum from the list.  This will speed up files that have a lot of
    identical checksum blocks (e.g. sequences of zeros) that we can't use
    due to them not getting marked as being the same.  Patch provided by
    Michael Chapman.

-----------------------------------------------------------------------

Summary of changes:
 NEWS    |    3 +++
 match.c |   26 +++++++++++++++++---------
 2 files changed, 20 insertions(+), 9 deletions(-)


Changeset truncated at 500 lines:

diff --git a/NEWS b/NEWS
index 040ac2d..eec631d 100644
--- a/NEWS
+++ b/NEWS
@@ -154,6 +154,9 @@ Changes since 3.0.9:
       file for one way to package the resulting files.  (Suggestions for
       how to make this even easier to install & use are welcomed.)
 
+    - Improved the speed of some --inplace updates when there are lots of
+      identical checksum blocks that end up being unusable.
+
     - Added the --outbuf=N|L|B option for chosing the output buffering.
 
     - Repating the --fuzzy option now causes the code to look for fuzzy matches
diff --git a/match.c b/match.c
index bafab9f..a8bd1f3 100644
--- a/match.c
+++ b/match.c
@@ -178,7 +178,8 @@ static void hash_search(int f,struct sum_struct *s,
 
 	do {
 		int done_csum2 = 0;
-		int32 i;
+		uint32 hash_entry;
+		int32 i, *prev;
 
 		if (DEBUG_GTE(DELTASUM, 4)) {
 			rprintf(FINFO, "offset=%s sum=%04x%04x\n",
@@ -186,19 +187,32 @@ static void hash_search(int f,struct sum_struct *s,
 		}
 
 		if (tablesize == TRADITIONAL_TABLESIZE) {
-			if ((i = hash_table[SUM2HASH2(s1,s2)]) < 0)
+			hash_entry = SUM2HASH2(s1,s2);
+			if ((i = hash_table[hash_entry]) < 0)
 				goto null_hash;
 			sum = (s1 & 0xffff) | (s2 << 16);
 		} else {
 			sum = (s1 & 0xffff) | (s2 << 16);
-			if ((i = hash_table[BIG_SUM2HASH(sum)]) < 0)
+			hash_entry = BIG_SUM2HASH(sum);
+			if ((i = hash_table[hash_entry]) < 0)
 				goto null_hash;
 		}
+		prev = &hash_table[hash_entry];
 
 		hash_hits++;
 		do {
 			int32 l;
 
+			/* When updating in-place, the chunk's offset must be
+			 * either >= our offset or identical data at that offset.
+			 * Remove any bypassed entries that we can never use. */
+			if (updating_basis_file && s->sums[i].offset < offset
+			    && !(s->sums[i].flags & SUMFLG_SAME_OFFSET)) {
+				*prev = s->sums[i].chain;
+				continue;
+			}
+			prev = &s->sums[i].chain;
+
 			if (sum != s->sums[i].sum1)
 				continue;
 
@@ -207,12 +221,6 @@ static void hash_search(int f,struct sum_struct *s,
 			if (l != s->sums[i].len)
 				continue;
 
-			/* in-place: ensure chunk's offset is either >= our
-			 * offset or that the data didn't move. */
-			if (updating_basis_file && s->sums[i].offset < offset
-			    && !(s->sums[i].flags & SUMFLG_SAME_OFFSET))
-				continue;
-
 			if (DEBUG_GTE(DELTASUM, 3)) {
 				rprintf(FINFO,
 					"potential match at %s i=%ld sum=%08x\n",


-- 
The rsync repository.


More information about the rsync-cvs mailing list