reducing memmoves

Wayne Davison wayned at samba.org
Mon Aug 2 17:54:19 GMT 2004


On Sun, Aug 01, 2004 at 06:16:05PM -0400, Chris Shoemaker wrote:
> Attached is a patch that makes window strides constant when files are
> walked with a constant block size.  In these cases, it completely
> avoids all memmoves.

Seems like a good start to me.  Here's a patch I created that also makes
these changes:

    - The map_file() function now takes the window-size directly
      rather than the block-size.  This lets the the caller choose
      the value.

    - Figure out an appropriate window-size for the receiver,
      sender, generator, and the file_checksum() function to send to
      map_file().

    - Also removed the (offset > 2*CHUNK_SIZE) check in map_ptr().
      (Did you leave this in for a reason?)

    - The sender now calls map_ptr() with a range of memory that
      encompasses both the rolling-checksum data and the data at
      last_match that we may need to reread.

    - Defined MAX_BLOCK_SIZE as a separate value from MAX_MAP_SIZE.

    - Increased the size of MAX_MAP_SIZE.

I think this should improve several things.  Comments?

..wayne..
-------------- next part --------------
--- checksum.c	2 Aug 2004 16:49:20 -0000	1.33
+++ checksum.c	2 Aug 2004 17:48:02 -0000
@@ -102,7 +102,7 @@ void file_checksum(char *fname,char *sum
 	if (fd == -1)
 		return;
 
-	buf = map_file(fd, size, CSUM_CHUNK);
+	buf = map_file(fd, size, CSUM_CHUNK * 1024);
 
 	mdfour_begin(&m);
 
--- fileio.c	20 Jul 2004 21:35:52 -0000	1.15
+++ fileio.c	2 Aug 2004 17:48:05 -0000
@@ -143,7 +143,7 @@ int write_file(int f,char *buf,size_t le
    read(). It gives sliding window access to a file. mmap() is not
    used because of the possibility of another program (such as a
    mailer) truncating the file thus giving us a SIGBUS */
-struct map_struct *map_file(int fd, OFF_T len, size_t block_size)
+struct map_struct *map_file(int fd, OFF_T len, size_t window_size)
 {
 	struct map_struct *map;
 
@@ -153,7 +153,7 @@ struct map_struct *map_file(int fd, OFF_
 	memset(map, 0, sizeof map[0]);
 	map->fd = fd;
 	map->file_size = len;
-	map->def_window_size = MAX(MAX_MAP_SIZE, block_size * 32);
+	map->def_window_size = window_size;
 
 	return map;
 }
@@ -181,12 +181,7 @@ char *map_ptr(struct map_struct *map,OFF
 	}
 
 	/* nope, we are going to have to do a read. Work out our desired window */
-	if (offset > 2*CHUNK_SIZE) {
-		window_start = offset - 2*CHUNK_SIZE;
-		window_start &= ~((OFF_T)(CHUNK_SIZE-1)); /* assumes power of 2 */
-	} else {
-		window_start = 0;
-	}
+	window_start = offset;
 	window_size = map->def_window_size;
 	if (window_start + window_size > map->file_size) {
 		window_size = map->file_size - window_start;
--- generator.c	29 Jul 2004 16:45:48 -0000	1.107
+++ generator.c	2 Aug 2004 17:48:05 -0000
@@ -154,7 +154,7 @@ static void sum_sizes_sqroot(struct sum_
 			c >>= 1;
 		} while (c >= 8);	/* round to multiple of 8 */
 		blength = MAX(blength, BLOCK_SIZE);
-		blength = MIN(blength, MAX_MAP_SIZE);
+		blength = MIN(blength, MAX_BLOCK_SIZE);
 	}
 
 	if (protocol_version < 27) {
@@ -208,9 +208,13 @@ static void generate_and_send_sums(int f
 
 	sum_sizes_sqroot(&sum, len);
 
-	if (len > 0)
-		mapbuf = map_file(fd, len, sum.blength);
-	else
+	if (len > 0) {
+		OFF_T map_size = MAX_MAP_SIZE;
+		if (map_size % sum.blength)
+			map_size += sum.blength - (map_size % sum.blength);
+
+		mapbuf = map_file(fd, len, map_size);
+	} else
 		mapbuf = NULL;
 
 	write_sum_head(f_out, &sum);
--- match.c	16 Jul 2004 20:06:24 -0000	1.67
+++ match.c	2 Aug 2004 17:48:05 -0000
@@ -141,11 +141,12 @@ static void matched(int f,struct sum_str
 static void hash_search(int f,struct sum_struct *s,
 			struct map_struct *buf, OFF_T len)
 {
-	OFF_T offset, end;
+	OFF_T offset, end, backup;
 	unsigned int k;
 	size_t want_i;
 	char sum2[SUM_LENGTH];
 	uint32 s1, s2, sum;
+	int more;
 	schar *map;
 
 	/* want_i is used to encourage adjacent matches, allowing the RLL
@@ -271,14 +272,20 @@ static void hash_search(int f,struct sum
 		} while (++j < s->count && targets[j].t == t);
 
 	null_tag:
+		backup = offset - last_match;
+		/* We sometimes read 1 byte prior to last_match... */
+		if (backup < 0)
+			backup = 0;
+
 		/* Trim off the first byte from the checksum */
-		map = (schar *)map_ptr(buf, offset, k+1);
+		more = offset + k < len;
+		map = (schar *)map_ptr(buf, offset - backup, k + more) + backup;
 		s1 -= map[0] + CHAR_OFFSET;
 		s2 -= k * (map[0]+CHAR_OFFSET);
 
 		/* Add on the next byte (if there is one) to the checksum */
-		if (k < (len-offset)) {
-			s1 += (map[k]+CHAR_OFFSET);
+		if (more) {
+			s1 += map[k] + CHAR_OFFSET;
 			s2 += s1;
 		} else
 			--k;
@@ -289,9 +296,8 @@ static void hash_search(int f,struct sum
 		   match. The 3 reads are caused by the
 		   running match, the checksum update and the
 		   literal send. */
-		if (offset > last_match
-		 && offset-last_match >= CHUNK_SIZE+s->blength
-		 && end-offset > CHUNK_SIZE) {
+		if (backup >= CHUNK_SIZE + s->blength
+		    && end - offset > CHUNK_SIZE) {
 			matched(f,s,buf,offset - s->blength, -2);
 		}
 	} while (++offset < end);
--- options.c	2 Aug 2004 07:40:34 -0000	1.170
+++ options.c	2 Aug 2004 17:48:05 -0000
@@ -660,10 +660,10 @@ int parse_arguments(int *argc, const cha
 	}
 #endif
 
-	if (block_size > MAX_MAP_SIZE) {
+	if (block_size > MAX_BLOCK_SIZE) {
 		rprintf(FINFO, "limiting block-size to %d bytes\n",
-			MAX_MAP_SIZE);
-		block_size = MAX_MAP_SIZE;
+			MAX_BLOCK_SIZE);
+		block_size = MAX_BLOCK_SIZE;
 	}
 
 	if (write_batch && read_batch) {
--- receiver.c	2 Aug 2004 04:50:33 -0000	1.103
+++ receiver.c	2 Aug 2004 17:48:06 -0000
@@ -222,7 +222,13 @@ static int receive_data(int f_in, char *
 	read_sum_head(f_in, &sum);
 
 	if (fd_r >= 0 && size_r > 0) {
-		mapbuf = map_file(fd_r, size_r, sum.blength);
+		OFF_T map_size = MAX(sum.blength * 2, 16*1024);
+		map_size = MIN(map_size, MAX_MAP_SIZE);
+		if (sum.blength && (map_size % sum.blength))
+			map_size += sum.blength - (map_size % sum.blength);
+
+		mapbuf = map_file(fd_r, size_r, map_size);
+
 		if (verbose > 2) {
 			rprintf(FINFO, "recv mapped %s of size %.0f\n",
 				safe_fname(fname_r), (double)size_r);
--- rsync.h	29 Jul 2004 16:06:38 -0000	1.209
+++ rsync.h	2 Aug 2004 17:48:06 -0000
@@ -90,7 +90,8 @@
 #define SPARSE_WRITE_SIZE (1024)
 #define WRITE_SIZE (32*1024)
 #define CHUNK_SIZE (32*1024)
-#define MAX_MAP_SIZE (256*1024)
+#define MAX_MAP_SIZE (512*1024)
+#define MAX_BLOCK_SIZE (256*1024)
 #define IO_BUFFER_SIZE (4092)
 
 #define IOERR_GENERAL	(1<<0) /* For backward compatibility, this must == 1 */
--- sender.c	26 Jul 2004 16:36:59 -0000	1.45
+++ sender.c	2 Aug 2004 17:48:06 -0000
@@ -208,7 +208,14 @@ void send_files(struct file_list *flist,
 			return;
 		}
 
-		mbuf = st.st_size ? map_file(fd, st.st_size, s->blength) : NULL;
+		if (st.st_size) {
+			OFF_T map_size = MAX(s->blength * 3, MAX_MAP_SIZE);
+			if (s->blength && (map_size % s->blength))
+				map_size += s->blength - (map_size % s->blength);
+
+			mbuf = map_file(fd, st.st_size, map_size);
+		} else
+			mbuf = NULL;
 
 		if (verbose > 2) {
 			rprintf(FINFO, "send_files mapped %s of size %.0f\n",


More information about the rsync mailing list