[SCM] The rsync repository. - branch master updated

Rsync CVS commit messages rsync-cvs at lists.samba.org
Sat May 23 01:06:00 UTC 2020


The branch, master has been updated
       via  a28bc3eb Promoting xxhash support.
       via  55bb4dab Some checksum improvements
      from  5fa4209c AVX2 optimized version of get_checksum1() for x86-64

https://git.samba.org/?p=rsync.git;a=shortlog;h=master


- Log -----------------------------------------------------------------
commit a28bc3ebf672113cdd90432ef0270782d8ab6c2c
Author: Wayne Davison <wayne at opencoder.net>
Date:   Fri May 22 17:40:08 2020 -0700

    Promoting xxhash support.

commit 55bb4dab7a4f132537b5ed93e504ffc618b643b9
Author: Wayne Davison <wayne at opencoder.net>
Date:   Fri May 22 15:06:20 2020 -0700

    Some checksum improvements
    
    - Improve csum negotiation logic.
    - Define the csum names in a single structure.
    - Add --debug=CSUM.

-----------------------------------------------------------------------

Summary of changes:
 checksum.c   | 240 +++++++++++++++++++++++++++++++++++++++++++++++++++--------
 compat.c     |  21 +++---
 configure.ac |  14 +++-
 flist.c      |   2 +-
 io.c         |   2 +-
 options.c    |  11 ++-
 rsync.h      |   3 +-
 rsync.yo     |   5 +-
 8 files changed, 247 insertions(+), 51 deletions(-)


Changeset truncated at 500 lines:

diff --git a/checksum.c b/checksum.c
index 54e2c4aa..17a9507a 100644
--- a/checksum.c
+++ b/checksum.c
@@ -20,6 +20,9 @@
  */
 
 #include "rsync.h"
+#ifdef SUPPORT_XXHASH
+#include "xxhash.h"
+#endif
 
 extern int am_server;
 extern int local_server;
@@ -36,9 +39,22 @@ extern char *checksum_choice;
 #define CSUM_MD4_OLD 3
 #define CSUM_MD4 4
 #define CSUM_MD5 5
+#define CSUM_XXHASH 6
+
+#define CSUM_SAW_BUFLEN 10
 
-const char *default_checksum_list =
-	"md5 md4";
+struct csum_struct {
+	int num;
+	const char *name;
+} valid_checksums[] = {
+#ifdef SUPPORT_XXHASH
+	{ CSUM_XXHASH, "xxhash" },
+#endif
+	{ CSUM_MD5, "md5" },
+	{ CSUM_MD4, "md4" },
+	{ CSUM_NONE, "none" },
+	{ -1, NULL }
+};
 
 #define MAX_CHECKSUM_LIST 1024
 
@@ -48,6 +64,8 @@ const char *negotiated_csum_name = NULL;
 
 static int parse_csum_name(const char *name, int len, int allow_auto)
 {
+	struct csum_struct *cs;
+
 	if (len < 0 && name)
 		len = strlen(name);
 
@@ -60,12 +78,11 @@ static int parse_csum_name(const char *name, int len, int allow_auto)
 			return CSUM_MD4_BUSTED;
 		return CSUM_MD4_ARCHAIC;
 	}
-	if (len == 3 && strncasecmp(name, "md4", 3) == 0)
-		return CSUM_MD4;
-	if (len == 3 && strncasecmp(name, "md5", 3) == 0)
-		return CSUM_MD5;
-	if (len == 4 && strncasecmp(name, "none", 4) == 0)
-		return CSUM_NONE;
+
+	for (cs = valid_checksums; cs->name; cs++) {
+		if (strncasecmp(name, cs->name, len) == 0 && cs->name[len] == '\0')
+			return cs->num;
+	}
 
 	if (allow_auto) {
 		rprintf(FERROR, "unknown checksum name: %s\n", name);
@@ -75,7 +92,22 @@ static int parse_csum_name(const char *name, int len, int allow_auto)
 	return -1;
 }
 
-void parse_checksum_choice(void)
+static const char *checksum_name(int num)
+{
+	struct csum_struct *cs;
+
+	for (cs = valid_checksums; cs->name; cs++) {
+		if (num == cs->num)
+			return cs->name;
+	}
+
+	if (num < CSUM_MD4)
+		return "MD4";
+
+	return "UNKNOWN";
+}
+
+void parse_checksum_choice(int final_call)
 {
 	if (!negotiated_csum_name) {
 		char *cp = checksum_choice ? strchr(checksum_choice, ',') : NULL;
@@ -85,46 +117,124 @@ void parse_checksum_choice(void)
 		} else
 			xfersum_type = checksum_type = parse_csum_name(checksum_choice, -1, 1);
 	}
+
 	if (xfersum_type == CSUM_NONE)
 		whole_file = 1;
+
+	if (final_call && DEBUG_GTE(CSUM, 1)) {
+		if (negotiated_csum_name)
+			rprintf(FINFO, "[%s] negotiated checksum: %s\n", who_am_i(), negotiated_csum_name);
+		else if (xfersum_type == checksum_type) {
+			rprintf(FINFO, "[%s] %s checksum: %s\n", who_am_i(),
+				checksum_choice ? "chosen" : "protocol-based",
+				checksum_name(xfersum_type));
+		} else {
+			rprintf(FINFO, "[%s] chosen transfer checksum: %s\n",
+				who_am_i(), checksum_name(xfersum_type));
+			rprintf(FINFO, "[%s] chosen pre-transfer checksum: %s\n",
+				who_am_i(), checksum_name(checksum_type));
+		}
+	}
+}
+
+static int parse_checksum_list(const char *from, char *sumbuf, int sumbuf_len, char *saw)
+{
+	char *to = sumbuf, *tok = NULL;
+	int cnt = 0;
+
+	memset(saw, 0, CSUM_SAW_BUFLEN);
+
+	while (1) {
+		if (*from == ' ' || !*from) {
+			if (tok) {
+				int sum_type = parse_csum_name(tok, to - tok, 0);
+				if (sum_type >= 0 && !saw[sum_type])
+					saw[sum_type] = ++cnt;
+				else
+					to = tok - (tok != sumbuf);
+				tok = NULL;
+			}
+			if (!*from++)
+				break;
+			continue;
+		}
+		if (!tok) {
+			if (to != sumbuf)
+				*to++ = ' ';
+			tok = to;
+		}
+		if (to - sumbuf >= sumbuf_len - 1) {
+			to = tok - (tok != sumbuf);
+			break;
+		}
+		*to++ = *from++;
+	}
+	*to = '\0';
+
+	return to - sumbuf;
 }
 
-void negotiate_checksum(int f_in, int f_out, const char *csum_list)
+void negotiate_checksum(int f_in, int f_out, const char *csum_list, int saw_fail)
 {
-	char *tok, sumbuf[MAX_CHECKSUM_LIST];
+	char *tok, sumbuf[MAX_CHECKSUM_LIST], saw[CSUM_SAW_BUFLEN];
 	int sum_type, len;
 
-	if (!am_server || local_server) {
-		if (!csum_list || !*csum_list)
-			csum_list = default_checksum_list;
-		len = strlen(csum_list);
-		if (len >= (int)sizeof sumbuf) {
-			rprintf(FERROR, "The checksum list is too long.\n");
-			exit_cleanup(RERR_UNSUPPORTED);
+	/* Simplify the user-provided string so that it contains valid
+	 * checksum names without any duplicates. The client side also
+	 * makes use of the saw values when scanning the server's list. */
+	if (csum_list && *csum_list && (!am_server || local_server)) {
+		len = parse_checksum_list(csum_list, sumbuf, sizeof sumbuf, saw);
+		if (saw_fail && !len)
+			len = strlcpy(sumbuf, "FAIL", sizeof sumbuf);
+		csum_list = sumbuf;
+	} else
+		csum_list = NULL;
+
+	if (!csum_list || !*csum_list) {
+		struct csum_struct *cs;
+		for (tok = sumbuf, cs = valid_checksums, len = 0; cs->name; cs++) {
+			if (cs->num == CSUM_NONE)
+				continue;
+			if (tok != sumbuf)
+				*tok++ = ' ';
+			tok += strlcpy(tok, cs->name, sizeof sumbuf - (tok - sumbuf));
+			saw[cs->num] = ++len;
 		}
-		if (!local_server)
-			write_vstring(f_out, csum_list, len);
+		*tok = '\0';
+		len = tok - sumbuf;
 	}
 
-	if (local_server && !read_batch)
-		memcpy(sumbuf, csum_list, len+1);
-	else
+	/* Each side sends their list of valid checksum names to the other side and
+	 * then both sides pick the first name in the client's list that is also in
+	 * the server's list. */
+	if (!local_server)
+		write_vstring(f_out, sumbuf, len);
+
+	if (!local_server || read_batch)
 		len = read_vstring(f_in, sumbuf, sizeof sumbuf);
 
 	if (len > 0) {
+		int best = CSUM_SAW_BUFLEN; /* We want best == 1 from the client list */
+		if (am_server)
+			memset(saw, 1, CSUM_SAW_BUFLEN); /* The first client's choice is the best choice */
 		for (tok = strtok(sumbuf, " \t"); tok; tok = strtok(NULL, " \t")) {
-			len = strlen(tok);
-			sum_type = parse_csum_name(tok, len, 0);
-			if (sum_type >= CSUM_NONE) {
-				xfersum_type = checksum_type = sum_type;
-				if (am_server && !local_server)
-					write_vstring(f_out, tok, len);
-				negotiated_csum_name = strdup(tok);
-				return;
-			}
+			sum_type = parse_csum_name(tok, -1, 0);
+			if (sum_type < 0 || !saw[sum_type] || best < saw[sum_type])
+				continue;
+			xfersum_type = checksum_type = sum_type;
+			negotiated_csum_name = tok;
+			best = saw[sum_type];
+			if (best == 1)
+				break;
+		}
+		if (negotiated_csum_name) {
+			negotiated_csum_name = strdup(negotiated_csum_name);
+			return;
 		}
 	}
 
+	if (!am_server)
+		msleep(20);
 	rprintf(FERROR, "Failed to negotiate a common checksum\n");
 	exit_cleanup(RERR_UNSUPPORTED);
 }
@@ -144,6 +254,10 @@ int csum_len_for_type(int cst, BOOL flist_csum)
 		return MD4_DIGEST_LEN;
 	  case CSUM_MD5:
 		return MD5_DIGEST_LEN;
+#ifdef SUPPORT_XXHASH
+	  case CSUM_XXHASH:
+		return sizeof (XXH64_hash_t);
+#endif
 	  default: /* paranoia to prevent missing case values */
 		exit_cleanup(RERR_UNSUPPORTED);
 	}
@@ -242,6 +356,11 @@ void get_checksum2(char *buf, int32 len, char *sum)
 		mdfour_result(&m, (uchar *)sum);
 		break;
 	  }
+#ifdef SUPPORT_XXHASH
+	  case CSUM_XXHASH: 
+		SIVAL64(sum, 0, XXH64(buf, len, checksum_seed));
+		break;
+#endif
 	  default: /* paranoia to prevent missing case values */
 		exit_cleanup(RERR_UNSUPPORTED);
 	}
@@ -298,6 +417,34 @@ void file_checksum(const char *fname, const STRUCT_STAT *st_p, char *sum)
 
 		mdfour_result(&m, (uchar *)sum);
 		break;
+#ifdef SUPPORT_XXHASH
+	  case CSUM_XXHASH: {
+		XXH64_state_t* state = XXH64_createState();
+		if (state == NULL)
+			out_of_memory("file_checksum xx64");
+
+		if (XXH64_reset(state, 0) == XXH_ERROR) {
+			rprintf(FERROR, "error resetting XXH64 seed");
+			exit_cleanup(RERR_STREAMIO);
+		}
+
+		for (i = 0; i + CSUM_CHUNK <= len; i += CSUM_CHUNK) {
+			XXH_errorcode const updateResult =
+			    XXH64_update(state, (uchar *)map_ptr(buf, i, CSUM_CHUNK), CSUM_CHUNK);
+			if (updateResult == XXH_ERROR) {
+				rprintf(FERROR, "error computing XX64 hash");
+				exit_cleanup(RERR_STREAMIO);
+			}
+		}
+		remainder = (int32)(len - i);
+		if (remainder > 0)
+			XXH64_update(state, (uchar *)map_ptr(buf, i, CSUM_CHUNK), remainder);
+		SIVAL64(sum, 0, XXH64_digest(state));
+
+		XXH64_freeState(state);
+		break;
+	  }
+#endif
 	  default:
 		rprintf(FERROR, "invalid checksum-choice for the --checksum option (%d)\n", checksum_type);
 		exit_cleanup(RERR_UNSUPPORTED);
@@ -310,6 +457,9 @@ void file_checksum(const char *fname, const STRUCT_STAT *st_p, char *sum)
 static int32 sumresidue;
 static md_context md;
 static int cursum_type;
+#ifdef SUPPORT_XXHASH
+XXH64_state_t* xxh64_state = NULL;
+#endif
 
 void sum_init(int csum_type, int seed)
 {
@@ -335,6 +485,19 @@ void sum_init(int csum_type, int seed)
 		SIVAL(s, 0, seed);
 		sum_update(s, 4);
 		break;
+#ifdef SUPPORT_XXHASH
+	  case CSUM_XXHASH:
+		if (xxh64_state == NULL) {
+			xxh64_state = XXH64_createState();
+			if (xxh64_state == NULL)
+				out_of_memory("sum_init xxh64");
+		}
+		if (XXH64_reset(xxh64_state, 0) == XXH_ERROR) {
+			rprintf(FERROR, "error resetting XXH64 state");
+			exit_cleanup(RERR_STREAMIO);
+		}
+		break;
+#endif
 	  case CSUM_NONE:
 		break;
 	  default: /* paranoia to prevent missing case values */
@@ -384,6 +547,14 @@ void sum_update(const char *p, int32 len)
 		if (sumresidue)
 			memcpy(md.buffer, p, sumresidue);
 		break;
+#ifdef SUPPORT_XXHASH
+	  case CSUM_XXHASH:
+		if (XXH64_update(xxh64_state, p, len) == XXH_ERROR) {
+			rprintf(FERROR, "error computing XX64 hash");
+			exit_cleanup(RERR_STREAMIO);
+		}
+		break;
+#endif
 	  case CSUM_NONE:
 		break;
 	  default: /* paranoia to prevent missing case values */
@@ -412,6 +583,11 @@ int sum_end(char *sum)
 			mdfour_update(&md, (uchar *)md.buffer, sumresidue);
 		mdfour_result(&md, (uchar *)sum);
 		break;
+#ifdef SUPPORT_XXHASH
+	  case CSUM_XXHASH:
+		SIVAL64(sum, 0, XXH64_digest(xxh64_state));
+		break;
+#endif
 	  case CSUM_NONE:
 		*sum = '\0';
 		break;
diff --git a/compat.c b/compat.c
index b29b9637..8c77ea69 100644
--- a/compat.c
+++ b/compat.c
@@ -367,24 +367,25 @@ void setup_protocol(int f_out,int f_in)
 	}
 #endif
 
-	if (am_server) {
-		if (!checksum_seed)
-			checksum_seed = time(NULL) ^ (getpid() << 6);
-		write_int(f_out, checksum_seed);
-	} else {
-		checksum_seed = read_int(f_in);
-	}
-
 	if (!checksum_choice) {
 		const char *rcl = getenv("RSYNC_CHECKSUM_LIST");
+		int saw_fail = rcl && strstr(rcl, "FAIL");
 		if (csum_exchange)
-			negotiate_checksum(f_in, f_out, rcl);
-		else if (!am_server && rcl && *rcl && strstr(rcl, "FAIL")) {
+			negotiate_checksum(f_in, f_out, rcl, saw_fail);
+		else if (!am_server && saw_fail) {
 			rprintf(FERROR, "Remote rsync is too old for checksum negotation\n");
 			exit_cleanup(RERR_UNSUPPORTED);
 		}
 	}
 
+	if (am_server) {
+		if (!checksum_seed)
+			checksum_seed = time(NULL) ^ (getpid() << 6);
+		write_int(f_out, checksum_seed);
+	} else {
+		checksum_seed = read_int(f_in);
+	}
+
 	init_flist();
 }
 
diff --git a/configure.ac b/configure.ac
index 554cf4ac..394f5b52 100644
--- a/configure.ac
+++ b/configure.ac
@@ -381,9 +381,21 @@ AC_CHECK_HEADERS(sys/fcntl.h sys/select.h fcntl.h sys/time.h sys/unistd.h \
     netdb.h malloc.h float.h limits.h iconv.h libcharset.h langinfo.h \
     sys/acl.h acl/libacl.h attr/xattr.h sys/xattr.h sys/extattr.h \
     popt.h popt/popt.h linux/falloc.h netinet/in_systm.h netinet/ip.h \
-    zlib.h)
+    zlib.h xxhash.h)
 AC_HEADER_MAJOR_FIXED
 
+dnl Do you want to disable use of xxhash checksums
+AC_ARG_ENABLE([xxhash],
+	AS_HELP_STRING([--disable-xxhash],[disable xxhash checksums]))
+AH_TEMPLATE([SUPPORT_XXHASH],
+[Undefine if you do not want xxhash checksums.  By default this is defined.])
+if test x"$enable_xxhash" != x"no"; then
+    if test x"$ac_cv_header_xxhash_h" = x"yes"; then
+	AC_SEARCH_LIBS(XXH64_createState, xxhash)
+	AC_DEFINE(SUPPORT_XXHASH)
+    fi
+fi
+
 AC_CACHE_CHECK([if makedev takes 3 args],rsync_cv_MAKEDEV_TAKES_3_ARGS,[
 AC_RUN_IFELSE([AC_LANG_SOURCE([[
 #include <sys/types.h>
diff --git a/flist.c b/flist.c
index 1f2b278d..940071fd 100644
--- a/flist.c
+++ b/flist.c
@@ -142,7 +142,7 @@ void init_flist(void)
 		rprintf(FINFO, "FILE_STRUCT_LEN=%d, EXTRA_LEN=%d\n",
 			(int)FILE_STRUCT_LEN, (int)EXTRA_LEN);
 	}
-	parse_checksum_choice(); /* Sets checksum_type && xfersum_type */
+	parse_checksum_choice(1); /* Sets checksum_type && xfersum_type */
 	flist_csum_len = csum_len_for_type(checksum_type, 1);
 
 	show_filelist_progress = INFO_GTE(FLIST, 1) && xfer_dirs && !am_server && !inc_recurse;
diff --git a/io.c b/io.c
index 446a5f34..189bc232 100644
--- a/io.c
+++ b/io.c
@@ -2369,8 +2369,8 @@ void start_write_batch(int fd)
 	write_int(batch_fd, protocol_version);
 	if (protocol_version >= 30)
 		write_varint(batch_fd, compat_flags);
-	write_int(batch_fd, checksum_seed);
 	maybe_write_checksum(batch_fd);
+	write_int(batch_fd, checksum_seed);
 
 	if (am_sender)
 		write_batch_monitor_out = fd;
diff --git a/options.c b/options.c
index e2adcf83..959c4205 100644
--- a/options.c
+++ b/options.c
@@ -276,6 +276,7 @@ static struct output_struct debug_words[COUNT_DEBUG+1] = {
 	DEBUG_WORD(CHDIR, W_CLI|W_SRV, "Debug when the current directory changes"),
 	DEBUG_WORD(CONNECT, W_CLI, "Debug connection events (levels 1-2)"),
 	DEBUG_WORD(CMD, W_CLI, "Debug commands+options that are issued (levels 1-2)"),
+	DEBUG_WORD(CSUM, W_CLI|W_SRV, "Debug checksum negotiation"),
 	DEBUG_WORD(DEL, W_REC, "Debug delete actions (levels 1-3)"),
 	DEBUG_WORD(DELTASUM, W_SND|W_REC, "Debug delta-transfer checksumming (levels 1-4)"),
 	DEBUG_WORD(DUP, W_REC, "Debug weeding of duplicate names"),
@@ -579,6 +580,7 @@ static void print_rsync_version(enum logcode f)
 	char const *iconv = "no ";
 	char const *ipv6 = "no ";
 	char const *simd = "no ";
+	char const *xxhash = "no ";
 	STRUCT_STAT *dumstat;
 
 #if SUBPROTOCOL_VERSION != 0
@@ -618,6 +620,9 @@ static void print_rsync_version(enum logcode f)
 #ifdef HAVE_SIMD
 	simd = "";
 #endif
+#ifdef SUPPORT_XXHASH
+	xxhash = "";
+#endif
 
 	rprintf(f, "%s  version %s  protocol version %d%s\n",
 		RSYNC_NAME, RSYNC_VERSION, PROTOCOL_VERSION, subprotocol);
@@ -631,8 +636,8 @@ static void print_rsync_version(enum logcode f)
 		(int)(sizeof (int64) * 8));
 	rprintf(f, "    %ssocketpairs, %shardlinks, %ssymlinks, %sIPv6, batchfiles, %sinplace,\n",
 		got_socketpair, hardlinks, links, ipv6, have_inplace);
-	rprintf(f, "    %sappend, %sACLs, %sxattrs, %siconv, %ssymtimes, %sprealloc, %sSIMD\n",
-		have_inplace, acls, xattrs, iconv, symtimes, prealloc, simd);
+	rprintf(f, "    %sappend, %sACLs, %sxattrs, %siconv, %ssymtimes, %sprealloc, %sSIMD, %sxxhash\n",
+		have_inplace, acls, xattrs, iconv, symtimes, prealloc, simd, xxhash);
 
 #ifdef MAINTAINER_MODE
 	rprintf(f, "Panic Action: \"%s\"\n", get_panic_action());
@@ -1932,7 +1937,7 @@ int parse_arguments(int *argc_p, const char ***argv_p)
 		/* Call this early to verify the args and figure out if we need to force
 		 * --whole-file. Note that the parse function will get called again later,
 		 * just in case an "auto" choice needs to know the protocol_version. */
-		parse_checksum_choice();
+		parse_checksum_choice(0);
 	} else
 		checksum_choice = NULL;
 
diff --git a/rsync.h b/rsync.h
index 0eb4fbc0..d6c5de52 100644
--- a/rsync.h


-- 
The rsync repository.



More information about the rsync-cvs mailing list