[SCM] The rsync repository. - branch master updated

Rsync CVS commit messages rsync-cvs at lists.samba.org
Wed Nov 23 13:36:18 MST 2011


The branch, master has been updated
       via  48b51d0 make repeated --fuzzy option look into alt-dest dirs.
      from  7da1714 Add compatibility with an unmodified zlib.

http://gitweb.samba.org/?p=rsync.git;a=shortlog;h=master


- Log -----------------------------------------------------------------
commit 48b51d0004922cb029c55fe921f5e7df1c0bff23
Author: Wayne Davison <wayned at samba.org>
Date:   Tue Nov 22 08:14:01 2011 -0800

    make repeated --fuzzy option look into alt-dest dirs.

-----------------------------------------------------------------------

Summary of changes:
 generator.c |  134 +++++++++++++++++++++++++++++++++++++----------------------
 main.c      |   35 ++++++++-------
 options.c   |   17 ++++++--
 receiver.c  |   17 +++++---
 rsync.yo    |    4 ++
 5 files changed, 131 insertions(+), 76 deletions(-)


Changeset truncated at 500 lines:

diff --git a/generator.c b/generator.c
index 25648ce..df690da 100644
--- a/generator.c
+++ b/generator.c
@@ -733,56 +733,75 @@ static int generate_and_send_sums(int fd, OFF_T len, int f_out, int f_copy)
 
 
 /* Try to find a filename in the same dir as "fname" with a similar name. */
-static int find_fuzzy(struct file_struct *file, struct file_list *dirlist)
+static struct file_struct *find_fuzzy(struct file_struct *file, struct file_list *dirlist_array[], uchar *fnamecmp_type_ptr)
 {
 	int fname_len, fname_suf_len;
 	const char *fname_suf, *fname = file->basename;
 	uint32 lowest_dist = 25 << 16; /* ignore a distance greater than 25 */
-	int j, lowest_j = -1;
+	int i, j;
+	struct file_struct *lowest_fp = NULL;
 
 	fname_len = strlen(fname);
 	fname_suf = find_filename_suffix(fname, fname_len, &fname_suf_len);
 
-	for (j = 0; j < dirlist->used; j++) {
-		struct file_struct *fp = dirlist->files[j];
-		const char *suf, *name;
-		int len, suf_len;
-		uint32 dist;
+	/* Try to find an exact size+mtime match first. */
+	for (i = 0; i < fuzzy_basis; i++) {
+		struct file_list *dirlist = dirlist_array[i];
 
-		if (!S_ISREG(fp->mode) || !F_LENGTH(fp)
-		 || fp->flags & FLAG_FILE_SENT)
+		if (!dirlist)
 			continue;
 
-		name = fp->basename;
+		for (j = 0; j < dirlist->used; j++) {
+			struct file_struct *fp = dirlist->files[j];
 
-		if (F_LENGTH(fp) == F_LENGTH(file)
-		    && cmp_time(fp->modtime, file->modtime) == 0) {
-			if (DEBUG_GTE(FUZZY, 2)) {
-				rprintf(FINFO,
-					"fuzzy size/modtime match for %s\n",
-					name);
+			if (!S_ISREG(fp->mode) || !F_LENGTH(fp) || fp->flags & FLAG_FILE_SENT)
+				continue;
+
+			if (F_LENGTH(fp) == F_LENGTH(file) && cmp_time(fp->modtime, file->modtime) == 0) {
+				if (DEBUG_GTE(FUZZY, 2))
+					rprintf(FINFO, "fuzzy size/modtime match for %s\n", f_name(fp, NULL));
+				*fnamecmp_type_ptr = FNAMECMP_FUZZY + i;
+				return fp;
 			}
-			return j;
+
 		}
+	}
 
-		len = strlen(name);
-		suf = find_filename_suffix(name, len, &suf_len);
+	for (i = 0; i < fuzzy_basis; i++) {
+		struct file_list *dirlist = dirlist_array[i];
 
-		dist = fuzzy_distance(name, len, fname, fname_len);
-		/* Add some extra weight to how well the suffixes match. */
-		dist += fuzzy_distance(suf, suf_len, fname_suf, fname_suf_len)
-		      * 10;
-		if (DEBUG_GTE(FUZZY, 2)) {
-			rprintf(FINFO, "fuzzy distance for %s = %d.%05d\n",
-				name, (int)(dist>>16), (int)(dist&0xFFFF));
-		}
-		if (dist <= lowest_dist) {
-			lowest_dist = dist;
-			lowest_j = j;
+		if (!dirlist)
+			continue;
+
+		for (j = 0; j < dirlist->used; j++) {
+			struct file_struct *fp = dirlist->files[j];
+			const char *suf, *name;
+			int len, suf_len;
+			uint32 dist;
+
+			if (!S_ISREG(fp->mode) || !F_LENGTH(fp) || fp->flags & FLAG_FILE_SENT)
+				continue;
+
+			name = fp->basename;
+			len = strlen(name);
+			suf = find_filename_suffix(name, len, &suf_len);
+
+			dist = fuzzy_distance(name, len, fname, fname_len);
+			/* Add some extra weight to how well the suffixes match. */
+			dist += fuzzy_distance(suf, suf_len, fname_suf, fname_suf_len) * 10;
+			if (DEBUG_GTE(FUZZY, 2)) {
+				rprintf(FINFO, "fuzzy distance for %s = %d.%05d\n",
+					f_name(fp, NULL), (int)(dist>>16), (int)(dist&0xFFFF));
+			}
+			if (dist <= lowest_dist) {
+				lowest_dist = dist;
+				lowest_fp = fp;
+				*fnamecmp_type_ptr = FNAMECMP_FUZZY + i;
+			}
 		}
 	}
 
-	return lowest_j;
+	return lowest_fp;
 }
 
 /* Copy a file found in our --copy-dest handling. */
@@ -1128,7 +1147,7 @@ static void recv_generator(char *fname, struct file_struct *file, int ndx,
 	/* Missing dir whose contents are skipped altogether due to
 	 * --ignore-non-existing, daemon exclude, or mkdir failure. */
 	static struct file_struct *skip_dir = NULL;
-	static struct file_list *fuzzy_dirlist = NULL;
+	static struct file_list *fuzzy_dirlist[MAX_BASIS_DIRS+1];
 	static int need_fuzzy_dirlist = 0;
 	struct file_struct *fuzzy_file = NULL;
 	int fd = -1, f_copy = -1;
@@ -1187,10 +1206,13 @@ static void recv_generator(char *fname, struct file_struct *file, int ndx,
 	}
 
 	if (dry_run > 1 || (dry_missing_dir && is_below(file, dry_missing_dir))) {
+		int i;
 	  parent_is_dry_missing:
-		if (fuzzy_dirlist) {
-			flist_free(fuzzy_dirlist);
-			fuzzy_dirlist = NULL;
+		for (i = 0; i < fuzzy_basis; i++) {
+			if (fuzzy_dirlist[i]) {
+				flist_free(fuzzy_dirlist[i]);
+				fuzzy_dirlist[i] = NULL;
+			}
 		}
 		parent_dirname = "";
 		statret = -1;
@@ -1209,12 +1231,16 @@ static void recv_generator(char *fname, struct file_struct *file, int ndx,
 						full_fname(dn));
 				}
 			}
-			if (fuzzy_dirlist) {
-				flist_free(fuzzy_dirlist);
-				fuzzy_dirlist = NULL;
-			}
-			if (fuzzy_basis)
+			if (fuzzy_basis) {
+				int i;
+				for (i = 0; i < fuzzy_basis; i++) {
+					if (fuzzy_dirlist[i]) {
+						flist_free(fuzzy_dirlist[i]);
+						fuzzy_dirlist[i] = NULL;
+					}
+				}
 				need_fuzzy_dirlist = 1;
+			}
 #ifdef SUPPORT_ACLS
 			if (!preserve_perms)
 				dflt_perms = default_perms_for_dir(dn);
@@ -1223,8 +1249,17 @@ static void recv_generator(char *fname, struct file_struct *file, int ndx,
 		parent_dirname = dn;
 
 		if (need_fuzzy_dirlist && S_ISREG(file->mode)) {
+			int i;
 			strlcpy(fnamecmpbuf, dn, sizeof fnamecmpbuf);
-			fuzzy_dirlist = get_dirlist(fnamecmpbuf, -1, GDL_IGNORE_FILTER_RULES);
+			for (i = 0; i < fuzzy_basis; i++) {
+				if (i && pathjoin(fnamecmpbuf, MAXPATHLEN, basis_dir[i-1], dn) >= MAXPATHLEN)
+					continue;
+				fuzzy_dirlist[i] = get_dirlist(fnamecmpbuf, -1, GDL_IGNORE_FILTER_RULES);
+				if (fuzzy_dirlist[i] && fuzzy_dirlist[i]->used == 0) {
+					flist_free(fuzzy_dirlist[i]);
+					fuzzy_dirlist[i] = NULL;
+				}
+			}
 			need_fuzzy_dirlist = 0;
 		}
 
@@ -1629,10 +1664,10 @@ static void recv_generator(char *fname, struct file_struct *file, int ndx,
 	} else
 		partialptr = NULL;
 
-	if (statret != 0 && fuzzy_dirlist) {
-		int j = find_fuzzy(file, fuzzy_dirlist);
-		if (j >= 0) {
-			fuzzy_file = fuzzy_dirlist->files[j];
+	if (statret != 0 && fuzzy_basis) {
+		/* Sets fnamecmp_type to FNAMECMP_FUZZY or above. */
+		fuzzy_file = find_fuzzy(file, fuzzy_dirlist, &fnamecmp_type);
+		if (fuzzy_file) {
 			f_name(fuzzy_file, fnamecmpbuf);
 			if (DEBUG_GTE(FUZZY, 1)) {
 				rprintf(FINFO, "fuzzy basis selected for %s: %s\n",
@@ -1641,7 +1676,6 @@ static void recv_generator(char *fname, struct file_struct *file, int ndx,
 			sx.st.st_size = F_LENGTH(fuzzy_file);
 			statret = 0;
 			fnamecmp = fnamecmpbuf;
-			fnamecmp_type = FNAMECMP_FUZZY;
 		}
 	}
 
@@ -1717,10 +1751,10 @@ static void recv_generator(char *fname, struct file_struct *file, int ndx,
 		goto notify_others;
 	}
 
-	if (fuzzy_dirlist) {
-		int j = flist_find(fuzzy_dirlist, file);
+	if (fuzzy_dirlist[0]) {
+		int j = flist_find(fuzzy_dirlist[0], file);
 		if (j >= 0) /* don't use changing file as future fuzzy basis */
-			fuzzy_dirlist->files[j]->flags |= FLAG_FILE_SENT;
+			fuzzy_dirlist[0]->files[j]->flags |= FLAG_FILE_SENT;
 	}
 
 	/* open the file */
@@ -1790,7 +1824,7 @@ static void recv_generator(char *fname, struct file_struct *file, int ndx,
 			iflags |= ITEM_REPORT_CHANGE;
 		if (fnamecmp_type != FNAMECMP_FNAME)
 			iflags |= ITEM_BASIS_TYPE_FOLLOWS;
-		if (fnamecmp_type == FNAMECMP_FUZZY)
+		if (fnamecmp_type >= FNAMECMP_FUZZY)
 			iflags |= ITEM_XNAME_FOLLOWS;
 		itemize(fnamecmp, file, -1, real_ret, &real_sx, iflags, fnamecmp_type,
 			fuzzy_file ? fuzzy_file->basename : NULL);
diff --git a/main.c b/main.c
index 17ba62d..93cd50d 100644
--- a/main.c
+++ b/main.c
@@ -76,6 +76,7 @@ extern size_t bwlimit_writemax;
 extern unsigned int module_dirlen;
 extern BOOL flist_receiving_enabled;
 extern BOOL shutting_down;
+extern int basis_dir_cnt;
 extern struct stats stats;
 extern char *stdout_format;
 extern char *logfile_format;
@@ -705,33 +706,35 @@ static char *get_local_name(struct file_list *flist, char *dest_path)
 static void check_alt_basis_dirs(void)
 {
 	STRUCT_STAT st;
-	char **dir_p, *slash = strrchr(curr_dir, '/');
-
-	for (dir_p = basis_dir; *dir_p; dir_p++) {
-		if (dry_run > 1 && **dir_p != '/') {
-			int len = curr_dir_len + 1 + strlen(*dir_p) + 1;
+	char *slash = strrchr(curr_dir, '/');
+	int j;
+
+	for (j = 0; j < basis_dir_cnt; j++) {
+		char *bdir = basis_dir[j];
+		int bd_len = strlen(bdir);
+		if (bd_len > 1 && bdir[bd_len-1] == '/')
+			bdir[--bd_len] = '\0';
+		if (dry_run > 1 && *bdir != '/') {
+			int len = curr_dir_len + 1 + bd_len + 1;
 			char *new = new_array(char, len);
 			if (!new)
 				out_of_memory("check_alt_basis_dirs");
-			if (slash && strncmp(*dir_p, "../", 3) == 0) {
+			if (slash && strncmp(bdir, "../", 3) == 0) {
 			    /* We want to remove only one leading "../" prefix for
 			     * the directory we couldn't create in dry-run mode:
 			     * this ensures that any other ".." references get
 			     * evaluated the same as they would for a live copy. */
 			    *slash = '\0';
-			    pathjoin(new, len, curr_dir, *dir_p + 3);
+			    pathjoin(new, len, curr_dir, bdir + 3);
 			    *slash = '/';
 			} else
-			    pathjoin(new, len, curr_dir, *dir_p);
-			*dir_p = new;
-		}
-		if (do_stat(*dir_p, &st) < 0) {
-			rprintf(FWARNING, "%s arg does not exist: %s\n",
-				dest_option, *dir_p);
-		} else if (!S_ISDIR(st.st_mode)) {
-			rprintf(FWARNING, "%s arg is not a dir: %s\n",
-				dest_option, *dir_p);
+			    pathjoin(new, len, curr_dir, bdir);
+			basis_dir[j] = bdir = new;
 		}
+		if (do_stat(bdir, &st) < 0)
+			rprintf(FWARNING, "%s arg does not exist: %s\n", dest_option, bdir);
+		else if (!S_ISDIR(st.st_mode))
+			rprintf(FWARNING, "%s arg is not a dir: %s\n", dest_option, bdir);
 	}
 }
 
diff --git a/options.c b/options.c
index e8db07c..9e95c86 100644
--- a/options.c
+++ b/options.c
@@ -955,7 +955,7 @@ static struct poptOption long_options[] = {
   {"compare-dest",     0,  POPT_ARG_STRING, 0, OPT_COMPARE_DEST, 0, 0 },
   {"copy-dest",        0,  POPT_ARG_STRING, 0, OPT_COPY_DEST, 0, 0 },
   {"link-dest",        0,  POPT_ARG_STRING, 0, OPT_LINK_DEST, 0, 0 },
-  {"fuzzy",           'y', POPT_ARG_VAL,    &fuzzy_basis, 1, 0, 0 },
+  {"fuzzy",           'y', POPT_ARG_NONE,   0, 'y', 0, 0 },
   {"no-fuzzy",         0,  POPT_ARG_VAL,    &fuzzy_basis, 0, 0, 0 },
   {"no-y",             0,  POPT_ARG_VAL,    &fuzzy_basis, 0, 0, 0 },
   {"compress",        'z', POPT_ARG_NONE,   0, 'z', 0, 0 },
@@ -1500,6 +1500,10 @@ int parse_arguments(int *argc_p, const char ***argv_p)
 			verbose++;
 			break;
 
+		case 'y':
+			fuzzy_basis++;
+			break;
+
 		case 'q':
 			quiet++;
 			break;
@@ -1845,6 +1849,9 @@ int parse_arguments(int *argc_p, const char ***argv_p)
 	}
 #endif
 
+	if (fuzzy_basis > 1)
+		fuzzy_basis = basis_dir_cnt + 1;
+
 	if (protect_args == 1 && am_server)
 		return 1;
 
@@ -2342,6 +2349,11 @@ void server_options(char **args, int *argc_p)
 			argstr[x++] = 'O';
 		if (omit_link_times)
 			argstr[x++] = 'J';
+		if (fuzzy_basis) {
+			argstr[x++] = 'y';
+			if (fuzzy_basis > 1)
+				argstr[x++] = 'y';
+		}
 	} else {
 		if (copy_links)
 			argstr[x++] = 'L';
@@ -2680,9 +2692,6 @@ void server_options(char **args, int *argc_p)
 	if (relative_paths && !implied_dirs && (!am_sender || protocol_version >= 30))
 		args[ac++] = "--no-implied-dirs";
 
-	if (fuzzy_basis && am_sender)
-		args[ac++] = "--fuzzy";
-
 	if (remove_source_files == 1)
 		args[ac++] = "--remove-source-files";
 	else if (remove_source_files)
diff --git a/receiver.c b/receiver.c
index 1819830..3ab893d 100644
--- a/receiver.c
+++ b/receiver.c
@@ -699,21 +699,26 @@ int recv_files(int f_in, int f_out, char *local_name)
 				break;
 			case FNAMECMP_FUZZY:
 				if (file->dirname) {
-					pathjoin(fnamecmpbuf, MAXPATHLEN,
-						 file->dirname, xname);
+					pathjoin(fnamecmpbuf, sizeof fnamecmpbuf, file->dirname, xname);
 					fnamecmp = fnamecmpbuf;
 				} else
 					fnamecmp = xname;
 				break;
 			default:
-				if (fnamecmp_type >= basis_dir_cnt) {
+				if (fnamecmp_type > FNAMECMP_FUZZY && fnamecmp_type-FNAMECMP_FUZZY <= basis_dir_cnt) {
+					fnamecmp_type -= FNAMECMP_FUZZY + 1;
+					if (file->dirname) {
+						stringjoin(fnamecmpbuf, sizeof fnamecmpbuf,
+							   basis_dir[fnamecmp_type], "/", file->dirname, "/", xname, NULL);
+					} else
+						pathjoin(fnamecmpbuf, sizeof fnamecmpbuf, basis_dir[fnamecmp_type], xname);
+				} else if (fnamecmp_type >= basis_dir_cnt) {
 					rprintf(FERROR,
 						"invalid basis_dir index: %d.\n",
 						fnamecmp_type);
 					exit_cleanup(RERR_PROTOCOL);
-				}
-				pathjoin(fnamecmpbuf, sizeof fnamecmpbuf,
-					 basis_dir[fnamecmp_type], fname);
+				} else
+					pathjoin(fnamecmpbuf, sizeof fnamecmpbuf, basis_dir[fnamecmp_type], fname);
 				fnamecmp = fnamecmpbuf;
 				break;
 			}
diff --git a/rsync.yo b/rsync.yo
index 3c0bfc0..43f264d 100644
--- a/rsync.yo
+++ b/rsync.yo
@@ -1748,6 +1748,10 @@ looks in the same directory as the destination file for either a file that
 has an identical size and modified-time, or a similarly-named file.  If
 found, rsync uses the fuzzy basis file to try to speed up the transfer.
 
+If the option is repeated, the fuzzy scan will also be done in any alternate
+destination directories that are specified via bf(--compare-dest),
+bf(--copy-dest), or bf(--link-dest).
+
 Note that the use of the bf(--delete) option might get rid of any potential
 fuzzy-match files, so either use bf(--delete-after) or specify some
 filename exclusions if you need to prevent this.


-- 
The rsync repository.


More information about the rsync-cvs mailing list