Added functionality --compare-file and --compare-auto

Andy Henson 30929 at zexia.co.uk
Sat Oct 18 04:49:00 EST 2003


Various needs for handling multiple versions of a file were discussed
on the list recently, and I put forward a plan of my own.  The proposal
for a --compare-file=<FILE> switch subsequently gained support, so I
have implemented it.  I have also implemented an experimental
--compare-auto switch, which uses a rule to decide which file to match
against.

Instructions for patch:

1. Install rsync-2.5.6 source
2. "patch -p1 < rsync-2.5.6-arh1.patch" (the code below)
3. edit configure to add "arh1" to the RSYNC_VERSION string and run
./configure, or if you've already run this, edit config.h to add
"arh1" to the RSYNC_VERSION string.
4. "make proto"  - to update proto.h file
5. "make"

Here's rsync-2.5.6-arh1.patch:
-----cut here-----
diff -aur rsync-2.5.6/generator.c rsync-arh/generator.c
--- rsync-2.5.6/generator.c	Thu Aug 29 14:44:55 2002
+++ rsync-arh/generator.c	Fri Oct 17 15:48:56 2003
@@ -5,6 +5,7 @@
    Copyright (C) 1996-2000 by Andrew Tridgell 
    Copyright (C) Paul Mackerras 1996
    Copyright (C) 2002 by Martin Pool <mbp at samba.org>
+   Copyright (C) 2003, Andy Henson, Zexia Access Ltd
    
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@@ -41,6 +42,8 @@
 extern int always_checksum;
 extern int modify_window;
 extern char *compare_dest;
+extern char *compare_file;
+extern int compare_auto;
 extern int link_dest;
 
 
@@ -357,29 +360,36 @@
 
 	fnamecmp = fname;
 
-	if ((statret == -1) && (compare_dest != NULL)) {
-		/* try the file at compare_dest instead */
+	if ((statret == -1) && compare_auto) {
+		compare_file = findcomparename(fname,fnamecmpbuf);
+	} else if ((statret == -1) && (compare_dest != NULL)) {
+		snprintf(fnamecmpbuf,MAXPATHLEN,"%s/%s",
+					compare_dest,fname);
+		compare_file = fnamecmpbuf;
+	}
+
+	if ((statret == -1) && (compare_file != NULL)) {
+		/*try this file instead (--compare-dest, --compare-file, --compare-auto)*/
 		int saveerrno = errno;
-		snprintf(fnamecmpbuf,MAXPATHLEN,"%s/%s",compare_dest,fname);
-		statret = link_stat(fnamecmpbuf,&st);
+		statret = link_stat(compare_file,&st);
 		if (!S_ISREG(st.st_mode))
 			statret = -1;
 		if (statret == -1)
 			errno = saveerrno;
 #if HAVE_LINK
 		else if (link_dest && !dry_run) {
-			if (do_link(fnamecmpbuf, fname) != 0) {
+			if (do_link(compare_file, fname) != 0) {
 				if (verbose > 0)
 					rprintf(FINFO,"link %s => %s : %s\n",
-						fnamecmpbuf,
+						compare_file,
 						fname,
 						strerror(errno));
 			}
-			fnamecmp = fnamecmpbuf;
+			fnamecmp = compare_file;
 		}
 #endif
 		else
-			fnamecmp = fnamecmpbuf;
+			fnamecmp = compare_file;
 	}
 
 	if (statret == -1) {
@@ -534,3 +544,86 @@
 		write_int(f,-1);
 	}
 }
+
+
+
+char * findcomparename(const char* fname, char* buf)
+	/* returns compare name, a valid file with name similar to @param fname.
+	 * Implements the --compare-auto name function.
+	 * May use @param buf as buffer for the name (size is MAXPATHLEN). */
+
+	 /* The algorithm: scans the directory for filenames where the names
+	 match once version information is stripped out.  Version information
+	 is assumed to start with digits after one of - . ; and to continue
+	 until a . not followed by a digit, or a - not followed by a digit,
+	 t, p or r.  This rather odd rule permits 2.4-test2, 2.4-rc4, 2.4-pre3
+	 to be ignored as versions.  Finally it selects the most recent of these
+	 which has a size no smaller than 90% of the biggest of any of them.
+	 I acknowledge these are pretty arbitrary rules - arh 17 October 2003 */
+{
+	char newname[MAXPATHLEN];
+	char tmpname[MAXPATHLEN];
+	time_t newtime=0;
+	size_t newsize=0;
+	struct dirent *di;
+	DIR *d;
+	char* dirname;
+	char *name;
+
+	strncpy(buf,fname,MAXPATHLEN);
+	dirname = buf;
+	name = strrchr(buf,'/');
+	if (name)
+		*name++ = 0;	//terminate name at end of directory part		
+	else {
+		name = (char*)fname;
+		dirname = ".";
+	}
+	if (compare_dest)
+		dirname = compare_dest;
+	if (verbose > 1)
+		rprintf(FINFO,"findcomparename: dir %s name %s\n",dirname,name);
+	d = opendir(dirname);
+	if (d) {
+		for (di = readdir(d); di; di = readdir(d)) {
+			char *dname = d_name(di);
+			char *p = name;
+			char *q = dname;
+			STRUCT_STAT st;
+			/* are files version-stripped names identical? */
+			while (*p && *q && *p==*q) {
+				if ((*p=='-'||*p=='.'||*p==';') && isdigit(p[1]))			/* skip version part */
+					do ++p; while (*p && (*p!='-' || isdigit(p[1])||p[1]=='t'||p[1]=='p'||p[1]=='r') && (*p!='.' || isdigit(p[1])));
+				if ((*q=='-'||*q=='.'||*q==';') && isdigit(q[1]))			/* skip version part */
+					do ++q; while (*q && (*q!='-' || isdigit(q[1])||q[1]=='t'||q[1]=='p'||q[1]=='r') && (*q!='.' || isdigit(q[1])));
+				++p;
+				++q;
+			}
+			if (*p != *q)
+				continue;	/* not identical */
+			/* identical: take best fit */
+			if (verbose > 1)
+				rprintf(FINFO,"findcomparename: candidate %s\n",dname);
+			strcpy(tmpname,dirname);
+			strcat(tmpname,"/");
+			strncat(tmpname,dname,MAXPATHLEN-strlen(tmpname));
+			tmpname[MAXPATHLEN-1]=0;
+			(void) link_stat(tmpname,&st);
+			if ((st.st_size >= newsize*9/10 && st.st_ctime >= newtime) || st.st_size >= newsize*10/9)
+			{
+				newsize=st.st_size;
+				newtime=st.st_ctime;
+				strcpy(newname,tmpname);
+			}
+		}
+		closedir(d);
+		if (newsize > 0)	/* ie, if we found one... */
+		{
+			strcpy(buf,newname);
+			if (verbose > 1)
+				rprintf(FINFO,"findcomparename: chose %s\n",buf);
+			return buf;
+		}
+	}
+	return 0;
+}
diff -aur rsync-2.5.6/options.c rsync-arh/options.c
--- rsync-2.5.6/options.c	Tue Jan 28 03:11:57 2003
+++ rsync-arh/options.c	Fri Oct 17 15:28:18 2003
@@ -2,6 +2,7 @@
  * 
  * Copyright (C) 1998-2001 by Andrew Tridgell <tridge at samba.org>
  * Copyright (C) 2000, 2001, 2002 by Martin Pool <mbp at samba.org>
+ * Copyright (C) 2003, Andy Henson, Zexia Access Ltd <andy.31016 at zexia.co.uk>
  * 
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -83,6 +84,7 @@
 int ignore_errors=0;
 int modify_window=0;
 int blocking_io=-1;
+int compare_auto=0;
 
 
 /** Network address family. **/
@@ -104,6 +106,7 @@
 char *backup_suffix = BACKUP_SUFFIX;
 char *tmpdir = NULL;
 char *compare_dest = NULL;
+char *compare_file = NULL;
 char *config_file = NULL;
 char *shell_cmd = NULL;
 char *log_format = NULL;
@@ -155,7 +158,7 @@
         rprintf(f, "%s  version %s  protocol version %d\n",
                 RSYNC_NAME, RSYNC_VERSION, PROTOCOL_VERSION);
         rprintf(f,
-                "Copyright (C) 1996-2002 by Andrew Tridgell and others\n");
+                "Copyright (C) 1996-2003 by Andrew Tridgell and others\n");
 	rprintf(f, "<http://rsync.samba.org/>\n");
         rprintf(f, "Capabilities: %d-bit files, %ssocketpairs, "
                 "%shard links, %ssymlinks, batchfiles, \n",
@@ -247,8 +250,10 @@
   rprintf(F," -I, --ignore-times          don't exclude files that match length and time\n");
   rprintf(F,"     --size-only             only use file size when determining if a file should be transferred\n");
   rprintf(F,"     --modify-window=NUM     Timestamp window (seconds) for file match (default=%d)\n",modify_window);
-  rprintf(F," -T  --temp-dir=DIR          create temporary files in directory DIR\n");
+  rprintf(F," -T, --temp-dir=DIR          create temporary files in directory DIR\n");
   rprintf(F,"     --compare-dest=DIR      also compare destination files relative to DIR\n");
+  rprintf(F,"     --compare-file=FILE     also compare to FILE\n");
+  rprintf(F," -A, --compare-auto          also compare to automatically selected file\n");
   rprintf(F," -P                          equivalent to --partial --progress\n");
   rprintf(F," -z, --compress              compress file data\n");
   rprintf(F,"     --exclude=PATTERN       exclude files matching PATTERN\n");
@@ -345,6 +350,8 @@
   {"timeout",          0,  POPT_ARG_INT,    &io_timeout , 0, 0, 0 },
   {"temp-dir",        'T', POPT_ARG_STRING, &tmpdir , 0, 0, 0 },
   {"compare-dest",     0,  POPT_ARG_STRING, &compare_dest , 0, 0, 0 },
+  {"compare-file",     0,  POPT_ARG_STRING, &compare_file , 0, 0, 0 },
+  {"compare-auto",    'A', POPT_ARG_NONE,   0 ,              'A', 0, 0 },
   {"link-dest",        0,  POPT_ARG_STRING, 0,               OPT_LINK_DEST, 0, 0 },
   /* TODO: Should this take an optional int giving the compression level? */
   {"compress",        'z', POPT_ARG_NONE,   &do_compression , 0, 0, 0 },
@@ -567,6 +574,10 @@
 			preserve_devices=1;
 			break;
 
+		case 'A':
+			compare_auto=1;
+			break;
+
 		case OPT_SENDER:
 			if (!am_server) {
 				usage(FERROR);
@@ -615,6 +626,13 @@
 		}
 	}
 
+	if ((compare_dest||compare_auto) && compare_file) {
+	    snprintf(err_buf,sizeof(err_buf),
+		"--compare-file cannot be used with another --compare-xxx option\n");
+	    rprintf(FERROR,"ERROR: --compare-file cannot be used with another --compare-xxx option\n");
+	    return 0;
+	}
+
 	if (write_batch && read_batch) {
 	    snprintf(err_buf,sizeof(err_buf),
 		"write-batch and read-batch can not be used together\n");
@@ -841,6 +859,13 @@
 		args[ac++] = link_dest ? "--link-dest" : "--compare-dest";
 		args[ac++] = compare_dest;
 	}
+	if (compare_file && am_sender) {
+		args[ac++] = "--compare-file";
+		args[ac++] = compare_file;
+	}
+	if (compare_auto && am_sender) {
+		args[ac++] = "--compare-auto";
+	}
 
 	*argc = ac;
 }
diff -aur rsync-2.5.6/proto.h rsync-arh/proto.h
--- rsync-2.5.6/proto.h	Mon Jan 27 03:35:09 2003
+++ rsync-arh/proto.h	Fri Oct 17 08:26:52 2003
@@ -31,7 +31,7 @@
 void sum_init(void);
 void sum_update(char *p, int len);
 void sum_end(char *sum);
-void close_all(void);
+void close_all();
 void _exit_cleanup(int code, const char *file, int line);
 void cleanup_disable(void);
 void cleanup_set(char *fnametmp, char *fname, struct file_struct *file,
@@ -93,6 +93,7 @@
 char *f_name(struct file_struct *f);
 void recv_generator(char *fname, struct file_list *flist, int i, int f_out);
 void generate_files(int f,struct file_list *flist,char *local_name,int f_recv);
+char * findcomparename(const char* fname, char* buf);
 int main(int argc, char *argv[]);
 void init_hard_links(struct file_list *flist);
 int check_hard_link(struct file_struct *file);
@@ -224,7 +225,6 @@
 OFF_T do_lseek(int fd, OFF_T offset, int whence);
 void *do_mmap(void *start, int len, int prot, int flags, int fd, OFF_T offset);
 char *d_name(struct dirent *di);
-int main(int argc, char **argv);
 int main (int argc, char *argv[]);
 void set_compression(char *fname);
 void send_token(int f,int token,struct map_struct *buf,OFF_T offset,
@@ -232,6 +232,7 @@
 int recv_token(int f,char **data);
 void see_token(char *data, int toklen);
 int main(int argc, char **argv);
+int main(int argc, char **argv);
 void add_uid(uid_t uid);
 void add_gid(gid_t gid);
 void send_uid_list(int f);
diff -aur rsync-2.5.6/receiver.c rsync-arh/receiver.c
--- rsync-2.5.6/receiver.c	Mon Jan 20 23:32:17 2003
+++ rsync-arh/receiver.c	Fri Oct 17 09:57:48 2003
@@ -2,6 +2,7 @@
    
    Copyright (C) 1996-2000 by Andrew Tridgell
    Copyright (C) Paul Mackerras 1996
+   Copyright (C) 2003, Andy Henson, Zexia Access Ltd
    
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@@ -34,6 +35,8 @@
 extern int io_error;
 extern char *tmpdir;
 extern char *compare_dest;
+extern char *compare_file;
+extern int compare_auto;
 extern int make_backups;
 extern char *backup_suffix;
 
@@ -373,11 +376,17 @@
 		/* open the file */  
 		fd1 = do_open(fnamecmp, O_RDONLY, 0);
 
-		if ((fd1 == -1) && (compare_dest != NULL)) {
-			/* try the file at compare_dest instead */
+		if ((fd1 == -1) && compare_auto) {
+			compare_file = findcomparename(fname,fnamecmpbuf);
+		} else if ((fd1 == -1) && (compare_dest != NULL)) {
 			snprintf(fnamecmpbuf,MAXPATHLEN,"%s/%s",
 						compare_dest,fname);
-			fnamecmp = fnamecmpbuf;
+			compare_file = fnamecmpbuf;
+		}
+
+		if ((fd1 == -1) && (compare_file != NULL)) {
+			/*try this file instead (--compare-dest, --compare-file, --compare-auto)*/
+			fnamecmp = compare_file;
 			fd1 = do_open(fnamecmp, O_RDONLY, 0);
 		}
 
@@ -446,6 +455,8 @@
 
 		if (!am_server) {
 			log_transfer(file, fname);
+			if (compare_auto && (verbose > 1))
+				rprintf(FINFO,"(compare-file %s)\n",fnamecmp);
 		}
 
 		/* recv file data */
diff -aur rsync-2.5.6/rsync.1 rsync-arh/rsync.1
--- rsync-2.5.6/rsync.1	Tue Jan 28 03:11:57 2003
+++ rsync-arh/rsync.1	Fri Oct 17 10:42:54 2003
@@ -364,6 +364,8 @@
      --modify-window=NUM     Timestamp window (seconds) for file match (default=0)
  -T  --temp-dir=DIR          create temporary files in directory DIR
      --compare-dest=DIR      also compare destination files relative to DIR
+     --compare-file=FILE     also compare to FILE
+ -A  --compare-auto          also compare to automatically selected file
      --link-dest=DIR         create hardlinks to DIR for unchanged files
  -P                          equivalent to --partial --progress
  -z, --compress              compress file data
@@ -781,8 +783,8 @@
 .IP "\fB--compare-dest=DIR\fP" 
 This option instructs rsync to use DIR on
 the destination machine as an additional directory to compare destination
-files against when doing transfers if the files are missing in the
-destination directory\&.  This is useful for doing transfers to a new
+files against when doing transfers {if the files are missing in the
+destination directory}\&.  This is useful for doing transfers to a new
 destination while leaving existing files intact, and then doing a
 flash-cutover when all files have been successfully transferred (for
 example by moving directories around and removing the old directory,
@@ -792,6 +794,18 @@
 have a chance to be completed\&.  If DIR is a relative path, it is relative
 to the destination directory\&.
 .IP 
+.IP "\fB--compare-file=FILE\fP"
+This option instructs rsync to use FILE on the destination machine as
+an additional file to compare against when doing transfers {if the
+files are missing in the destination directory}\&.
+.IP 
+.IP "\fB--compare-auto\fP"
+This option instructs rsync to automatically select a file on the destination
+machine to compare against when doing transfers {if the
+files are missing in the destination directory}\&.  The file will be selected
+from the files in the same directory unless --compare-dest is used to select
+another directory\&.
+.IP 
 .IP "\fB--link-dest=DIR\fP" 
 This option behaves like \fB--compare-dest\fP but
 also will create hard links from \fIDIR\fP to the destination directory for
-----cut here-----

Andy Henson
Zexia Access Ltd



More information about the rsync mailing list