[PATCH][RFC] space saving incrementals

jw schultz jw at pegasys.ws
Fri Mar 8 08:25:50 EST 2002


Please CC me directly as I'm not on the list.

I have attached a patch against latest CVS (cvs diff -u)
that adds the following functionality.  I can break it up if
you would prefer it in pieces.  Comments welcome.

	o add compare-perms option
		This creates a new inode for a file even if only
		the perms have changed.  This way if a file
		outside of destdir is hardlinked to a dentry
		inside destdir the permissions (uid, gid,
		mode) will be untouched preserving history
		etc.

	o link-dest option
		After setting compare_dest this causes
		unchanged files in destdir to be hardlinked
		to link-dest.

	o modified make_exclude_list to support stdin
		if --exclude-from has argument of "-"
		stdin will be read.
		This lets us pipe an include/exclude list
		into rsync so that we can generate it on the
		fly.

The upshot of these is to allow rsync to make incremental
backups without modifying earlier versions but keep every
version as a complete tree.  It then becomes possible to
make tapes or restore from any image.

Although --compare-perms saves on block count this patch
only applies it to regular files so symlinks and device
nodes will still chew up inodes. :( 

For the sake of an example, here is the command line I'm using:
	/site/bin/rsync -v --stats -a -H -e ssh --compare-perms
	    --delete --delete-excluded --numeric-ids --exclude-from -
	    --link-dest /e/backup1/home/update1/tree leto:/efs/home/
	    /e/backup1/home/update2/tree > /e/backup1/home/update2/log

I have tried to keep my changes small and consistent with the
existing coding style.  Feel free to dink with it.  I only
care about the performance.


-- 
________________________________________________________________
	J.W. Schultz            Pegasystems Technologies
	email address:		jw at pegasys.ws

		Remember Cernan and Schmitt
-------------- next part --------------
Index: exclude.c
===================================================================
RCS file: /cvsroot/rsync/exclude.c,v
retrieving revision 1.42
diff -u -r1.42 exclude.c
--- exclude.c	18 Feb 2002 19:10:28 -0000	1.42
+++ exclude.c	7 Mar 2002 20:56:02 -0000
@@ -219,8 +219,14 @@
 					  int fatal, int include)
 {
 	struct exclude_struct **list=list1;
-	FILE *f = fopen(fname,"r");
+	FILE *f;
 	char line[MAXPATHLEN];
+
+	if (strcmp(fname, "-")) {
+		f = fopen(fname,"r");
+	} else {
+		f = fdopen(0, "r");
+	}
 	if (!f) {
 		if (fatal) {
 			rsyserr(FERROR, errno,
Index: generator.c
===================================================================
RCS file: /cvsroot/rsync/generator.c,v
retrieving revision 1.33
diff -u -r1.33 generator.c
--- generator.c	7 Feb 2002 16:36:12 -0000	1.33
+++ generator.c	7 Mar 2002 20:56:02 -0000
@@ -42,6 +42,8 @@
 extern int always_checksum;
 extern int modify_window;
 extern char *compare_dest;
+extern int compare_perms;
+extern int link_dest;
 
 
 /* choose whether to skip a particular file */
@@ -51,6 +53,15 @@
 	if (st->st_size != file->length) {
 		return 0;
 	}
+	if (compare_perms) {
+		if((st->st_mode & ~_S_IFMT) !=  (file->mode & ~_S_IFMT)) {
+			return 0;
+		}
+		if (st->st_uid != file->uid || st->st_gid != file->gid) {
+			return 0;
+		}
+	}
+
 	
 	/* if always checksum is set then we use the checksum instead 
 	   of the file time to determine whether to sync */
@@ -352,6 +363,17 @@
 			statret = -1;
 		if (statret == -1)
 			errno = saveerrno;
+#if HAVE_LINK
+		else if (link_dest)
+		if (do_link(fnamecmpbuf, fname) != 0) {
+			if (verbose > 0)
+				rprintf(FINFO,"link %s => %s : %s\n",
+					fnamecmpbuf,
+					fname,
+					strerror(errno));
+			fnamecmp = fnamecmpbuf;
+		}
+#endif
 		else
 			fnamecmp = fnamecmpbuf;
 	}
Index: options.c
===================================================================
RCS file: /cvsroot/rsync/options.c,v
retrieving revision 1.80
diff -u -r1.80 options.c
--- options.c	27 Feb 2002 22:49:57 -0000	1.80
+++ options.c	7 Mar 2002 20:56:03 -0000
@@ -106,6 +106,9 @@
 int always_checksum = 0;
 int list_only = 0;
 
+int compare_perms = 0;
+int link_dest = 0;
+
 char *batch_prefix = NULL;
 
 static int modify_window_set;
@@ -195,6 +198,7 @@
   rprintf(F,"     --safe-links            ignore links outside the destination tree\n");
   rprintf(F," -H, --hard-links            preserve hard links\n");
   rprintf(F," -p, --perms                 preserve permissions\n");
+  rprintf(F,"     --compare-perms         update if permissions have changed\n");
   rprintf(F," -o, --owner                 preserve owner (root only)\n");
   rprintf(F," -g, --group                 preserve group\n");
   rprintf(F," -D, --devices               preserve devices (root only)\n");
@@ -224,6 +228,7 @@
   rprintf(F,"     --modify-window=NUM     Timestamp window (seconds) for file match (default=%d)\n",modify_window);
   rprintf(F," -T  --temp-dir=DIR          create temporary files in directory DIR\n");
   rprintf(F,"     --compare-dest=DIR      also compare destination files relative to DIR\n");
+  rprintf(F,"     --link-dest=DIR         create hardlinks to DIR for unchanged files\n");
   rprintf(F," -P                          equivalent to --partial --progress\n");
   rprintf(F," -z, --compress              compress file data\n");
   rprintf(F,"     --exclude=PATTERN       exclude files matching PATTERN\n");
@@ -266,7 +271,8 @@
       OPT_DELETE_AFTER, OPT_EXISTING, OPT_MAX_DELETE, OPT_BACKUP_DIR, 
       OPT_IGNORE_ERRORS, OPT_BWLIMIT, OPT_BLOCKING_IO,
       OPT_NO_BLOCKING_IO, OPT_NO_WHOLE_FILE,
-      OPT_MODIFY_WINDOW, OPT_READ_BATCH, OPT_WRITE_BATCH, OPT_IGNORE_EXISTING};
+      OPT_MODIFY_WINDOW, OPT_READ_BATCH, OPT_WRITE_BATCH, OPT_IGNORE_EXISTING,
+      OPT_COMPARE_PERMS, OPT_LINK_DEST};
 
 static struct poptOption long_options[] = {
   /* longName, shortName, argInfo, argPtr, value, descrip, argDesc */
@@ -302,6 +308,7 @@
   {"no-whole-file",    0,  POPT_ARG_NONE,   0, 		     OPT_NO_WHOLE_FILE},
   {"copy-unsafe-links", 0, POPT_ARG_NONE,   &copy_unsafe_links},
   {"perms",           'p', POPT_ARG_NONE,   &preserve_perms},
+  {"compare-perms",    0,  POPT_ARG_NONE,   &compare_perms},
   {"owner",           'o', POPT_ARG_NONE,   &preserve_uid},
   {"group",           'g', POPT_ARG_NONE,   &preserve_gid},
   {"devices",         'D', POPT_ARG_NONE,   &preserve_devices},
@@ -320,6 +327,7 @@
   {"timeout",          0,  POPT_ARG_INT,    &io_timeout},
   {"temp-dir",        'T', POPT_ARG_STRING, &tmpdir},
   {"compare-dest",     0,  POPT_ARG_STRING, &compare_dest},
+  {"link-dest",        0,  POPT_ARG_STRING, 0,               OPT_LINK_DEST},
   /* TODO: Should this take an optional int giving the compression level? */
   {"compress",        'z', POPT_ARG_NONE,   &do_compression},
   {"daemon",           0,  POPT_ARG_NONE,   &am_daemon},
@@ -535,6 +543,19 @@
 			/* popt stores the filename in batch_prefix for us */
 			read_batch = 1;
 			break;
+		case OPT_LINK_DEST:
+#if HAVE_LINK
+			compare_dest = poptGetOptArg(pc);
+			link_dest = 1;
+			break;
+#else
+			snprintf(err_buf,sizeof(err_buf),
+                                 "hard links are not supported on this %s\n",
+				 am_server ? "server" : "client");
+			rprintf(FERROR,"ERROR: hard links not supported on this platform\n");
+			return 0;
+#endif
+
 
 		default:
                         /* FIXME: If --daemon is specified, then errors for later
Index: rsync.1
===================================================================
RCS file: /cvsroot/rsync/rsync.1,v
retrieving revision 1.111
diff -u -r1.111 rsync.1
--- rsync.1	6 Feb 2002 21:21:19 -0000	1.111
+++ rsync.1	7 Mar 2002 20:56:04 -0000
@@ -261,6 +261,7 @@
      --safe-links            ignore links outside the destination tree
  -H, --hard-links            preserve hard links
  -p, --perms                 preserve permissions
+     --compare-perms         update if permissions have changed
  -o, --owner                 preserve owner (root only)
  -g, --group                 preserve group
  -D, --devices               preserve devices (root only)
@@ -290,6 +291,7 @@
      --modify-window=NUM     Timestamp window (seconds) for file match (default=0)
  -T  --temp-dir=DIR          create temporary files in directory DIR
      --compare-dest=DIR      also compare destination files relative to DIR
+     --link-dest=DIR         create hardlinks to DIR for unchanged files
  -P                          equivalent to --partial --progress
  -z, --compress              compress file data
      --exclude=PATTERN       exclude files matching PATTERN
@@ -485,6 +487,12 @@
 This option causes rsync to update the remote
 permissions to be the same as the local permissions\&.
 .IP 
+.IP "\fB--compare-perms\fP" 
+This option causes rsync to update
+the remote file when permissions have changed even if the file is unmodified\&.
+This preserves the permissions of any files outside the destination directory
+that have been hardlinked into the destination\&.
+.IP 
 .IP "\fB-o, --owner\fP" 
 This option causes rsync to set the owner of the
 destination file to be the same as the source file\&.  On most systems,
@@ -616,6 +624,7 @@
 option, but instead it adds all exclude patterns listed in the file
 FILE to the exclude list\&.  Blank lines in FILE and lines starting with
 \';\' or \'#\' are ignored\&.
+If \fIFILE\fP is \fB-\fP the list will be read from standard input\&.
 .IP 
 .IP "\fB--include=PATTERN\fP" 
 This option tells rsync to not exclude the
@@ -628,6 +637,7 @@
 .IP "\fB--include-from=FILE\fP" 
 This specifies a list of include patterns
 from a file\&.
+If \fIFILE\fP is \fB-\fP the list will be read from standard input\&.
 .IP 
 .IP "\fB-C, --cvs-exclude\fP" 
 This is a useful shorthand for excluding a
@@ -688,6 +698,12 @@
 --partial because partially transferred files will remain in the new
 temporary destination until they have a chance to be completed\&.  If DIR is
 a relative path, it is relative to the destination directory\&.
+.IP 
+.IP "\fB--link-dest=\fP\fIDIR\fP" 
+This option behaves like \fB--compare-dest\fP
+but also will create hard links from \fIDIR\fP
+to the destination directory
+for unchanged files\&.
 .IP 
 .IP "\fB-z, --compress\fP" 
 With this option, rsync compresses any data from


More information about the rsync mailing list