[PATCH] --source-filter && --dest-filter for rsync 2.5.6

Miroslav Rudisin miero at matfyz.cz
Mon Nov 17 03:55:20 EST 2003


Hi,

I needed to filter the content of files (encrypt them) before they are sent over the network to a backup server.
The easiest way to do this was to modify Kyle Jones's "--dest-filter" patch.

Somebody asked for this feature here in the past, so I'm sending this patch to the list.

Implementation details:
- filtering disables the rsync algorithm (whole-file transfer is forced)
- the source filter creates temporary files in /tmp (there should be enough disk space)
- if the source filter fails, the data is sent unfiltered
- failure of the destination filter causes data loss!!!
- if a filter changes the size of a file, you should use the --times-only option
  to prevent repeated transfers of unchanged files

miEro


diff -ur rsync-2.5.6/generator.c rsync-2.5.6-filtered/generator.c
--- rsync-2.5.6/generator.c	2002-08-29 16:44:55.000000000 +0200
+++ rsync-2.5.6-filtered/generator.c	2003-11-16 13:57:54.000000000 +0100
@@ -35,6 +35,7 @@
 extern int block_size;
 extern int csum_length;
 extern int ignore_times;
+extern int times_only;
 extern int size_only;
 extern int io_timeout;
 extern int remote_version;
@@ -48,8 +49,10 @@
 static int skip_file(char *fname,
 		     struct file_struct *file, STRUCT_STAT *st)
 {
-	if (st->st_size != file->length) {
-		return 0;
+	if (! times_only) {
+		if (st->st_size != file->length) {
+			return 0;
+		}
 	}
 	if (link_dest) {
 		if((st->st_mode & ~_S_IFMT) !=  (file->mode & ~_S_IFMT)) {
@@ -59,6 +62,9 @@
 			return 0;
 		}
 	}
+	if (times_only) {
+		return (cmp_modtime(st->st_mtime,file->modtime) == 0);
+	}
 
 	
 	/* if always checksum is set then we use the checksum instead 
diff -ur rsync-2.5.6/options.c rsync-2.5.6-filtered/options.c
--- rsync-2.5.6/options.c	2003-01-28 04:11:57.000000000 +0100
+++ rsync-2.5.6-filtered/options.c	2003-11-16 14:06:29.000000000 +0100
@@ -48,6 +48,9 @@
 int dry_run=0;
 int local_server=0;
 int ignore_times=0;
+char *source_filter = NULL;
+char *dest_filter = NULL;
+int times_only=0;
 int delete_mode=0;
 int delete_excluded=0;
 int one_file_system=0;
@@ -246,6 +249,7 @@
   rprintf(F,"     --timeout=TIME          set IO timeout in seconds\n");
   rprintf(F," -I, --ignore-times          don't exclude files that match length and time\n");
   rprintf(F,"     --size-only             only use file size when determining if a file should be transferred\n");
+  rprintf(F,"     --times-only             only use file modification time when determining if a file should be transferred\n");
   rprintf(F,"     --modify-window=NUM     Timestamp window (seconds) for file match (default=%d)\n",modify_window);
   rprintf(F," -T  --temp-dir=DIR          create temporary files in directory DIR\n");
   rprintf(F,"     --compare-dest=DIR      also compare destination files relative to DIR\n");
@@ -270,6 +274,8 @@
   rprintf(F,"     --bwlimit=KBPS          limit I/O bandwidth, KBytes per second\n");
   rprintf(F,"     --write-batch=PREFIX    write batch fileset starting with PREFIX\n");
   rprintf(F,"     --read-batch=PREFIX     read batch fileset starting with PREFIX\n");
+  rprintf(F,"     --source-filter=COMMAND filter file through COMMAND at source\n");
+  rprintf(F,"     --dest-filter=COMMAND   filter file through COMMAND at destination\n");
   rprintf(F," -h, --help                  show this help screen\n");
 #ifdef INET6
   rprintf(F," -4                          prefer IPv4\n");
@@ -283,6 +289,7 @@
 }
 
 enum {OPT_VERSION = 1000, OPT_SUFFIX, OPT_SENDER, OPT_SERVER, OPT_EXCLUDE,
+      OPT_SOURCE_FILTER, OPT_DEST_FILTER,
       OPT_EXCLUDE_FROM, OPT_DELETE, OPT_DELETE_EXCLUDED, OPT_NUMERIC_IDS,
       OPT_RSYNC_PATH, OPT_FORCE, OPT_TIMEOUT, OPT_DAEMON, OPT_CONFIG, OPT_PORT,
       OPT_INCLUDE, OPT_INCLUDE_FROM, OPT_STATS, OPT_PARTIAL, OPT_PROGRESS,
@@ -300,6 +307,9 @@
   {"rsync-path",       0,  POPT_ARG_STRING, &rsync_path,	0, 0, 0 },
   {"password-file",    0,  POPT_ARG_STRING, &password_file,	0, 0, 0 },
   {"ignore-times",    'I', POPT_ARG_NONE,   &ignore_times , 0, 0, 0 },
+  {"times-only",       0,  POPT_ARG_NONE,   &times_only , 0, 0, 0 },
+  {"source-filter",    0,  POPT_ARG_STRING, &source_filter , OPT_SOURCE_FILTER, 0, 0 },
+  {"dest-filter",      0,  POPT_ARG_STRING, &dest_filter , OPT_DEST_FILTER, 0, 0 },
   {"size-only",        0,  POPT_ARG_NONE,   &size_only , 0, 0, 0 },
   {"modify-window",    0,  POPT_ARG_INT,    &modify_window, OPT_MODIFY_WINDOW, 0, 0 },
   {"one-file-system", 'x', POPT_ARG_NONE,   &one_file_system , 0, 0, 0 },
@@ -471,6 +481,16 @@
                         print_rsync_version(FINFO);
 			exit_cleanup(0);
 			
+		case OPT_SOURCE_FILTER:
+			/* source_filter already set by popt */
+			whole_file = 1;
+			break;
+
+		case OPT_DEST_FILTER:
+			/* dest_filter already set by popt */
+			whole_file = 1;
+			break;
+
 		case OPT_SUFFIX:
                         /* The value has already been set by popt, but
                          * we need to remember that a suffix was specified
@@ -631,6 +651,13 @@
 	    return 0;
 	}
 
+	if ((source_filter || dest_filter) && no_whole_file) {
+	    snprintf(err_buf,sizeof(err_buf),
+		"no-whole-file can not be used with source-filter or dest-filter\n");
+	    rprintf(FERROR,"ERROR: no-whole-file can not be used with source-filter or dest-filter\n");
+	    return 0;
+	}
+
         *argv = poptGetArgs(pc);
         if (*argv)
                 *argc = count_args(*argv);
@@ -784,6 +811,17 @@
 	if (delete_excluded)
 		args[ac++] = "--delete-excluded";
 
+	if (times_only)
+		args[ac++] = "--times-only";
+
+	if (dest_filter) {
+		static char buf[1000];
+		/* have to single quote the arg to keep the
+		   remote shell from splitting it */
+	        snprintf(buf, sizeof(buf), "--dest-filter='%s'", dest_filter);
+		args[ac++] = buf;
+	}
+
 	if (size_only)
 		args[ac++] = "--size-only";
 
diff -ur rsync-2.5.6/pipe.c rsync-2.5.6-filtered/pipe.c
--- rsync-2.5.6/pipe.c	2002-04-08 09:39:56.000000000 +0200
+++ rsync-2.5.6-filtered/pipe.c	2003-11-16 13:20:34.000000000 +0100
@@ -146,3 +146,130 @@
 }
 
 
+/**
+ * Run a filter command with its stdin fed from a new pipe and its stdout
+ * redirected to the descriptor "out".
+ *
+ * command:        NULL-terminated argv handed to execvp(); command[0] is
+ *                 the program to run.
+ * out:            descriptor dup2()ed onto the filter's stdout.  It is not
+ *                 closed in the parent; the caller keeps its own copy.
+ * pipe_to_filter: receives the write end of the pipe.  The caller writes
+ *                 the unfiltered data there and closes it to signal EOF.
+ *
+ * Returns the pid of the filter process.  pipe(), fork(), dup2() and
+ * execvp() failures are reported with rprintf(FERROR, ...) and handed to
+ * exit_cleanup(RERR_IPC).
+ **/
+pid_t run_filter(char *command[], int out, int *pipe_to_filter)
+{
+	pid_t pid;
+	int pipefds[2];
+	extern int blocking_io;
+
+	if (verbose >= 2) {
+		print_child_argv(command);
+	}
+
+	if (pipe(pipefds) < 0) {
+		rprintf(FERROR, "pipe: %s\n", strerror(errno));
+		exit_cleanup(RERR_IPC);
+	}
+
+	pid = fork();
+	if (pid == -1) {
+		rprintf(FERROR, "fork: %s\n", strerror(errno));
+		exit_cleanup(RERR_IPC);
+	}
+
+	if (pid == 0) {
+		extern int orig_umask;
+		if (dup2(pipefds[0], STDIN_FILENO) < 0) {
+			rprintf(FERROR, "Failed dup2 to child stdin : %s\n",
+				strerror(errno));
+			exit_cleanup(RERR_IPC);
+		}
+		if (dup2(out, STDOUT_FILENO) < 0) {
+			rprintf(FERROR, "Failed dup2 to child stdout : %s\n",
+				strerror(errno));
+			exit_cleanup(RERR_IPC);
+		}
+		/* Drop the child's copy of the write end so the filter sees
+		 * EOF once the parent closes its copy. */
+		close(pipefds[1]);
+		umask(orig_umask);
+		set_blocking(STDIN_FILENO);
+		if (blocking_io) {
+			set_blocking(STDOUT_FILENO);
+		}
+		execvp(command[0], command);
+		rprintf(FERROR, "Failed to exec %s : %s\n",
+			command[0], strerror(errno));
+		exit_cleanup(RERR_IPC);
+	}
+
+	/* Parent: only the child reads from the pipe.  Keeping the read end
+	 * open here would leak one descriptor per call and would hide a dead
+	 * filter from the writer (no EPIPE while a reader still exists). */
+	close(pipefds[0]);
+
+	*pipe_to_filter = pipefds[1];
+
+	return pid;
+}
+
+/**
+ * Run a filter command with its stdin redirected to "in" and its stdout
+ * redirected to "out".  No pipe is created and the child is not waited
+ * for here.
+ *
+ * command: NULL-terminated argv handed to execvp(); command[0] is the
+ *          program to run.
+ * out:     descriptor dup2()ed onto the filter's stdout.
+ * in:      descriptor dup2()ed onto the filter's stdin.
+ *
+ * Returns the pid of the filter process.  fork(), dup2() and execvp()
+ * failures are reported with rprintf(FERROR, ...) and handed to
+ * exit_cleanup(RERR_IPC).  Neither descriptor is closed in the parent.
+ **/
+pid_t run_filter_on_file(char *command[], int out, int in)
+{
+	pid_t pid;
+	extern int blocking_io;
+
+	if (verbose >= 2) {
+		print_child_argv(command);
+	}
+
+	pid = fork();
+	if (pid == -1) {
+		rprintf(FERROR, "fork: %s\n", strerror(errno));
+		exit_cleanup(RERR_IPC);
+	}
+
+	if (pid == 0) {
+		extern int orig_umask;
+		if (dup2(in, STDIN_FILENO) < 0) {
+			rprintf(FERROR, "Failed dup2 to child stdin : %s\n",
+				strerror(errno));
+			exit_cleanup(RERR_IPC);
+		}
+		if (dup2(out, STDOUT_FILENO) < 0) {
+			rprintf(FERROR, "Failed dup2 to child stdout : %s\n",
+				strerror(errno));
+			exit_cleanup(RERR_IPC);
+		}
+		/* Restore orig_umask in the child before exec, matching
+		 * what run_filter() does for its child. */
+		umask(orig_umask);
+		if (blocking_io) {
+			set_blocking(STDOUT_FILENO);
+		}
+		execvp(command[0], command);
+		rprintf(FERROR, "Failed to exec %s : %s\n",
+			command[0], strerror(errno));
+		exit_cleanup(RERR_IPC);
+	}
+
+	return pid;
+}
diff -ur rsync-2.5.6/proto.h rsync-2.5.6-filtered/proto.h
--- rsync-2.5.6/proto.h	2003-01-27 04:35:09.000000000 +0100
+++ rsync-2.5.6-filtered/proto.h	2003-11-16 13:20:48.000000000 +0100
@@ -181,6 +181,8 @@
 pid_t piped_child(char **command, int *f_in, int *f_out);
 pid_t local_child(int argc, char **argv,int *f_in,int *f_out,
 		  int (*child_main)(int, char*[]));
+pid_t run_filter(char *command[], int out, int *pipe_to_filter);
+pid_t run_filter_on_file(char *command[], int out, int in);
 void end_progress(OFF_T size);
 void show_progress(OFF_T ofs, OFF_T size);
 void delete_files(struct file_list *flist);
diff -ur rsync-2.5.6/receiver.c rsync-2.5.6-filtered/receiver.c
--- rsync-2.5.6/receiver.c	2003-01-21 00:32:17.000000000 +0100
+++ rsync-2.5.6-filtered/receiver.c	2003-11-16 11:44:14.000000000 +0100
@@ -320,11 +320,32 @@
 	extern int delete_after;
 	extern int orig_umask;
 	struct stats initial_stats;
+	pid_t pid = 0; /* assignment to get rid of compiler warning */
+	int status;
+	extern char *dest_filter;
+#define	MAX_FILTER_ARGS	100
+	char *filter_argv[MAX_FILTER_ARGS + 1];
 
 	if (verbose > 2) {
 		rprintf(FINFO,"recv_files(%d) starting\n",flist->count);
 	}
 
+	if (dest_filter) {
+		char *p;
+		char *sep = " \t";
+		int i;
+		for (p = strtok(dest_filter, sep), i = 0;
+		     p && i < MAX_FILTER_ARGS;
+		     p = strtok(0, sep)) {
+			filter_argv[i++] = p;
+		}
+		filter_argv[i] = 0;
+		if (p) {
+			rprintf(FERROR,"Too many arguments to dest-filter (> %d)\n", i);
+			exit_cleanup(RERR_SYNTAX);
+		}
+	}
+
 	while (1) {      
 		cleanup_disable();
 
@@ -448,16 +469,34 @@
 			log_transfer(file, fname);
 		}
 
+		if (dest_filter) {
+			pid = run_filter(filter_argv, fd2, &fd2);
+		}
+
 		/* recv file data */
 		recv_ok = receive_data(f_in,buf,fd2,fname,file->length);
 
+		if (dest_filter) {
+			close(fd2);
+			wait_process(pid, &status);
+			if (status != 0) {
+				rprintf(FERROR,"filter %s exited code: %d\n",
+					dest_filter, status);
+				if (buf) unmap_file(buf);
+				if (fd1 != -1) close(fd1);
+				continue;
+			}
+		}
+
 		log_recv(file, &initial_stats);
 		
 		if (buf) unmap_file(buf);
 		if (fd1 != -1) {
 			close(fd1);
 		}
-		close(fd2);
+		if (! dest_filter) {
+			close(fd2);
+		}
 		
 		if (verbose > 2)
 			rprintf(FINFO,"renaming %s to %s\n",fnametmp,fname);
diff -ur rsync-2.5.6/rsync.1 rsync-2.5.6-filtered/rsync.1
--- rsync-2.5.6/rsync.1	2003-01-28 04:11:57.000000000 +0100
+++ rsync-2.5.6-filtered/rsync.1	2003-11-16 14:07:36.000000000 +0100
@@ -361,6 +361,7 @@
      --timeout=TIME          set IO timeout in seconds
  -I, --ignore-times          don\'t exclude files that match length and time
      --size-only             only use file size when determining if a file should be transferred
+     --times-only            only use file modification time when determining if a file should be transferred
      --modify-window=NUM     Timestamp window (seconds) for file match (default=0)
  -T  --temp-dir=DIR          create temporary files in directory DIR
      --compare-dest=DIR      also compare destination files relative to DIR
@@ -386,6 +387,8 @@
      --bwlimit=KBPS          limit I/O bandwidth, KBytes per second
      --read-batch=PREFIX     read batch fileset starting with PREFIX
      --write-batch=PREFIX    write batch fileset starting with PREFIX
+     --source-filter=COMMAND filter file through COMMAND at source
+     --dest-filter=COMMAND   filter file through COMMAND at destination
  -h, --help                  show this help screen
 
 
@@ -437,6 +440,12 @@
 after using another mirroring system which may not preserve timestamps
 exactly\&.
 .IP 
+.IP "\fB--times-only\fP" 
+With this option, rsync will ignore size and file content
+differences when deciding whether to transfer a file\&.  Only
+a difference in file modification time will cause a file to be
+transferred.
+.IP 
 .IP "\fB--modify-window\fP" 
 When comparing two timestamps rsync treats
 the timestamps as being equal if they are within the value of
@@ -938,6 +947,32 @@
 using the fileset whose filenames start with PREFIX\&. See the "BATCH
 MODE" section for details\&.
 .IP 
+.IP "\fB --source-filter=COMMAND\fP" 
+This option allows you to specify a filter program that will be
+applied to the contents of all transferred regular files before
+the data is sent to destination.  COMMAND will receive the data on its
+standard input and it should write the filtered data to standard
+output.  COMMAND should exit non-zero if it cannot process the
+data or if it encounters an error when writing the data to stdout.
+Example: --source-filter="gzip -9" will cause remote files to be compressed.
+Use of --source-filter automatically enables --whole-file.
+If your filter does not output the same number of bytes that it
+received on input, you should use --times-only to disable size and
+content checks on subsequent rsync runs.
+.IP 
+.IP "\fB --dest-filter=COMMAND\fP" 
+This option allows you to specify a filter program that will be
+applied to the contents of all transferred regular files before
+the data is written to disk.  COMMAND will receive the data on its
+standard input and it should write the filtered data to standard
+output.  COMMAND should exit non-zero if it cannot process the
+data or if it encounters an error when writing the data to stdout.
+Example: --dest-filter="gzip -9" will cause remote files to be compressed.
+Use of --dest-filter automatically enables --whole-file.
+If your filter does not output the same number of bytes that it
+received on input, you should use --times-only to disable size and
+content checks on subsequent rsync runs.
+.IP 
 .PP 
 .SH "EXCLUDE PATTERNS" 
 .PP 
diff -ur rsync-2.5.6/sender.c rsync-2.5.6-filtered/sender.c
--- rsync-2.5.6/sender.c	2002-04-09 08:03:50.000000000 +0200
+++ rsync-2.5.6-filtered/sender.c	2003-11-16 14:33:17.000000000 +0100
@@ -109,6 +109,27 @@
 	char buff[CHUNK_SIZE];    /* dw */
 	int j;   /* dw */
 	int done;   /* dw */
+	extern char *source_filter;
+#define       MAX_FILTER_ARGS 100
+	char *filter_argv[MAX_FILTER_ARGS + 1];
+	char *tmp = 0, *tmpl = "/tmp/rsync-filtered_sourceXXXXXX";
+	int unlink_tmp = 0;
+
+	if (source_filter) {
+		char *p; 
+		char *sep = " \t"; 
+		int i; 
+		for (p = strtok(source_filter, sep), i = 0; 
+				p && i < MAX_FILTER_ARGS; 
+				p = strtok(0, sep)) { 
+			filter_argv[i++] = p; 
+		} 
+		filter_argv[i] = 0; 
+		if (p) { 
+			rprintf(FERROR,"Too many arguments to source-filter (> %d)\n", i); 
+			exit_cleanup(RERR_SYNTAX); 
+		} 
+	} 
 
 	if (verbose > 2)
 		rprintf(FINFO,"send_files starting\n");
@@ -178,7 +199,34 @@
 		    write_batch_csum_info(&i,flist->count,s);
 	  
 		if (!read_batch) {
+			unlink_tmp=0;
+
 			fd = do_open(fname, O_RDONLY, 0);
+
+			if (fd != -1 && source_filter) {
+				int fd2, status;
+				pid_t pid = 0; /* assignment to get rid of compiler warning */
+
+				tmp = strdup(tmpl);
+				fd2 = mkstemp(tmp);
+				if (fd2 == -1) 
+					rprintf(FERROR,"mkstemp %s failed: %s\n",tmp,strerror(errno));
+				else {
+					pid = run_filter_on_file(filter_argv, fd2, fd);
+					close(fd);
+					close(fd2);
+					wait_process(pid, &status);
+					if (status != 0) {
+						rprintf(FERROR,"bypassing source filter %s; exited with code: %d\n",source_filter,status);
+						fd = do_open(fname, O_RDONLY, 0);
+					}
+					else {
+						fd = do_open(tmp, O_RDONLY, 0);
+						unlink_tmp = 1;
+					}
+				}
+			}
+
 			if (fd == -1) {
 				io_error = 1;
 				rprintf(FERROR,"send_files failed to open %s: %s\n",
@@ -275,6 +323,7 @@
 		if (!read_batch) { /* dw */
 		    if (buf) unmap_file(buf);
 		    close(fd);
+			if (unlink_tmp) unlink(tmp);
 		}
 	  
 		free_sums(s);



More information about the rsync mailing list