need to modify file data before storing it on destination

Kyle Jones kyle_jones at wonderworks.com
Thu Apr 3 03:03:27 EST 2003


Here's a patch against rsync 2.5.6 that implements --dest-filter
(discussed as --remotefilter) and --times-only.  If anyone cares
to try it out and let me know if it works on your system, I'd
appreciate it.  I'll be distributing this patch with my snapshot
utilities.

*** rsync-2.5.6/generator.c	Thu Aug 29 07:44:55 2002
--- rsync-2.5.6.patched/generator.c	Tue Apr  1 11:18:49 2003
***************
*** 35,40 ****
--- 35,42 ----
  extern int block_size;
  extern int csum_length;
  extern int ignore_times;
+ extern int times_only;
+ extern char *dest_filter;
  extern int size_only;
  extern int io_timeout;
  extern int remote_version;
***************
*** 48,55 ****
  static int skip_file(char *fname,
  		     struct file_struct *file, STRUCT_STAT *st)
  {
! 	if (st->st_size != file->length) {
! 		return 0;
  	}
  	if (link_dest) {
  		if((st->st_mode & ~_S_IFMT) !=  (file->mode & ~_S_IFMT)) {
--- 50,59 ----
  static int skip_file(char *fname,
  		     struct file_struct *file, STRUCT_STAT *st)
  {
! 	if (! times_only) {
! 		if (st->st_size != file->length) {
! 			return 0;
! 		}
  	}
  	if (link_dest) {
  		if((st->st_mode & ~_S_IFMT) !=  (file->mode & ~_S_IFMT)) {
***************
*** 58,63 ****
--- 62,70 ----
  		if (st->st_uid != file->uid || st->st_gid != file->gid) {
  			return 0;
  		}
+ 	}
+ 	if (times_only) {
+ 		return (cmp_modtime(st->st_mtime,file->modtime) == 0);
  	}
  
  	
*** rsync-2.5.6/options.c	Mon Jan 27 19:11:57 2003
--- rsync-2.5.6.patched/options.c	Tue Apr  1 22:00:58 2003
***************
*** 48,53 ****
--- 48,55 ----
  int dry_run=0;
  int local_server=0;
  int ignore_times=0;
+ char *dest_filter = NULL;
+ int times_only=0;
  int delete_mode=0;
  int delete_excluded=0;
  int one_file_system=0;
***************
*** 207,212 ****
--- 209,215 ----
    rprintf(F," -v, --verbose               increase verbosity\n");
    rprintf(F," -q, --quiet                 decrease verbosity\n");
    rprintf(F," -c, --checksum              always checksum\n");
+   rprintf(F,"     --dest-filter=COMMAND   filter file through COMMAND at destination\n");
    rprintf(F," -a, --archive               archive mode, equivalent to -rlptgoD\n");
    rprintf(F," -r, --recursive             recurse into directories\n");
    rprintf(F," -R, --relative              use relative path names\n");
***************
*** 246,251 ****
--- 249,255 ----
    rprintf(F,"     --timeout=TIME          set IO timeout in seconds\n");
    rprintf(F," -I, --ignore-times          don't exclude files that match length and time\n");
    rprintf(F,"     --size-only             only use file size when determining if a file should be transferred\n");
+   rprintf(F,"     --times-only             only use file modification time when determining if a file should be transferred\n");
    rprintf(F,"     --modify-window=NUM     Timestamp window (seconds) for file match (default=%d)\n",modify_window);
    rprintf(F," -T  --temp-dir=DIR          create temporary files in directory DIR\n");
    rprintf(F,"     --compare-dest=DIR      also compare destination files relative to DIR\n");
***************
*** 283,288 ****
--- 287,293 ----
  }
  
  enum {OPT_VERSION = 1000, OPT_SUFFIX, OPT_SENDER, OPT_SERVER, OPT_EXCLUDE,
+       OPT_DEST_FILTER,
        OPT_EXCLUDE_FROM, OPT_DELETE, OPT_DELETE_EXCLUDED, OPT_NUMERIC_IDS,
        OPT_RSYNC_PATH, OPT_FORCE, OPT_TIMEOUT, OPT_DAEMON, OPT_CONFIG, OPT_PORT,
        OPT_INCLUDE, OPT_INCLUDE_FROM, OPT_STATS, OPT_PARTIAL, OPT_PROGRESS,
***************
*** 300,305 ****
--- 305,312 ----
    {"rsync-path",       0,  POPT_ARG_STRING, &rsync_path,	0, 0, 0 },
    {"password-file",    0,  POPT_ARG_STRING, &password_file,	0, 0, 0 },
    {"ignore-times",    'I', POPT_ARG_NONE,   &ignore_times , 0, 0, 0 },
+   {"times-only",       0,  POPT_ARG_NONE,   &times_only , 0, 0, 0 },
+   {"dest-filter",      0,  POPT_ARG_STRING, &dest_filter , OPT_DEST_FILTER, 0, 0 },
    {"size-only",        0,  POPT_ARG_NONE,   &size_only , 0, 0, 0 },
    {"modify-window",    0,  POPT_ARG_INT,    &modify_window, OPT_MODIFY_WINDOW, 0, 0 },
    {"one-file-system", 'x', POPT_ARG_NONE,   &one_file_system , 0, 0, 0 },
***************
*** 471,476 ****
--- 478,488 ----
                          print_rsync_version(FINFO);
  			exit_cleanup(0);
  			
+ 		case OPT_DEST_FILTER:
+ 			/* dest_filter already set by popt */
+ 			whole_file = 1;
+ 			break;
+ 
  		case OPT_SUFFIX:
                          /* The value has already been set by popt, but
                           * we need to remember that a suffix was specified
***************
*** 631,636 ****
--- 643,655 ----
  	    return 0;
  	}
  
+ 	if (dest_filter && no_whole_file) {
+ 	    snprintf(err_buf,sizeof(err_buf),
+ 		"no-whole-file can not be used with dest-filter\n");
+ 	    rprintf(FERROR,"ERROR: no-whole-file can not be used with dest-filter\n");
+ 	    return 0;
+ 	}
+ 
          *argv = poptGetArgs(pc);
          if (*argv)
                  *argc = count_args(*argv);
***************
*** 783,788 ****
--- 802,818 ----
  
  	if (delete_excluded)
  		args[ac++] = "--delete-excluded";
+ 
+ 	if (times_only)
+ 		args[ac++] = "--times-only";
+ 
+ 	if (dest_filter) {
+ 		static char buf[1000];
+ 		/* have to single quote the arg to keep the
+ 		   remote shell from splitting it */
+ 	        snprintf(buf, sizeof(buf), "--dest-filter='%s'", dest_filter);
+ 		args[ac++] = buf;
+ 	}
  
  	if (size_only)
  		args[ac++] = "--size-only";
*** rsync-2.5.6/pipe.c	Mon Apr  8 00:39:56 2002
--- rsync-2.5.6.patched/pipe.c	Tue Apr  1 21:13:07 2003
***************
*** 146,148 ****
--- 146,197 ----
  }
  
  
+ pid_t run_filter(char *command[], int out, int *pipe_to_filter)	/* fork and exec command[] with stdin on a new pipe and stdout on out; returns the child's pid */
+ {	/* *pipe_to_filter receives the pipe's write end; pipe/fork/dup2/exec failures call exit_cleanup(RERR_IPC) */
+ 	pid_t pid;
+ 	int pipefds[2];
+ 	extern int blocking_io;
+ 	/* NOTE(review): proto.h in this patch declares the second parameter as `in`, not `out`. */
+ 	if (verbose >= 2) {
+ 		print_child_argv(command);
+ 	}
+ 	/* pipefds[0] becomes the child's stdin; pipefds[1] is handed back through *pipe_to_filter. */
+ 	if (pipe(pipefds) < 0) {
+ 		rprintf(FERROR, "pipe: %s\n", strerror(errno));
+ 		exit_cleanup(RERR_IPC);
+ 	}
+ 
+ 	pid = do_fork();
+ 	if (pid == -1) {
+ 		rprintf(FERROR, "fork: %s\n", strerror(errno));
+ 		exit_cleanup(RERR_IPC);
+ 	}
+ 
+ 	if (pid == 0) {	/* child process */
+ 		extern int orig_umask;
+ 		if (dup2(pipefds[0], STDIN_FILENO) < 0) {	/* read end of the pipe -> stdin */
+ 			rprintf(FERROR, "Failed dup2 to child stdin : %s\n",
+ 				strerror(errno));
+ 			exit_cleanup(RERR_IPC);
+ 		}
+ 		if (dup2(out, STDOUT_FILENO) < 0) {	/* caller-supplied descriptor -> stdout */
+ 			rprintf(FERROR, "Failed dup2 to child stdout : %s\n",
+ 				strerror(errno));
+ 			exit_cleanup(RERR_IPC);
+ 		}
+ 		close(pipefds[1]);	/* NOTE(review): pipefds[0] and out stay open in the child after the dup2 calls */
+ 		umask(orig_umask);
+ 		set_blocking(STDIN_FILENO);	/* stdin is always passed to set_blocking(); stdout only when blocking_io is set */
+ 		if (blocking_io) {
+ 			set_blocking(STDOUT_FILENO);
+ 		}
+ 		execvp(command[0], command);
+ 		rprintf(FERROR, "Failed to exec %s : %s\n",	/* reached only when execvp() returned, i.e. failed */
+ 			command[0], strerror(errno));
+ 		exit_cleanup(RERR_IPC);
+ 	}
+ 	/* NOTE(review): the parent's copy of pipefds[0] is not closed anywhere in this function. */
+ 	*pipe_to_filter = pipefds[1];	/* parent: give the write end to the caller */
+ 
+ 	return pid;
+ }
*** rsync-2.5.6/proto.h	Sun Jan 26 19:35:09 2003
--- rsync-2.5.6.patched/proto.h	Tue Apr  1 21:12:49 2003
***************
*** 181,186 ****
--- 181,187 ----
  pid_t piped_child(char **command, int *f_in, int *f_out);
  pid_t local_child(int argc, char **argv,int *f_in,int *f_out,
  		  int (*child_main)(int, char*[]));
+ pid_t run_filter(char *command[], int in, int *pipe_to_filter);
  void end_progress(OFF_T size);
  void show_progress(OFF_T ofs, OFF_T size);
  void delete_files(struct file_list *flist);
*** rsync-2.5.6/receiver.c	Mon Jan 20 15:32:17 2003
--- rsync-2.5.6.patched/receiver.c	Tue Apr  1 22:41:13 2003
***************
*** 320,330 ****
--- 320,351 ----
  	extern int delete_after;
  	extern int orig_umask;
  	struct stats initial_stats;
+ 	pid_t pid = 0; /* assignment to get rid of compiler warning */
+ 	int status;
+ 	extern char *dest_filter;
+ #define	MAX_FILTER_ARGS	100
+ 	char *filter_argv[MAX_FILTER_ARGS + 1];
  
  	if (verbose > 2) {
  		rprintf(FINFO,"recv_files(%d) starting\n",flist->count);
  	}
  
+ 	if (dest_filter) {
+ 		char *p;
+ 		char *sep = " \t";
+ 		int i;
+ 		for (p = strtok(dest_filter, sep), i = 0;
+ 		     p && i < MAX_FILTER_ARGS;
+ 		     p = strtok(0, sep)) {
+ 			filter_argv[i++] = p;
+ 		}
+ 		filter_argv[i] = 0;
+ 		if (p) {
+ 			rprintf(FERROR,"Too many arguments to dest-filter (> %d)\n", i);
+ 			exit_cleanup(RERR_SYNTAX);
+ 		}
+ 	}
+ 
  	while (1) {      
  		cleanup_disable();
  
***************
*** 448,463 ****
  			log_transfer(file, fname);
  		}
  
  		/* recv file data */
  		recv_ok = receive_data(f_in,buf,fd2,fname,file->length);
  
  		log_recv(file, &initial_stats);
  		
  		if (buf) unmap_file(buf);
  		if (fd1 != -1) {
  			close(fd1);
  		}
! 		close(fd2);
  		
  		if (verbose > 2)
  			rprintf(FINFO,"renaming %s to %s\n",fnametmp,fname);
--- 469,502 ----
  			log_transfer(file, fname);
  		}
  
+ 		if (dest_filter) {
+ 			pid = run_filter(filter_argv, fd2, &fd2);
+ 		}
+ 
  		/* recv file data */
  		recv_ok = receive_data(f_in,buf,fd2,fname,file->length);
  
+ 		if (dest_filter) {
+ 			close(fd2);
+ 			wait_process(pid, &status);
+ 			if (status != 0) {
+ 				rprintf(FERROR,"filter %s exited code: %d\n",
+ 					dest_filter, status);
+ 				if (buf) unmap_file(buf);
+ 				if (fd1 != -1) close(fd1);
+ 				continue;
+ 			}
+ 		}
+ 
  		log_recv(file, &initial_stats);
  		
  		if (buf) unmap_file(buf);
  		if (fd1 != -1) {
  			close(fd1);
  		}
! 		if (! dest_filter) {
! 			close(fd2);
! 		}
  		
  		if (verbose > 2)
  			rprintf(FINFO,"renaming %s to %s\n",fnametmp,fname);
*** rsync-2.5.6/rsync.1	Mon Jan 27 19:11:57 2003
--- rsync-2.5.6.patched/rsync.1	Tue Apr  1 22:20:55 2003
***************
*** 322,327 ****
--- 322,328 ----
   -v, --verbose               increase verbosity
   -q, --quiet                 decrease verbosity
   -c, --checksum              always checksum
+      --dest-filter=COMMAND   filter file through COMMAND at destination
   -a, --archive               archive mode, equivalent to -rlptgoD
   -r, --recursive             recurse into directories
   -R, --relative              use relative path names
***************
*** 361,366 ****
--- 362,368 ----
       --timeout=TIME          set IO timeout in seconds
   -I, --ignore-times          don\'t exclude files that match length and time
       --size-only             only use file size when determining if a file should be transferred
+      --times-only            only use file modification time when determining if a file should be transferred
       --modify-window=NUM     Timestamp window (seconds) for file match (default=0)
   -T  --temp-dir=DIR          create temporary files in directory DIR
       --compare-dest=DIR      also compare destination files relative to DIR
***************
*** 437,442 ****
--- 439,450 ----
  after using another mirroring system which may not preserve timestamps
  exactly\&.
  .IP 
+ .IP "\fB--times-only\fP" 
+ With this option, rsync will ignore size and file content
+ differences when deciding whether to transfer a file\&.  Only
+ a difference in file modification time will cause a file to be
+ transferred.
+ .IP 
  .IP "\fB--modify-window\fP" 
  When comparing two timestamps rsync treats
  the timestamps as being equal if they are within the value of
***************
*** 451,456 ****
--- 459,477 ----
  explicitly checked on the receiver and any files of the same name
  which already exist and have the same checksum and size on the
  receiver are skipped\&.  This option can be quite slow\&.
+ .IP 
+ .IP "\fB--dest-filter=COMMAND\fP" 
+ This option allows you to specify a filter program that will be
+ applied to the contents of all transferred regular files before
+ the data is written to disk.  COMMAND will receive the data on its
+ standard input and it should write the filtered data to standard
+ output.  COMMAND should exit non-zero if it cannot process the
+ data or if it encounters an error when writing the data to stdout.
+ Example: --dest-filter="gzip -9" will cause remote files to be compressed.
+ Use of --dest-filter automatically enables --whole-file.
+ If your filter does not output the same number of bytes that it
+ received on input, you should use --times-only to disable size and
+ content checks on subsequent rsync runs.
  .IP 
  .IP "\fB-a, --archive\fP" 
  This is equivalent to -rlptgoD\&. It is a quick
*** rsync-2.5.6/util.c	Sun Jan 19 13:37:11 2003
--- rsync-2.5.6.patched/util.c	Tue Apr  1 23:51:02 2003
***************
*** 381,389 ****
--- 381,393 ----
  {
  	pid_t newpid = fork();
  	
+ 	/* --dest-filter can launch thousands of processes,
+ 	   so recording every child pid in all_pids is impractical. */
+ #if 0
  	if (newpid != 0  &&  newpid != -1) {
  		all_pids[num_pids++] = newpid;
  	}
+ #endif
  	return newpid;
  }
  


More information about the rsync mailing list