[PATCH] --source-filter && --dest-filter for rsync 2.5.6
Miroslav Rudisin
miero at matfyz.cz
Mon Nov 17 03:55:20 EST 2003
Hi,
I needed to filter the content of files (encrypt them) before they are sent over the network to a backup server.
The easiest way to do this was modifying Kyle Jones's "--dest-filter" patch.
Somebody asked for this feature here in the past, so I'm sending this patch to the list.
Implementation details:
-filtering disables the rsync algorithm
-the source filter creates temporary files in /tmp (there should be enough disk space)
-if the source filter fails, the data is sent unfiltered
-failure of the destination filter causes data loss!!!
-if the filter changes the size of a file, you should use the --times-only option
to prevent repeated transfers of unchanged files
miEro
diff -ur rsync-2.5.6/generator.c rsync-2.5.6-filtered/generator.c
--- rsync-2.5.6/generator.c 2002-08-29 16:44:55.000000000 +0200
+++ rsync-2.5.6-filtered/generator.c 2003-11-16 13:57:54.000000000 +0100
@@ -35,6 +35,7 @@
extern int block_size;
extern int csum_length;
extern int ignore_times;
+extern int times_only;
extern int size_only;
extern int io_timeout;
extern int remote_version;
@@ -48,8 +49,10 @@
static int skip_file(char *fname,
struct file_struct *file, STRUCT_STAT *st)
{
- if (st->st_size != file->length) {
- return 0;
+ if (! times_only) {
+ if (st->st_size != file->length) {
+ return 0;
+ }
}
if (link_dest) {
if((st->st_mode & ~_S_IFMT) != (file->mode & ~_S_IFMT)) {
@@ -59,6 +62,9 @@
return 0;
}
}
+ if (times_only) {
+ return (cmp_modtime(st->st_mtime,file->modtime) == 0);
+ }
/* if always checksum is set then we use the checksum instead
diff -ur rsync-2.5.6/options.c rsync-2.5.6-filtered/options.c
--- rsync-2.5.6/options.c 2003-01-28 04:11:57.000000000 +0100
+++ rsync-2.5.6-filtered/options.c 2003-11-16 14:06:29.000000000 +0100
@@ -48,6 +48,9 @@
int dry_run=0;
int local_server=0;
int ignore_times=0;
+char *source_filter = NULL;
+char *dest_filter = NULL;
+int times_only=0;
int delete_mode=0;
int delete_excluded=0;
int one_file_system=0;
@@ -246,6 +249,7 @@
rprintf(F," --timeout=TIME set IO timeout in seconds\n");
rprintf(F," -I, --ignore-times don't exclude files that match length and time\n");
rprintf(F," --size-only only use file size when determining if a file should be transferred\n");
+ rprintf(F," --times-only only use file modification time when determining if a file should be transferred\n");
rprintf(F," --modify-window=NUM Timestamp window (seconds) for file match (default=%d)\n",modify_window);
rprintf(F," -T --temp-dir=DIR create temporary files in directory DIR\n");
rprintf(F," --compare-dest=DIR also compare destination files relative to DIR\n");
@@ -270,6 +274,8 @@
rprintf(F," --bwlimit=KBPS limit I/O bandwidth, KBytes per second\n");
rprintf(F," --write-batch=PREFIX write batch fileset starting with PREFIX\n");
rprintf(F," --read-batch=PREFIX read batch fileset starting with PREFIX\n");
+ rprintf(F," --source-filter=COMMAND filter file through COMMAND at source\n");
+ rprintf(F," --dest-filter=COMMAND filter file through COMMAND at destination\n");
rprintf(F," -h, --help show this help screen\n");
#ifdef INET6
rprintf(F," -4 prefer IPv4\n");
@@ -283,6 +289,7 @@
}
enum {OPT_VERSION = 1000, OPT_SUFFIX, OPT_SENDER, OPT_SERVER, OPT_EXCLUDE,
+ OPT_SOURCE_FILTER, OPT_DEST_FILTER,
OPT_EXCLUDE_FROM, OPT_DELETE, OPT_DELETE_EXCLUDED, OPT_NUMERIC_IDS,
OPT_RSYNC_PATH, OPT_FORCE, OPT_TIMEOUT, OPT_DAEMON, OPT_CONFIG, OPT_PORT,
OPT_INCLUDE, OPT_INCLUDE_FROM, OPT_STATS, OPT_PARTIAL, OPT_PROGRESS,
@@ -300,6 +307,9 @@
{"rsync-path", 0, POPT_ARG_STRING, &rsync_path, 0, 0, 0 },
{"password-file", 0, POPT_ARG_STRING, &password_file, 0, 0, 0 },
{"ignore-times", 'I', POPT_ARG_NONE, &ignore_times , 0, 0, 0 },
+ {"times-only", 0, POPT_ARG_NONE, &times_only , 0, 0, 0 },
+ {"source-filter", 0, POPT_ARG_STRING, &source_filter , OPT_SOURCE_FILTER, 0, 0 },
+ {"dest-filter", 0, POPT_ARG_STRING, &dest_filter , OPT_DEST_FILTER, 0, 0 },
{"size-only", 0, POPT_ARG_NONE, &size_only , 0, 0, 0 },
{"modify-window", 0, POPT_ARG_INT, &modify_window, OPT_MODIFY_WINDOW, 0, 0 },
{"one-file-system", 'x', POPT_ARG_NONE, &one_file_system , 0, 0, 0 },
@@ -471,6 +481,16 @@
print_rsync_version(FINFO);
exit_cleanup(0);
+ case OPT_SOURCE_FILTER:
+ /* source_filter already set by popt */
+ whole_file = 1;
+ break;
+
+ case OPT_DEST_FILTER:
+ /* dest_filter already set by popt */
+ whole_file = 1;
+ break;
+
case OPT_SUFFIX:
/* The value has already been set by popt, but
* we need to remember that a suffix was specified
@@ -631,6 +651,13 @@
return 0;
}
+ if ((source_filter || dest_filter) && no_whole_file) {
+ snprintf(err_buf,sizeof(err_buf),
+ "no-whole-file can not be used with source-filter or dest-filter\n");
+ rprintf(FERROR,"ERROR: no-whole-file can not be used with source-filter or dest-filter\n");
+ return 0;
+ }
+
*argv = poptGetArgs(pc);
if (*argv)
*argc = count_args(*argv);
@@ -784,6 +811,17 @@
if (delete_excluded)
args[ac++] = "--delete-excluded";
+ if (times_only)
+ args[ac++] = "--times-only";
+
+ if (dest_filter) {
+ static char buf[1000];
+ /* have to single quote the arg to keep the
+ remote shell from splitting it */
+ snprintf(buf, sizeof(buf), "--dest-filter='%s'", dest_filter);
+ args[ac++] = buf;
+ }
+
if (size_only)
args[ac++] = "--size-only";
diff -ur rsync-2.5.6/pipe.c rsync-2.5.6-filtered/pipe.c
--- rsync-2.5.6/pipe.c 2002-04-08 09:39:56.000000000 +0200
+++ rsync-2.5.6-filtered/pipe.c 2003-11-16 13:20:34.000000000 +0100
@@ -146,3 +146,120 @@
}
+/*
+ * Fork and exec a filter program whose stdin is a new pipe and whose
+ * stdout is the descriptor `out`.
+ *
+ * command         NULL-terminated argv passed to execvp().
+ * out             descriptor duplicated onto the filter's stdout.  The
+ *                 parent's copy is not closed here.
+ * pipe_to_filter  receives the write end of the pipe feeding the filter.
+ *
+ * Returns the child's pid.  A pipe() or fork() failure is reported and
+ * ends in exit_cleanup(RERR_IPC).
+ */
+pid_t run_filter(char *command[], int out, int *pipe_to_filter)
+{
+	pid_t pid;
+	int pipefds[2];
+	extern int blocking_io;
+
+	if (verbose >= 2) {
+		print_child_argv(command);
+	}
+
+	if (pipe(pipefds) < 0) {
+		rprintf(FERROR, "pipe: %s\n", strerror(errno));
+		exit_cleanup(RERR_IPC);
+	}
+
+	pid = fork();
+	if (pid == -1) {
+		rprintf(FERROR, "fork: %s\n", strerror(errno));
+		exit_cleanup(RERR_IPC);
+	}
+
+	if (pid == 0) {
+		extern int orig_umask;
+		if (dup2(pipefds[0], STDIN_FILENO) < 0) {
+			rprintf(FERROR, "Failed dup2 to child stdin : %s\n",
+				strerror(errno));
+			exit_cleanup(RERR_IPC);
+		}
+		if (dup2(out, STDOUT_FILENO) < 0) {
+			rprintf(FERROR, "Failed dup2 to child stdout : %s\n",
+				strerror(errno));
+			exit_cleanup(RERR_IPC);
+		}
+		/* The filter reads the pipe through stdin only; drop the
+		 * child's other references to both pipe ends. */
+		if (pipefds[0] != STDIN_FILENO) {
+			close(pipefds[0]);
+		}
+		close(pipefds[1]);
+		umask(orig_umask);
+		set_blocking(STDIN_FILENO);
+		if (blocking_io) {
+			set_blocking(STDOUT_FILENO);
+		}
+		execvp(command[0], command);
+		rprintf(FERROR, "Failed to exec %s : %s\n",
+			command[0], strerror(errno));
+		exit_cleanup(RERR_IPC);
+	}
+
+	/* The parent only writes to the filter.  Keeping the read end
+	 * open would leak a descriptor on every call and would keep a
+	 * write to a dead filter from failing with EPIPE. */
+	close(pipefds[0]);
+
+	*pipe_to_filter = pipefds[1];
+
+	return pid;
+}
+
+/*
+ * Fork and exec a filter program with the descriptor `in` as its stdin
+ * and the descriptor `out` as its stdout.  No pipe is created, and the
+ * parent's copies of both descriptors are left open.
+ *
+ * Returns the child's pid.  A fork() failure is reported and ends in
+ * exit_cleanup(RERR_IPC).
+ */
+pid_t run_filter_on_file(char *command[], int out, int in)
+{
+	pid_t pid;
+	extern int blocking_io;
+
+	if (verbose >= 2) {
+		print_child_argv(command);
+	}
+
+	pid = fork();
+	if (pid == -1) {
+		rprintf(FERROR, "fork: %s\n", strerror(errno));
+		exit_cleanup(RERR_IPC);
+	}
+
+	if (pid == 0) {
+		if (dup2(in, STDIN_FILENO) < 0) {
+			rprintf(FERROR, "Failed dup2 to child stdin : %s\n",
+				strerror(errno));
+			exit_cleanup(RERR_IPC);
+		}
+		if (dup2(out, STDOUT_FILENO) < 0) {
+			rprintf(FERROR, "Failed dup2 to child stdout : %s\n",
+				strerror(errno));
+			exit_cleanup(RERR_IPC);
+		}
+		if (blocking_io) {
+			set_blocking(STDOUT_FILENO);
+		}
+		execvp(command[0], command);
+		rprintf(FERROR, "Failed to exec %s : %s\n",
+			command[0], strerror(errno));
+		exit_cleanup(RERR_IPC);
+	}
+
+	return pid;
+}
diff -ur rsync-2.5.6/proto.h rsync-2.5.6-filtered/proto.h
--- rsync-2.5.6/proto.h 2003-01-27 04:35:09.000000000 +0100
+++ rsync-2.5.6-filtered/proto.h 2003-11-16 13:20:48.000000000 +0100
@@ -181,6 +181,8 @@
pid_t piped_child(char **command, int *f_in, int *f_out);
pid_t local_child(int argc, char **argv,int *f_in,int *f_out,
int (*child_main)(int, char*[]));
+pid_t run_filter(char *command[], int in, int *pipe_to_filter);
+pid_t run_filter_on_file(char *command[], int out, int in);
void end_progress(OFF_T size);
void show_progress(OFF_T ofs, OFF_T size);
void delete_files(struct file_list *flist);
diff -ur rsync-2.5.6/receiver.c rsync-2.5.6-filtered/receiver.c
--- rsync-2.5.6/receiver.c 2003-01-21 00:32:17.000000000 +0100
+++ rsync-2.5.6-filtered/receiver.c 2003-11-16 11:44:14.000000000 +0100
@@ -320,11 +320,32 @@
extern int delete_after;
extern int orig_umask;
struct stats initial_stats;
+ pid_t pid = 0; /* assignment to get rid of compiler warning */
+ int status;
+ extern char *dest_filter;
+#define MAX_FILTER_ARGS 100
+ char *filter_argv[MAX_FILTER_ARGS + 1];
if (verbose > 2) {
rprintf(FINFO,"recv_files(%d) starting\n",flist->count);
}
+ if (dest_filter) {
+ char *p;
+ char *sep = " \t";
+ int i;
+ for (p = strtok(dest_filter, sep), i = 0;
+ p && i < MAX_FILTER_ARGS;
+ p = strtok(0, sep)) {
+ filter_argv[i++] = p;
+ }
+ filter_argv[i] = 0;
+ if (p) {
+ rprintf(FERROR,"Too many arguments to dest-filter (> %d)\n", i);
+ exit_cleanup(RERR_SYNTAX);
+ }
+ }
+
while (1) {
cleanup_disable();
@@ -448,16 +469,34 @@
log_transfer(file, fname);
}
+ if (dest_filter) {
+ pid = run_filter(filter_argv, fd2, &fd2);
+ }
+
/* recv file data */
recv_ok = receive_data(f_in,buf,fd2,fname,file->length);
+ if (dest_filter) {
+ close(fd2);
+ wait_process(pid, &status);
+ if (status != 0) {
+ rprintf(FERROR,"filter %s exited code: %d\n",
+ dest_filter, status);
+ if (buf) unmap_file(buf);
+ if (fd1 != -1) close(fd1);
+ continue;
+ }
+ }
+
log_recv(file, &initial_stats);
if (buf) unmap_file(buf);
if (fd1 != -1) {
close(fd1);
}
- close(fd2);
+ if (! dest_filter) {
+ close(fd2);
+ }
if (verbose > 2)
rprintf(FINFO,"renaming %s to %s\n",fnametmp,fname);
diff -ur rsync-2.5.6/rsync.1 rsync-2.5.6-filtered/rsync.1
--- rsync-2.5.6/rsync.1 2003-01-28 04:11:57.000000000 +0100
+++ rsync-2.5.6-filtered/rsync.1 2003-11-16 14:07:36.000000000 +0100
@@ -361,6 +361,7 @@
--timeout=TIME set IO timeout in seconds
-I, --ignore-times don\'t exclude files that match length and time
--size-only only use file size when determining if a file should be transferred
+ --times-only only use file modification time when determining if a file should be transferred
--modify-window=NUM Timestamp window (seconds) for file match (default=0)
-T --temp-dir=DIR create temporary files in directory DIR
--compare-dest=DIR also compare destination files relative to DIR
@@ -386,6 +387,8 @@
--bwlimit=KBPS limit I/O bandwidth, KBytes per second
--read-batch=PREFIX read batch fileset starting with PREFIX
--write-batch=PREFIX write batch fileset starting with PREFIX
+ --source-filter=COMMAND filter file through COMMAND at source
+ --dest-filter=COMMAND filter file through COMMAND at destination
-h, --help show this help screen
@@ -437,6 +440,12 @@
after using another mirroring system which may not preserve timestamps
exactly\&.
.IP
+.IP "\fB--times-only\fP"
+With this option, rsync will ignore size and file content
+differences when deciding whether to transfer a file\&. Only
+a difference in file modification time will cause a file to be
+transferred.
+.IP
.IP "\fB--modify-window\fP"
When comparing two timestamps rsync treats
the timestamps as being equal if they are within the value of
@@ -938,6 +947,32 @@
using the fileset whose filenames start with PREFIX\&. See the "BATCH
MODE" section for details\&.
.IP
+.IP "\fB --source-filter=COMMAND\fP"
+This option allows you to specify a filter program that will be
+applied to the contents of all transferred regular files before
+the data is sent to destination. COMMAND will receive the data on its
+standard input and it should write the filtered data to standard
+output. COMMAND should exit non-zero if it cannot process the
+data or if it encounters an error when writing the data to stdout.
+Example: --source-filter="gzip -9" will cause remote files to be compressed.
+Use of --source-filter automatically enables --whole-file.
+If your filter does not output the same number of bytes that it
+received on input, you should use --times-only to disable size and
+content checks on subsequent rsync runs.
+.IP
+.IP "\fB --dest-filter=COMMAND\fP"
+This option allows you to specify a filter program that will be
+applied to the contents of all transferred regular files before
+the data is written to disk. COMMAND will receive the data on its
+standard input and it should write the filtered data to standard
+output. COMMAND should exit non-zero if it cannot process the
+data or if it encounters an error when writing the data to stdout.
+Example: --dest-filter="gzip -9" will cause remote files to be compressed.
+Use of --dest-filter automatically enables --whole-file.
+If your filter does not output the same number of bytes that it
+received on input, you should use --times-only to disable size and
+content checks on subsequent rsync runs.
+.IP
.PP
.SH "EXCLUDE PATTERNS"
.PP
diff -ur rsync-2.5.6/sender.c rsync-2.5.6-filtered/sender.c
--- rsync-2.5.6/sender.c 2002-04-09 08:03:50.000000000 +0200
+++ rsync-2.5.6-filtered/sender.c 2003-11-16 14:33:17.000000000 +0100
@@ -109,6 +109,27 @@
char buff[CHUNK_SIZE]; /* dw */
int j; /* dw */
int done; /* dw */
+ extern char *source_filter;
+#define MAX_FILTER_ARGS 100
+ char *filter_argv[MAX_FILTER_ARGS + 1];
+ char *tmp = 0, *tmpl = "/tmp/rsync-filtered_sourceXXXXXX";
+ int unlink_tmp = 0;
+
+ if (source_filter) {
+ char *p;
+ char *sep = " \t";
+ int i;
+ for (p = strtok(source_filter, sep), i = 0;
+ p && i < MAX_FILTER_ARGS;
+ p = strtok(0, sep)) {
+ filter_argv[i++] = p;
+ }
+ filter_argv[i] = 0;
+ if (p) {
+ rprintf(FERROR,"Too many arguments to source-filter (> %d)\n", i);
+ exit_cleanup(RERR_SYNTAX);
+ }
+ }
if (verbose > 2)
rprintf(FINFO,"send_files starting\n");
@@ -178,7 +199,34 @@
write_batch_csum_info(&i,flist->count,s);
if (!read_batch) {
+ unlink_tmp=0;
+
fd = do_open(fname, O_RDONLY, 0);
+
+ if (fd != -1 && source_filter) {
+ int fd2, status;
+ pid_t pid = 0; /* assignment to get rid of compiler warning */
+
+ tmp = strdup(tmpl);
+ fd2 = mkstemp(tmp);
+ if (fd2 == -1)
+ rprintf(FERROR,"mkstemp %s failed: %s\n",tmp,strerror(errno));
+ else {
+ pid = run_filter_on_file(filter_argv, fd2, fd);
+ close(fd);
+ close(fd2);
+ wait_process(pid, &status);
+ if (status != 0) {
+ rprintf(FERROR,"bypassing source filter %s; exited with code: %d\n",source_filter,status);
+ fd = do_open(fname, O_RDONLY, 0);
+ }
+ else {
+ fd = do_open(tmp, O_RDONLY, 0);
+ unlink_tmp = 1;
+ }
+ }
+ }
+
if (fd == -1) {
io_error = 1;
rprintf(FERROR,"send_files failed to open %s: %s\n",
@@ -275,6 +323,7 @@
if (!read_batch) { /* dw */
if (buf) unmap_file(buf);
close(fd);
+ if (unlink_tmp) unlink(tmp);
}
free_sums(s);
More information about the rsync
mailing list