specifying a list of files to transfer

Wayne Davison wayned at users.sourceforge.net
Tue Jan 14 23:58:00 EST 2003


On Tue, Jan 14, 2003 at 03:32:41PM -0600, Dave Dykstra wrote:
>     1. Yes it should take a filename or - as a parameter.
>     2. I don't like the idea of skipping the SRC spec.  Paths should be
> 	relative to the SRC.  If somebody wants to use full paths they
> 	can always have a SRC of "/".
>     3. It should be called --files-from.
>     4. --send-dirs and --no-implicit-dirs shouldn't be separate options,
> 	they should be automatically turned on with the --files-from option.

OK, I'm also fine with these points.  Note RE comment #2: even though
relative path names are now interpreted relative to the SRC dir, the
user can still include absolute path names in the list and rsync will
transfer them without a problem.  Also, I think the older implementation
of --files-from implied the -R (--relative) option, and this
implementation does not.

So, here's a *VERY EARLY* implementation that can transfer files in
either direction.  It adds the option --files-from=FILE and the option
--null (for null-terminated names).  "FILE" can be "-" for stdin.  This
patch is relative to the CVS version, and is only for those who want to
assist in implementation, design, and/or testing.  **I have not tested
daemon mode at all yet, just simple ssh transfers in both directions.**

Compatibility note:  when pushing files, the --files-from mode will work
with any older version of rsync that we can already transfer files with.
When pulling files, the remote rsync must understand the
"--files-from=-" option (which, because it is combined with the --server
option, tells the remote side to read the file list over its stdin
socket).

Aside:  there was a huge chunk of code in main.c that was not indented
correctly (due to the addition of some read_batch stuff).  I didn't want
to march the code off the edge of the screen any further, so I made the
read_batch code use a goto.  Those that have a weak stomach may wish to
avert their gaze from that portion of the patch.

..wayne..
-------------- next part --------------
Index: flist.c
--- flist.c	24 Dec 2002 07:42:04 -0000	1.127
+++ flist.c	14 Jan 2003 23:44:21 -0000
@@ -41,6 +41,8 @@
 extern int cvs_exclude;
 
 extern int recurse;
+extern char *files_from;
+extern int files_from_fd;
 
 extern int one_file_system;
 extern int make_backups;
@@ -680,7 +682,7 @@
 	if (noexcludes)
 		goto skip_excludes;
 
-	if (S_ISDIR(st.st_mode) && !recurse) {
+	if (S_ISDIR(st.st_mode) && !recurse && !files_from) {
 		rprintf(FINFO, "skipping directory %s\n", fname);
 		return NULL;
 	}
@@ -876,12 +878,13 @@
  **/
 struct file_list *send_file_list(int f, int argc, char *argv[])
 {
-	int i, l;
+	int l;
 	STRUCT_STAT st;
 	char *p, *dir, *olddir;
 	char lastpath[MAXPATHLEN] = "";
 	struct file_list *flist;
 	int64 start_write;
+	int use_ff_fd = 0;
 
 	if (show_filelist_p() && f != -1)
 		start_filelist_progress("building file list");
@@ -890,16 +893,33 @@
 
 	flist = flist_new();
 
-	if (f != -1) {
+	if (f != -1)
 		io_start_buffering(f);
+
+	if (files_from && f != -1) {
+		if (!push_dir(argv[0], 0)) {
+			rprintf(FERROR, "push_dir %s : %s\n",
+				argv[0], strerror(errno));
+			exit_cleanup(RERR_FILESELECT);
+		}
+		use_ff_fd = 1;
 	}
 
-	for (i = 0; i < argc; i++) {
+	while (1) {
 		char *fname = topsrcname;
 
-		strlcpy(fname, argv[i], MAXPATHLEN);
+		if (use_ff_fd) {
+			l = read_filesfrom_line(files_from_fd, fname);
+			if (!l)
+				break;
+		}
+		else {
+			if (argc-- == 0)
+				break;
+			strlcpy(fname, *argv++, MAXPATHLEN);
+			l = strlen(fname);
+		}
 
-		l = strlen(fname);
 		if (l != 1 && fname[l - 1] == '/') {
 			if ((l == 2) && (fname[0] == '.')) {
 				/*  Turn ./ into just . rather than ./.
@@ -922,7 +942,7 @@
 			continue;
 		}
 
-		if (S_ISDIR(st.st_mode) && !recurse) {
+		if (S_ISDIR(st.st_mode) && !recurse && !files_from) {
 			rprintf(FINFO, "skipping directory %s\n", fname);
 			continue;
 		}
@@ -940,7 +960,7 @@
 					dir = fname;
 				fname = p + 1;
 			}
-		} else if (f != -1 && (p = strrchr(fname, '/'))) {
+		} else if (f != -1 && !files_from && (p=strrchr(fname,'/'))) {
 			/* this ensures we send the intermediate directories,
 			   thus getting their permissions right */
 			*p = 0;
@@ -1034,6 +1054,9 @@
 
 	if (verbose > 2)
 		rprintf(FINFO, "send_file_list done\n");
+
+	if (use_ff_fd && !am_server)
+		close(files_from_fd);
 
 	return flist;
 }
Index: io.c
--- io.c	11 Apr 2002 02:11:50 -0000	1.105
+++ io.c	14 Jan 2003 23:44:21 -0000
@@ -76,6 +76,9 @@
 
 
 static int io_error_fd = -1;
+static int io_files_from_f_in = -1;
+static int io_files_from_f_out = -1;
+static int io_files_from_active = 0;
 
 static void read_loop(int fd, char *buf, size_t len);
 
@@ -141,6 +144,11 @@
 	io_error_fd = fd;
 }
 
+void io_set_files_from_fds(int f_in, int f_out)
+{
+	io_files_from_f_in = f_in;
+	io_files_from_f_out = f_out;
+}
 
 /**
  * It's almost always an error to get an EOF when we're trying to read
@@ -197,16 +205,29 @@
 
 	while (ret == 0) {
 		/* until we manage to read *something* */
-		fd_set fds;
+		fd_set r_fds, w_fds;
 		struct timeval tv;
 		int fd_count = fd+1;
 		int count;
 
-		FD_ZERO(&fds);
-		FD_SET(fd, &fds);
+		FD_ZERO(&r_fds);
+		FD_SET(fd, &r_fds);
 		if (io_error_fd != -1) {
-			FD_SET(io_error_fd, &fds);
-			if (io_error_fd > fd) fd_count = io_error_fd+1;
+			FD_SET(io_error_fd, &r_fds);
+			if (io_error_fd >= fd_count) fd_count = io_error_fd+1;
+		}
+		if (io_files_from_f_in != -1) {
+			int new_fd;
+			if (io_files_from_active) {
+				FD_ZERO(&w_fds);
+				FD_SET(io_files_from_f_out, &w_fds);
+				new_fd = io_files_from_f_out;
+			}
+			else {
+				FD_SET(io_files_from_f_in, &r_fds);
+				new_fd = io_files_from_f_in;
+			}
+			if (new_fd >= fd_count) fd_count = new_fd+1;
 		}
 
 		tv.tv_sec = io_timeout?io_timeout:SELECT_TIMEOUT;
@@ -214,7 +235,9 @@
 
 		errno = 0;
 
-		count = select(fd_count, &fds, NULL, NULL, &tv);
+		count = select(fd_count, &r_fds,
+			       io_files_from_active? &w_fds : NULL,
+			       NULL, &tv);
 
 		if (count == 0) {
 			check_timeout();
@@ -227,11 +250,34 @@
 			continue;
 		}
 
-		if (io_error_fd != -1 && FD_ISSET(io_error_fd, &fds)) {
+
+		if (io_error_fd != -1 && FD_ISSET(io_error_fd, &r_fds)) {
 			read_error_fd();
 		}
 
-		if (!FD_ISSET(fd, &fds)) continue;
+		if (io_files_from_f_in != -1) {
+			if (io_files_from_active) {
+				if (FD_ISSET(io_files_from_f_out, &w_fds)) {
+					char tmpbuf[1024];
+					int l = read(io_files_from_f_in,
+						     tmpbuf, sizeof tmpbuf);
+					if (l <= 0) {
+						extern char ffrom_rs;
+						write(io_files_from_f_out,
+						      &ffrom_rs, 1);
+						io_files_from_f_in = -1;
+					}
+					else {
+						write(io_files_from_f_out,
+						      tmpbuf, l);
+					}
+					io_files_from_active = 0;
+				}
+			} else if (FD_ISSET(io_files_from_f_in, &r_fds))
+				io_files_from_active = 1;
+		}
+
+		if (!FD_ISSET(fd, &r_fds)) continue;
 
 		n = read(fd, buf, len);
 
@@ -257,7 +303,39 @@
 	return ret;
 }
 
+int read_filesfrom_line(int fd, char *fname)
+{
+	char *s = fname;
+	char *eob = fname + MAXPATHLEN - 1;
+	char ch;
+	int cnt;
+	extern char ffrom_rs;
+	extern int io_timeout;
+
+	while (1) {
+		cnt = read(fd, &ch, 1);
+		if (cnt < 0 && (errno == EWOULDBLOCK
+		  || errno == EINTR || errno == EAGAIN)) {
+			struct timeval tv;
+			fd_set fds;
+			FD_ZERO(&fds);
+			FD_SET(fd, &fds);
+			tv.tv_sec = io_timeout? io_timeout : SELECT_TIMEOUT;
+			tv.tv_usec = 0;
+			cnt = select(fd+1, &fds, NULL, NULL, &tv);
+			if (!cnt)
+				check_timeout();
+			continue;
+		}
+		if (cnt != 1 || ch == ffrom_rs)
+			break;
+		if (s < eob)
+			*s++ = ch;
+	}
+	*s = '\0';
 
+	return s - fname;
+}
 
 
 /**
@@ -454,11 +532,11 @@
 
 	while (total < len) {
 		FD_ZERO(&w_fds);
-		FD_ZERO(&r_fds);
 		FD_SET(fd,&w_fds);
 		fd_count = fd;
 
 		if (io_error_fd != -1) {
+			FD_ZERO(&r_fds);
 			FD_SET(io_error_fd,&r_fds);
 			if (io_error_fd > fd_count) 
 				fd_count = io_error_fd;
Index: main.c
--- main.c	10 Jan 2003 08:32:09 -0000	1.158
+++ main.c	14 Jan 2003 23:44:21 -0000
@@ -22,8 +22,13 @@
 #include "rsync.h"
 
 time_t starttime = 0;
+int files_from_fd;
 
 extern struct stats stats;
+extern char *files_from;
+extern int am_server;
+extern int am_sender;
+extern int am_daemon;
 extern int verbose;
 
 /* there's probably never more than at most 2 outstanding child processes,
@@ -75,9 +80,6 @@
 static void report(int f)
 {
 	time_t t = time(NULL);
-	extern int am_server;
-	extern int am_sender;
-	extern int am_daemon;
 	extern int do_stats;
 	extern int remote_version;
 	int send_stats;
@@ -165,9 +167,6 @@
 {
 #ifdef HAVE_MALLINFO
 	struct mallinfo mi;
-	extern int am_server;
-	extern int am_sender;
-	extern int am_daemon;
 
 	mi = mallinfo();
 
@@ -476,9 +475,7 @@
 	char *dir = NULL;
 	extern int delete_mode;
 	extern int delete_excluded;
-	extern int am_daemon;
 	extern int module_id;
-	extern int am_sender;
 	extern int read_batch;
 	extern struct file_list *batch_flist;
 
@@ -507,9 +504,9 @@
 		recv_exclude_list(f_in);
 
 	if (read_batch)
-	    flist = batch_flist;
+		flist = batch_flist;
 	else
-	    flist = recv_file_list(f_in);
+		flist = recv_file_list(f_in);
 	if (!flist) {
 		rprintf(FERROR,"server_recv: recv_file_list error\n");
 		exit_cleanup(RERR_FILESELECT);
@@ -538,7 +535,6 @@
 void start_server(int f_in, int f_out, int argc, char *argv[])
 {
 	extern int cvs_exclude;
-	extern int am_sender;
 	extern int remote_version;
 	extern int read_batch;
 
@@ -573,7 +569,6 @@
 	struct file_list *flist = NULL;
 	int status = 0, status2 = 0;
 	char *local_name = NULL;
-	extern int am_sender;
 	extern int remote_version;
 	extern pid_t cleanup_child_pid;
 	extern int write_batch;
@@ -601,7 +596,7 @@
 		if (delete_mode && !delete_excluded) 
 			send_exclude_list(f_out);
 		if (!read_batch) /*  dw -- don't write to pipe */
-		    flist = send_file_list(f_out,argc,argv);
+			flist = send_file_list(f_out,argc,argv);
 		if (verbose > 3) 
 			rprintf(FINFO,"file list sent\n");
 
@@ -628,6 +623,15 @@
 	if (!write_batch)
 	    send_exclude_list(f_out);
 	
+	if (files_from) {
+		/* We're receiving the file info from the sender, so we need
+		 * the IO routines to automatically write out the names onto
+		 * our f_out socket as we read the list info from the sender.
+		 * This avoids both deadlock and extra delays. */
+		io_set_files_from_fds(files_from_fd, f_out);
+		files_from_fd = -1;
+	}
+
 	flist = recv_file_list(f_in);
 	if (!flist || flist->count == 0) {
 		rprintf(FINFO, "client: nothing to do: "
@@ -700,7 +704,6 @@
 	pid_t pid;
 	int f_in,f_out;
 	extern int local_server;
-	extern int am_sender;
 	extern char *shell_cmd;
 	extern int rsync_port;
 	extern int daemon_over_rsh;
@@ -732,9 +735,14 @@
 		return start_socket_client(host, path, argc-1, argv+1);
 	}
 
-	if (!read_batch) {
-		p = find_colon(argv[0]);
+	if (read_batch) {
+		am_sender = 1;
+		local_server = 1;
+		shell_path = argv[argc-1];
+		goto local_arg;
+	}
 
+	p = find_colon(argv[0]);
 	if (p) {
 		if (p[1] == ':') { /* double colon */
 			*p = 0;
@@ -808,12 +816,8 @@
 		}
 		argc--;
 	}
-	} else {
-	    am_sender = 1;
-	    local_server = 1;
-	    shell_path = argv[argc-1];
-	}
 
+    local_arg:
 	if (shell_machine) {
 		p = strchr(shell_machine,'@');
 		if (p) {
@@ -953,8 +957,6 @@
 	extern int am_root;
 	extern int orig_umask;
 	extern int dry_run;
-	extern int am_daemon;
-	extern int am_server;
 	int ret;
 	extern int write_batch;
 	int orig_argc;
@@ -992,6 +994,21 @@
                  * code here, rather than relying on getopt. */
 		option_error();
 		exit_cleanup(RERR_SYNTAX);
+	}
+
+	if (files_from) {
+		if (argc != 2) {
+			usage(FERROR);
+			exit_cleanup(RERR_SYNTAX);
+		}
+		if (strcmp(files_from, "-") == 0)
+			files_from_fd = 0;
+		else if ((files_from_fd = open(files_from, O_RDONLY)) < 0) {
+			rsyserr(FERROR, errno,
+				"failed to open files-from file %s",
+				files_from);
+			exit_cleanup(RERR_FILEIO);
+		}
 	}
 
 	signal(SIGINT,SIGNAL_CAST sig_int);
Index: options.c
--- options.c	11 Jan 2003 01:29:30 -0000	1.100
+++ options.c	14 Jan 2003 23:44:21 -0000
@@ -65,6 +65,8 @@
 int module_id = -1;
 int am_server = 0;
 int am_sender = 0;
+char *files_from = NULL;
+char ffrom_rs = '\n';
 int recurse = 0;
 int am_daemon = 0;
 int daemon_over_rsh = 0;
@@ -274,6 +276,8 @@
   rprintf(F,"     --bwlimit=KBPS          limit I/O bandwidth, KBytes per second\n");
   rprintf(F,"     --write-batch=PREFIX    write batch fileset starting with PREFIX\n");
   rprintf(F,"     --read-batch=PREFIX     read batch fileset starting with PREFIX\n");
+  rprintf(F,"     --files-from=FILE       read FILE for list of source-file names\n");
+  rprintf(F,"     --null                  indicates --files-from names are separated by nulls, not newlines\n");
   rprintf(F," -h, --help                  show this help screen\n");
 #ifdef INET6
   rprintf(F," -4                          prefer IPv4\n");
@@ -295,7 +299,8 @@
       OPT_DELETE_AFTER, OPT_EXISTING, OPT_MAX_DELETE, OPT_BACKUP_DIR, 
       OPT_IGNORE_ERRORS, OPT_BWLIMIT, OPT_BLOCKING_IO,
       OPT_NO_BLOCKING_IO, OPT_WHOLE_FILE, OPT_NO_WHOLE_FILE,
-      OPT_MODIFY_WINDOW, OPT_READ_BATCH, OPT_WRITE_BATCH, OPT_IGNORE_EXISTING};
+      OPT_MODIFY_WINDOW, OPT_READ_BATCH, OPT_WRITE_BATCH, OPT_IGNORE_EXISTING,
+      OPT_NULL};
 
 static struct poptOption long_options[] = {
   /* longName, shortName, argInfo, argPtr, value, descrip, argDesc */
@@ -370,6 +375,8 @@
   {"hard-links",      'H', POPT_ARG_NONE,   &preserve_hard_links , 0, 0, 0 },
   {"read-batch",       0,  POPT_ARG_STRING, &batch_prefix, OPT_READ_BATCH, 0, 0 },
   {"write-batch",      0,  POPT_ARG_STRING, &batch_prefix, OPT_WRITE_BATCH, 0, 0 },
+  {"files-from",       0,  POPT_ARG_STRING, &files_from, 0, 0, 0 },
+  {"null",             0,  POPT_ARG_NONE,   0,             OPT_NULL, 0, 0},
 #ifdef INET6
   {0,		      '4', POPT_ARG_VAL,    &default_af_hint,   AF_INET , 0, 0 },
   {0,		      '6', POPT_ARG_VAL,    &default_af_hint,   AF_INET6 , 0, 0 },
@@ -593,6 +600,11 @@
 			/* popt stores the filename in batch_prefix for us */
 			read_batch = 1;
 			break;
+
+		case OPT_NULL:
+			ffrom_rs = '\0';
+			break;
+
 		case OPT_LINK_DEST:
 #if HAVE_LINK
 			compare_dest = (char *)poptGetOptArg(pc);
@@ -844,6 +856,12 @@
 		 */
 		args[ac++] = link_dest ? "--link-dest" : "--compare-dest";
 		args[ac++] = compare_dest;
+	}
+
+	if (files_from && !am_sender) {
+		args[ac++] = "--files-from=-";
+		if (ffrom_rs == '\0')
+			args[ac++] = "--null";
 	}
 
 	*argc = ac;
Index: proto.h
--- proto.h	24 Dec 2002 20:16:38 -0000	1.150
+++ proto.h	14 Jan 2003 23:44:21 -0000
@@ -97,6 +97,8 @@
 int check_hard_link(struct file_struct *file);
 void do_hard_links(void);
 void io_set_error_fd(int fd);
+void io_set_files_from_fds(int f_in, int f_out);
+int read_filesfrom_line(int fd, char *fname);
 int32 read_int(int f);
 int64 read_longint(int f);
 void read_buf(int f,char *buf,size_t len);


More information about the rsync mailing list