direct write patch

Dave Dykstra dwd at bell-labs.com
Tue Nov 13 05:40:21 EST 2001


Oh boy, I think you're getting into quite a can of worms there.

At a minimum this option should imply the --partial option because if the
operation is aborted the file will be left partially transferred.  Note
that if you're trying to use the rsync rolling checksum algorithm to
minimize bandwidth that if a transfer is interrupted all the previous
data will be lost so there will be less data to compare against when the
transfer is retried.

Next, keep in mind that the receiving side of an rsync transfer uses two
independent processes, one for generating checksums and one for creating
files.  I'm not knowledgable enough to know whether or not the creating files
operation is guaranteed to begin only after the checksum generation is 
completed on each file, but if it isn't then overwriting a file could be
a big problem.  Have you tried transferring any very large files?

I'm surprised you didn't need to do anything to finish_transfer to keep
the robust_rename from returning an error.

The last thing that comes to mind is that overwriting files that are open
by another process, such as a running executable, can be a problem.  An
unlink/rename works much better for that.

I'm sure there are more issues too.

- Dave Dykstra


On Mon, Nov 12, 2001 at 10:12:54AM -0800, Don Mahurin wrote:
> I have attached a patch that supports a new "--direct-write" option.
> 
> The result of using this option is to write directly to the destination
> files, instead of a temporary file first.
> 
> The reason this patch is needed is for rsyncing to a device where the
> device is full or nearly full.
> 
> Say that I am writing to a device that has 1 Meg free, and a 2 meg file
> on that device is out of date.
> Rsync will first attempt to write a new temp file, and fail, SIGUSR1'ing
> itself, and outputting "Error 20".
> 
> Specifically, I am writing a linux root fs to a 32Meg compact flash, and
> libc needs to be updated, and rsync fails.
> 
> This patch simply sets fnametmp to fname.
> 
> Two issues with the patch, that hopefully developers can answer:
> 
> - In direct-write mode, I open without O_EXCL, as the file likely does
> exist.
>   Should the destination file be deleted instead? (I do not know what
> exactly the race condition is)
> 
> - There is a section after the assignment of fnametmp, and before the
> open that does do_mktemp, then
>   receive_data.  What is the purpose of this part? I skip it for
> direct-write, and it works, but what do I know?
> 
> 
> -don

> Only in rsync-2.4.6-direct-write/lib: dummy
> diff -ru rsync-2.4.6/options.c rsync-2.4.6-direct-write/options.c
> --- rsync-2.4.6/options.c	Tue Sep  5 19:46:43 2000
> +++ rsync-2.4.6-direct-write/options.c	Sun Nov 11 10:40:01 2001
> @@ -22,6 +22,7 @@
>  
>  
>  int make_backups = 0;
> +int direct_write = 0;
>  int whole_file = 0;
>  int copy_links = 0;
>  int preserve_links = 0;
> @@ -147,6 +148,7 @@
>    rprintf(F,"     --ignore-errors         delete even if there are IO errors\n");
>    rprintf(F,"     --max-delete=NUM        don't delete more than NUM files\n");
>    rprintf(F,"     --partial               keep partially transferred files\n");
> +  rprintf(F,"     --direct-write          write directly to the destination files\n");
>    rprintf(F,"     --force                 force deletion of directories even if not empty\n");
>    rprintf(F,"     --numeric-ids           don't map uid/gid values by user/group name\n");
>    rprintf(F,"     --timeout=TIME          set IO timeout in seconds\n");
> @@ -188,7 +190,7 @@
>        OPT_LOG_FORMAT, OPT_PASSWORD_FILE, OPT_SIZE_ONLY, OPT_ADDRESS,
>        OPT_DELETE_AFTER, OPT_EXISTING, OPT_MAX_DELETE, OPT_BACKUP_DIR, 
>        OPT_IGNORE_ERRORS, OPT_BWLIMIT, OPT_BLOCKING_IO,
> -      OPT_MODIFY_WINDOW};
> +      OPT_MODIFY_WINDOW, OPT_DIRECT_WRITE};
>  
>  static char *short_options = "oblLWHpguDCtcahvqrRIxnSe:B:T:zP";
>  
> @@ -227,6 +229,7 @@
>    {"perms",       0,     0,    'p'},
>    {"links",       0,     0,    'l'},
>    {"copy-links",  0,     0,    'L'},
> +  {"direct-write",     0,     0,    OPT_DIRECT_WRITE},
>    {"copy-unsafe-links", 0, 0,  OPT_COPY_UNSAFE_LINKS},
>    {"safe-links",  0,     0,    OPT_SAFE_LINKS},
>    {"whole-file",  0,     0,    'W'},
> @@ -400,6 +403,10 @@
>  			safe_symlinks=1;
>  			break;
>  
> +		case OPT_DIRECT_WRITE:
> +			direct_write = 1;
> +			break;
> +
>  		case 'h':
>  			usage(FINFO);
>  			exit_cleanup(0);
> @@ -554,6 +561,8 @@
>  			keep_partial = 1;
>  			break;
>  
> +
> +
>  		case OPT_IGNORE_ERRORS:
>  			ignore_errors = 1;
>  			break;
> @@ -691,6 +700,9 @@
>  		slprintf(mdelete,sizeof(mdelete),"--max-delete=%d",max_delete);
>  		args[ac++] = mdelete;
>  	}    
> +
> +	if (direct_write)
> +		args[ac++] = "--direct-write";
>  
>  	if (io_timeout) {
>  		slprintf(iotime,sizeof(iotime),"--timeout=%d",io_timeout);
> diff -ru rsync-2.4.6/receiver.c rsync-2.4.6-direct-write/receiver.c
> --- rsync-2.4.6/receiver.c	Thu Mar 30 06:23:03 2000
> +++ rsync-2.4.6-direct-write/receiver.c	Sun Nov 11 11:14:43 2001
> @@ -34,6 +34,7 @@
>  extern char *tmpdir;
>  extern char *compare_dest;
>  extern int make_backups;
> +extern int direct_write;
>  extern char *backup_suffix;
>  
>  static struct delete_list {
> @@ -302,7 +303,8 @@
>  	int fd1,fd2;
>  	STRUCT_STAT st;
>  	char *fname;
> -	char fnametmp[MAXPATHLEN];
> +	char *fnametmp;
> +	char fnametmpbuf[MAXPATHLEN];
>  	char *fnamecmp;
>  	char fnamecmpbuf[MAXPATHLEN];
>  	struct map_struct *buf;
> @@ -314,6 +316,7 @@
>  	extern int preserve_perms;
>  	extern int delete_after;
>  	struct stats initial_stats;
> +	int write_flags;
>  
>  	if (verbose > 2) {
>  		rprintf(FINFO,"recv_files(%d) starting\n",flist->count);
> @@ -404,22 +407,29 @@
>  			buf = NULL;
>  		}
>  
> -		if (!get_tmpname(fnametmp,fname)) {
> -			if (buf) unmap_file(buf);
> -			if (fd1 != -1) close(fd1);
> -			continue;
> -		}
> +		if(direct_write) {
> +			fnametmp = fname;
> +			write_flags = O_WRONLY|O_CREAT;
> +		} else {
> +			fnametmp = fnametmpbuf;
> +			if (!get_tmpname(fnametmp,fname)) {
> +				if (buf) unmap_file(buf);
> +				if (fd1 != -1) close(fd1);
> +				continue;
> +			}
> +			write_flags = O_WRONLY|O_CREAT|O_EXCL;
>  
>  		/* mktemp is deliberately used here instead of mkstemp.
>  		   because O_EXCL is used on the open, the race condition
>  		   is not a problem or a security hole, and we want to
>  		   control the access permissions on the created file. */
> -		if (NULL == do_mktemp(fnametmp)) {
> -			rprintf(FERROR,"mktemp %s failed\n",fnametmp);
> -			receive_data(f_in,buf,-1,NULL,file->length);
> -			if (buf) unmap_file(buf);
> -			if (fd1 != -1) close(fd1);
> -			continue;
> +			if (NULL == do_mktemp(fnametmp)) {
> +				rprintf(FERROR,"mktemp %s failed\n",fnametmp);
> +				receive_data(f_in,buf,-1,NULL,file->length);
> +				if (buf) unmap_file(buf);
> +				if (fd1 != -1) close(fd1);
> +				continue;
> +			}
>  		}
>  
>  		/* we initially set the perms without the
> @@ -428,7 +438,7 @@
>  		   the lchown. Thanks to snabb at epipe.fi for pointing
>  		   this out.  We also set it initially without group
>  		   access because of a similar race condition. */
> -		fd2 = do_open(fnametmp,O_WRONLY|O_CREAT|O_EXCL,
> +		fd2 = do_open(fnametmp,write_flags,
>  			      file->mode & INITACCESSPERMS);
>  
>  		/* in most cases parent directories will already exist
> @@ -436,7 +446,7 @@
>  		   transferred, but that may not be the case with -R */
>  		if (fd2 == -1 && relative_paths && errno == ENOENT && 
>  		    create_directory_path(fnametmp) == 0) {
> -			fd2 = do_open(fnametmp,O_WRONLY|O_CREAT|O_EXCL,
> +			fd2 = do_open(fnametmp,write_flags,
>  				      file->mode & INITACCESSPERMS);
>  		}
>  		if (fd2 == -1) {





More information about the rsync mailing list