PATCH: better handling for write failures (disk full)

John R. LoVerso john at loverso.southborough.ma.us
Fri May 23 00:37:07 EST 2003


[I sent this the other day, but it never got approved for the list]

I've been having problems trying to sync two small partitions (128MB)
that are usually near being full.  The rsync would fail with this
cryptic error:

rsync: writefd_unbuffered failed to write 4 bytes: phase "unknown": Broken pipe
rsync error: error in rsync protocol data stream (code 12) at io.c(515)

It turns out that if rsync gets a write error while transferring a file
(for example, when the destination partition fills up), it just fails.

That's because the code in receive_data() just calls exit_cleanup()
upon a write error, which bombs out the receiver.  For whatever
reason, the sender doesn't handle this gracefully, and in turn aborts.

People have "worked around" this problem by using "-T", putting the
received file in temp space (assuming it's big enough).  That allows
the receive to complete without error (avoiding the abort).  Then
finish_transfer() will copy the file from the temp space, overwriting
the destination file using copy_file(), which usually avoids filling
the destination partition.  Thus, using "-T" is the only way to
transfer into a nearly full partition.

However, there is one other problem: if copy_file() fills up the
destination partition, it will fail.  At least it gives a valid
error message with "-vv" and doesn't abort the whole sync:

renaming /var/tmp/.x1.OApztf to dest/x1
write dest/x1: No space left on device
copy /var/tmp/.x1.OApztf -> dest/x1 : No space left on device

However, in this case, it never removes the truncated destination file,
leaving the destination partition full (and guaranteeing that nothing
else will transfer).

In my case, if it fills the small partition, I don't want the
partially transferred file around.  If I did, I'd have specified
the "--partial" option.


Here are two patches ("these work for me, YMMV"):

receiver.c:	upon a write error, discard the rest of the
		current file transfer and keep working instead of giving up,
		and generate a useful error message.
rsync.c:	if using -T but not --partial, remove a partial result
		when a write error occurs

Perhaps the changes in receive_data() could specifically just target
ENOSPC, on the assumption that any other write error is fatal.

I'm also using John Van Essen's write_file() patch from:
	http://lists.samba.org/pipermail/rsync/2003-April/010511.html

John


diff -Nru a/rsync/receiver.c b/rsync/receiver.c
--- a/rsync/receiver.c	Tue May 20 08:56:43 2003
+++ b/rsync/receiver.c	Tue May 20 08:56:43 2003
@@ -214,6 +214,7 @@
 	static char file_sum1[MD4_SUM_LENGTH];
 	static char file_sum2[MD4_SUM_LENGTH];
 	char *map=NULL;
+	int discard = 0;
 	
 	count = read_int(f_in);
 	n = read_int(f_in);
@@ -240,7 +241,9 @@
 
 			if (fd != -1 && write_file(fd,data,i) != i) {
 				rprintf(FERROR,"write failed on %s : %s\n",fname,strerror(errno));
-				exit_cleanup(RERR_FILEIO);
+				discard = 1;
+				fd = -1;
+				// exit_cleanup(RERR_FILEIO);
 			}
 			offset += i;
 			continue;
@@ -268,7 +271,9 @@
 		if (fd != -1 && write_file(fd,map,len) != (int) len) {
 			rprintf(FERROR,"write failed on %s : %s\n",
 				fname,strerror(errno));
-			exit_cleanup(RERR_FILEIO);
+			discard = 1;
+			fd = -1;
+			// exit_cleanup(RERR_FILEIO);
 		}
 		offset += len;
 	}
@@ -278,7 +283,9 @@
 	if (fd != -1 && offset > 0 && sparse_end(fd) != 0) {
 		rprintf(FERROR,"write failed on %s : %s\n",
 			fname,strerror(errno));
-		exit_cleanup(RERR_FILEIO);
+		discard = 1;
+		fd = -1;
+		// exit_cleanup(RERR_FILEIO);
 	}
 
 	sum_end(file_sum1);
@@ -293,6 +300,8 @@
 			return 0;
 		}
 	}
+	if (discard)
+		return 2;
 	return 1;
 }
 
@@ -458,6 +467,16 @@
 			close(fd1);
 		}
 		close(fd2);
+
+	    /*
+	     * This means a write error occured, and the file is discarded
+	     */
+	    if (recv_ok == 2) {
+		if (verbose > 2)
+			rprintf(FINFO,"discarding %s\n",fname);
+		do_unlink(fnametmp);
+		cleanup_disable();
+	    } else {
 		
 		if (verbose > 2)
 			rprintf(FINFO,"renaming %s to %s\n",fnametmp,fname);
@@ -476,6 +495,7 @@
 				write_int(f_gen,i);
 			}
 		}
+	    }
 	}
 
 	if (delete_after) {
diff -Nru a/rsync/rsync.c b/rsync/rsync.c
--- a/rsync/rsync.c	Tue May 20 08:56:43 2003
+++ b/rsync/rsync.c	Tue May 20 08:56:43 2003
@@ -243,8 +243,14 @@
 			/* rename failed on cross-filesystem link.  
 			   Copy the file instead. */
 			if (copy_file(fnametmp,fname, file->mode & INITACCESSPERMS)) {
-				rprintf(FERROR,"copy %s -> %s : %s\n",
+				int err = errno;
+				extern int keep_partial;
+				rprintf(FERROR,"error copy %s -> %s : %s\n",
 					fnametmp,fname,strerror(errno));
+				/* remove partial result if disk full */
+				if (err == ENOSPC && !keep_partial) {
+					(void)unlink(fname);
+				}
 			} else {
 				set_perms(fname,file,NULL,0);
 			}




More information about the rsync mailing list