[patch] paranoid checksum checking

Nick Burrett nick at sqrt.co.uk
Wed Jul 27 10:05:17 GMT 2005


Chris Shoemaker wrote:
> On Tue, Jul 26, 2005 at 06:19:08PM +0100, Nick Burrett wrote:
> 
>>The attached patch provides an additional check for the checksumming 
>>mode to ensure that a file that is actually written out to disk can be 
>>read back and has the same MD4 sum as the file on at the originating 
>>location.
> 
> 
> I'm not so sure there's a strong correlation between the group of
> people who want the normal '--checksum' behavior and the people who
> want forced read-back verification.  I can think of many cases where
> the user would want each without the other.
> 
> I propose that this behavior shouldn't be grouped with "--checksum"
> Instead, perhaps make it a '--verify' or '--re-verify' option.  Also,
> this patch needs a corresponding change to rsync.yo.

Thanks for the feedback.  I agree with what you're saying since the 
--checksum option serves a different purpose, than the actual 
verification of data transmitted and stored on disk.  The attached patch 
incorporates your suggestions, and calls the option '--read-verify'.

Regards,

Nick.
-------------- next part --------------
diff -cp rsync-2.6.6pre1/flist.c rsync-new/flist.c
*** rsync-2.6.6pre1/flist.c	2005-07-07 20:49:14.000000000 +0100
--- rsync-new/flist.c	2005-07-27 10:44:46.000000000 +0100
*************** extern int copy_unsafe_links;
*** 57,62 ****
--- 57,63 ----
  extern int protocol_version;
  extern int sanitize_paths;
  extern int orig_umask;
+ extern int read_verify;
  extern struct stats stats;
  extern struct file_list *the_file_list;
  
*************** void send_file_entry(struct file_struct 
*** 480,486 ****
  	}
  #endif
  
! 	if (always_checksum) {
  		char *sum;
  		if (S_ISREG(mode))
  			sum = file->u.sum;
--- 481,487 ----
  	}
  #endif
  
! 	if (always_checksum || read_verify) {
  		char *sum;
  		if (S_ISREG(mode))
  			sum = file->u.sum;
*************** static struct file_struct *receive_file_
*** 618,624 ****
  #endif
  		linkname_len = 0;
  
! 	sum_len = always_checksum && S_ISREG(mode) ? MD4_SUM_LENGTH : 0;
  
  	alloc_len = file_struct_len + dirname_len + basename_len
  		  + linkname_len + sum_len;
--- 619,628 ----
  #endif
  		linkname_len = 0;
  
!         if (S_ISREG (mode) && (always_checksum || read_verify))
!                 sum_len = MD4_SUM_LENGTH;
!         else
!                 sum_len = 0;
  
  	alloc_len = file_struct_len + dirname_len + basename_len
  		  + linkname_len + sum_len;
*************** static struct file_struct *receive_file_
*** 708,714 ****
  	}
  #endif
  
! 	if (always_checksum) {
  		char *sum;
  		if (sum_len) {
  			file->u.sum = sum = bp;
--- 712,718 ----
  	}
  #endif
  
! 	if (always_checksum || read_verify) {
  		char *sum;
  		if (sum_len) {
  			file->u.sum = sum = bp;
*************** skip_filters:
*** 864,870 ****
  	linkname_len = 0;
  #endif
  
! 	sum_len = always_checksum && S_ISREG(st.st_mode) ? MD4_SUM_LENGTH : 0;
  
  	alloc_len = file_struct_len + dirname_len + basename_len
  	    + linkname_len + sum_len;
--- 868,877 ----
  	linkname_len = 0;
  #endif
  
!         if (S_ISREG (st.st_mode) && (always_checksum || read_verify))
!                 sum_len = MD4_SUM_LENGTH;
!         else
!                 sum_len = 0;
  
  	alloc_len = file_struct_len + dirname_len + basename_len
  	    + linkname_len + sum_len;
*************** static void clean_flist(struct file_list
*** 1567,1579 ****
  	}
  }
  
- 
  static void output_flist(struct file_list *flist)
  {
! 	char uidbuf[16], gidbuf[16], depthbuf[16];
  	struct file_struct *file;
  	const char *who = who_am_i();
! 	int i;
  
  	for (i = 0; i < flist->count; i++) {
  		file = flist->files[i];
--- 1574,1585 ----
  	}
  }
  
  static void output_flist(struct file_list *flist)
  {
! 	char uidbuf[16], gidbuf[16], depthbuf[16], csum[SUM_LENGTH * 2 + 1];
  	struct file_struct *file;
  	const char *who = who_am_i();
! 	int i, j;
  
  	for (i = 0; i < flist->count; i++) {
  		file = flist->files[i];
*************** static void output_flist(struct file_lis
*** 1585,1598 ****
  			sprintf(gidbuf, " gid=%ld", (long)file->gid);
  		else
  			*gidbuf = '\0';
  		if (!am_sender)
  			sprintf(depthbuf, "%d", file->dir.depth);
! 		rprintf(FINFO, "[%s] i=%d %s %s%s%s%s mode=0%o len=%.0f%s%s flags=%x\n",
  			who, i, am_sender ? NS(file->dir.root) : depthbuf,
  			file->dirname ? safe_fname(file->dirname) : "",
  			file->dirname ? "/" : "", NS(file->basename),
  			S_ISDIR(file->mode) ? "/" : "", (int)file->mode,
! 			(double)file->length, uidbuf, gidbuf, file->flags);
  	}
  }
  
--- 1591,1610 ----
  			sprintf(gidbuf, " gid=%ld", (long)file->gid);
  		else
  			*gidbuf = '\0';
+                 if (read_verify && S_ISREG (file->mode)) {
+                         for (j = 0; j < SUM_LENGTH; j++)
+                                 sprintf(csum + j * 2, "%02x",
+                                         (uchar) file->u.sum[j]);
+                 }
  		if (!am_sender)
  			sprintf(depthbuf, "%d", file->dir.depth);
!                         rprintf(FINFO, "[%s] i=%d %s %s%s%s%s mode=0%o len=%.0f%s%s flags=%x csum=%s\n",
  			who, i, am_sender ? NS(file->dir.root) : depthbuf,
  			file->dirname ? safe_fname(file->dirname) : "",
  			file->dirname ? "/" : "", NS(file->basename),
  			S_ISDIR(file->mode) ? "/" : "", (int)file->mode,
! 			(double)file->length, uidbuf, gidbuf, file->flags,
!                         csum);
  	}
  }
  
diff -cp rsync-2.6.6pre1/generator.c rsync-new/generator.c
*** rsync-2.6.6pre1/generator.c	2005-06-30 18:03:14.000000000 +0100
--- rsync-new/generator.c	2005-07-27 10:17:20.000000000 +0100
*************** extern dev_t filesystem_dev;
*** 86,91 ****
--- 86,92 ----
  extern char *backup_dir;
  extern char *backup_suffix;
  extern int backup_suffix_len;
+ extern int read_verify;
  extern struct file_list *the_file_list;
  extern struct filter_list_struct server_filter_list;
  
diff -cp rsync-2.6.6pre1/options.c rsync-new/options.c
*** rsync-2.6.6pre1/options.c	2005-05-19 09:52:17.000000000 +0100
--- rsync-new/options.c	2005-07-27 10:28:57.000000000 +0100
*************** int checksum_seed = 0;
*** 106,112 ****
  int inplace = 0;
  int delay_updates = 0;
  long block_size = 0; /* "long" because popt can't set an int32. */
! 
  
  /** Network address family. **/
  #ifdef INET6
--- 106,112 ----
  int inplace = 0;
  int delay_updates = 0;
  long block_size = 0; /* "long" because popt can't set an int32. */
! int read_verify = 0;
  
  /** Network address family. **/
  #ifdef INET6
*************** void usage(enum logcode F)
*** 352,357 ****
--- 352,358 ----
    rprintf(F," -4, --ipv4                  prefer IPv4\n");
    rprintf(F," -6, --ipv6                  prefer IPv6\n");
  #endif
+   rprintf(F,"     --read-verify           perform checksum verification on written files\n");
    rprintf(F,"     --version               print version number\n");
    rprintf(F," -h, --help                  show this help screen\n");
  
*************** static struct poptOption long_options[] 
*** 456,462 ****
    {"write-batch",      0,  POPT_ARG_STRING, &batch_name, OPT_WRITE_BATCH, 0, 0 },
    {"only-write-batch", 0,  POPT_ARG_STRING, &batch_name, OPT_ONLY_WRITE_BATCH, 0, 0 },
    {"files-from",       0,  POPT_ARG_STRING, &files_from, 0, 0, 0 },
!   {"from0",           '0', POPT_ARG_NONE,   &eol_nulls, 0, 0, 0},
    {"no-implied-dirs",  0,  POPT_ARG_VAL,    &implied_dirs, 0, 0, 0 },
    {"protocol",         0,  POPT_ARG_INT,    &protocol_version, 0, 0, 0 },
    {"checksum-seed",    0,  POPT_ARG_INT,    &checksum_seed, 0, 0, 0 },
--- 457,463 ----
    {"write-batch",      0,  POPT_ARG_STRING, &batch_name, OPT_WRITE_BATCH, 0, 0 },
    {"only-write-batch", 0,  POPT_ARG_STRING, &batch_name, OPT_ONLY_WRITE_BATCH, 0, 0 },
    {"files-from",       0,  POPT_ARG_STRING, &files_from, 0, 0, 0 },
!   {"from0",           '0', POPT_ARG_NONE,   &eol_nulls, 0, 0, 0 },
    {"no-implied-dirs",  0,  POPT_ARG_VAL,    &implied_dirs, 0, 0, 0 },
    {"protocol",         0,  POPT_ARG_INT,    &protocol_version, 0, 0, 0 },
    {"checksum-seed",    0,  POPT_ARG_INT,    &checksum_seed, 0, 0, 0 },
*************** static struct poptOption long_options[] 
*** 464,469 ****
--- 465,471 ----
    {"ipv4",            '4', POPT_ARG_VAL,    &default_af_hint, AF_INET, 0, 0 },
    {"ipv6",            '6', POPT_ARG_VAL,    &default_af_hint, AF_INET6, 0, 0 },
  #endif
+   {"read-verify",      0,  POPT_ARG_NONE,   &read_verify, 0, 0, 0 } ,
    /* All these options switch us into daemon-mode option-parsing. */
    {"config",           0,  POPT_ARG_STRING, 0, OPT_DAEMON, 0, 0 },
    {"daemon",           0,  POPT_ARG_NONE,   0, OPT_DAEMON, 0, 0 },
*************** void server_options(char **args,int *arg
*** 1480,1485 ****
--- 1482,1490 ----
  	if (remove_sent_files)
  		args[ac++] = "--remove-sent-files";
  
+         if (read_verify)
+                 args[ac++] = "--read-verify";
+ 
  	*argc = ac;
  	return;
  
diff -cp rsync-2.6.6pre1/receiver.c rsync-new/receiver.c
*** rsync-2.6.6pre1/receiver.c	2005-04-14 02:42:13.000000000 +0100
--- rsync-new/receiver.c	2005-07-27 10:48:38.000000000 +0100
*************** extern char *partial_dir;
*** 56,61 ****
--- 56,62 ----
  extern char *basis_dir[];
  extern struct file_list *the_file_list;
  extern struct filter_list_struct server_filter_list;
+ extern int read_verify;
  
  #define SLOT_SIZE	(16*1024)	/* Desired size in bytes */
  #define PER_SLOT_BITS	(SLOT_SIZE * 8) /* Number of bits per slot */
*************** int recv_files(int f_in, struct file_lis
*** 649,654 ****
--- 650,667 ----
  			exit_cleanup(RERR_FILEIO);
  		}
  
+                 /* Check that the file written to local disk has the same
+                    checksum as the file in the originating location.  */
+                 if (recv_ok && ! am_server && read_verify) {
+                         char csum[MD4_SUM_LENGTH + 1];
+                         file_checksum (fnametmp, csum, file->length);
+                         if (memcmp(csum, file->u.sum, MD4_SUM_LENGTH) != 0) {
+                                 rprintf (FERROR, "checksum mismatch on %s: does not match remote checksum\n",
+                                          full_fname (fnametmp));
+                                 recv_ok = 0;
+                         }
+                 }
+ 
  		if ((recv_ok && (!delay_updates || !partialptr)) || inplace) {
  			finish_transfer(fname, fnametmp, file, recv_ok, 1);
  			if (partialptr != fname && fnamecmp == partialptr) {
diff -cp rsync-2.6.6pre1/rsync.yo rsync-new/rsync.yo
*** rsync-2.6.6pre1/rsync.yo	2005-07-07 23:51:08.000000000 +0100
--- rsync-new/rsync.yo	2005-07-27 10:49:27.000000000 +0100
***************
*** 1,5 ****
  mailto(rsync-bugs at samba.org)
! manpage(rsync)(1)(7 Jul 2005)()()
  manpagename(rsync)(faster, flexible replacement for rcp)
  manpagesynopsis()
  
--- 1,5 ----
  mailto(rsync-bugs at samba.org)
! manpage(rsync)(1)(27 Jul 2005)()()
  manpagename(rsync)(faster, flexible replacement for rcp)
  manpagesynopsis()
  
*************** to the detailed description below for a 
*** 375,380 ****
--- 375,381 ----
       --checksum-seed=NUM     set block/file checksum seed (advanced)
   -4, --ipv4                  prefer IPv4
   -6, --ipv6                  prefer IPv6
+      --read-verify           perform checksum verification on written files
       --version               print version number
   -h, --help                  show this help screen)
  
*************** explicitly checked on the receiver and a
*** 452,457 ****
--- 453,465 ----
  which already exist and have the same checksum and size on the
  receiver are not transferred.  This option can be quite slow.
  
+ dit(bf(--read-verify)) Perform a read-verification of files updated or
+ written to the receiver by forcing the sender to checksum all files using
+ a 128-bit MD4 checksum before transfer. Once the file has been written
+ to a temporary file on the receiver, an MD4 checksum of that file will be
+ calculated and compared to the server's sum before overwriting occurs.
+ This option can be quite slow.
+ 
  dit(bf(-a, --archive)) This is equivalent to bf(-rlptgoD). It is a quick
  way of saying you want recursion and want to preserve almost
  everything.  The only exception to this is if bf(--files-from) was


More information about the rsync mailing list