[patch] paranoid checksum checking
Nick Burrett
nick at sqrt.co.uk
Wed Jul 27 10:05:17 GMT 2005
Chris Shoemaker wrote:
> On Tue, Jul 26, 2005 at 06:19:08PM +0100, Nick Burrett wrote:
>
>>The attached patch provides an additional check for the checksumming
>>mode to ensure that a file that is actually written out to disk can be
>>read back and has the same MD4 sum as the file on at the originating
>>location.
>
>
> I'm not so sure there's a strong correlation between the group of
> people who want the normal '--checksum' behavior and the people who
> want forced read-back verification. I can think of many cases where
> the user would want each without the other.
>
> I propose that this behavior shouldn't be grouped with "--checksum"
> Instead, perhaps make it a '--verify' or '--re-verify' option. Also,
> this patch needs a corresponding change to rsync.yo.
Thanks for the feedback. I agree with what you're saying since the
--checksum option serves a different purpose, than the actual
verification of data transmitted and stored on disk. The attached patch
incorporates your suggestions, and calls the option '--read-verify'.
Regards,
Nick.
-------------- next part --------------
diff -cp rsync-2.6.6pre1/flist.c rsync-new/flist.c
*** rsync-2.6.6pre1/flist.c 2005-07-07 20:49:14.000000000 +0100
--- rsync-new/flist.c 2005-07-27 10:44:46.000000000 +0100
*************** extern int copy_unsafe_links;
*** 57,62 ****
--- 57,63 ----
extern int protocol_version;
extern int sanitize_paths;
extern int orig_umask;
+ extern int read_verify;
extern struct stats stats;
extern struct file_list *the_file_list;
*************** void send_file_entry(struct file_struct
*** 480,486 ****
}
#endif
! if (always_checksum) {
char *sum;
if (S_ISREG(mode))
sum = file->u.sum;
--- 481,487 ----
}
#endif
! if (always_checksum || read_verify) {
char *sum;
if (S_ISREG(mode))
sum = file->u.sum;
*************** static struct file_struct *receive_file_
*** 618,624 ****
#endif
linkname_len = 0;
! sum_len = always_checksum && S_ISREG(mode) ? MD4_SUM_LENGTH : 0;
alloc_len = file_struct_len + dirname_len + basename_len
+ linkname_len + sum_len;
--- 619,628 ----
#endif
linkname_len = 0;
! if (S_ISREG (mode) && (always_checksum || read_verify))
! sum_len = MD4_SUM_LENGTH;
! else
! sum_len = 0;
alloc_len = file_struct_len + dirname_len + basename_len
+ linkname_len + sum_len;
*************** static struct file_struct *receive_file_
*** 708,714 ****
}
#endif
! if (always_checksum) {
char *sum;
if (sum_len) {
file->u.sum = sum = bp;
--- 712,718 ----
}
#endif
! if (always_checksum || read_verify) {
char *sum;
if (sum_len) {
file->u.sum = sum = bp;
*************** skip_filters:
*** 864,870 ****
linkname_len = 0;
#endif
! sum_len = always_checksum && S_ISREG(st.st_mode) ? MD4_SUM_LENGTH : 0;
alloc_len = file_struct_len + dirname_len + basename_len
+ linkname_len + sum_len;
--- 868,877 ----
linkname_len = 0;
#endif
! if (S_ISREG (st.st_mode) && (always_checksum || read_verify))
! sum_len = MD4_SUM_LENGTH;
! else
! sum_len = 0;
alloc_len = file_struct_len + dirname_len + basename_len
+ linkname_len + sum_len;
*************** static void clean_flist(struct file_list
*** 1567,1579 ****
}
}
-
static void output_flist(struct file_list *flist)
{
! char uidbuf[16], gidbuf[16], depthbuf[16];
struct file_struct *file;
const char *who = who_am_i();
! int i;
for (i = 0; i < flist->count; i++) {
file = flist->files[i];
--- 1574,1585 ----
}
}
static void output_flist(struct file_list *flist)
{
! char uidbuf[16], gidbuf[16], depthbuf[16], csum[SUM_LENGTH * 2 + 1];
struct file_struct *file;
const char *who = who_am_i();
! int i, j;
for (i = 0; i < flist->count; i++) {
file = flist->files[i];
*************** static void output_flist(struct file_lis
*** 1585,1598 ****
sprintf(gidbuf, " gid=%ld", (long)file->gid);
else
*gidbuf = '\0';
if (!am_sender)
sprintf(depthbuf, "%d", file->dir.depth);
! rprintf(FINFO, "[%s] i=%d %s %s%s%s%s mode=0%o len=%.0f%s%s flags=%x\n",
who, i, am_sender ? NS(file->dir.root) : depthbuf,
file->dirname ? safe_fname(file->dirname) : "",
file->dirname ? "/" : "", NS(file->basename),
S_ISDIR(file->mode) ? "/" : "", (int)file->mode,
! (double)file->length, uidbuf, gidbuf, file->flags);
}
}
--- 1591,1610 ----
sprintf(gidbuf, " gid=%ld", (long)file->gid);
else
*gidbuf = '\0';
+ if (read_verify && S_ISREG (file->mode)) {
+ for (j = 0; j < SUM_LENGTH; j++)
+ sprintf(csum + j * 2, "%02x",
+ (uchar) file->u.sum[j]);
+ }
if (!am_sender)
sprintf(depthbuf, "%d", file->dir.depth);
! rprintf(FINFO, "[%s] i=%d %s %s%s%s%s mode=0%o len=%.0f%s%s flags=%x csum=%s\n",
who, i, am_sender ? NS(file->dir.root) : depthbuf,
file->dirname ? safe_fname(file->dirname) : "",
file->dirname ? "/" : "", NS(file->basename),
S_ISDIR(file->mode) ? "/" : "", (int)file->mode,
! (double)file->length, uidbuf, gidbuf, file->flags,
! csum);
}
}
diff -cp rsync-2.6.6pre1/generator.c rsync-new/generator.c
*** rsync-2.6.6pre1/generator.c 2005-06-30 18:03:14.000000000 +0100
--- rsync-new/generator.c 2005-07-27 10:17:20.000000000 +0100
*************** extern dev_t filesystem_dev;
*** 86,91 ****
--- 86,92 ----
extern char *backup_dir;
extern char *backup_suffix;
extern int backup_suffix_len;
+ extern int read_verify;
extern struct file_list *the_file_list;
extern struct filter_list_struct server_filter_list;
diff -cp rsync-2.6.6pre1/options.c rsync-new/options.c
*** rsync-2.6.6pre1/options.c 2005-05-19 09:52:17.000000000 +0100
--- rsync-new/options.c 2005-07-27 10:28:57.000000000 +0100
*************** int checksum_seed = 0;
*** 106,112 ****
int inplace = 0;
int delay_updates = 0;
long block_size = 0; /* "long" because popt can't set an int32. */
!
/** Network address family. **/
#ifdef INET6
--- 106,112 ----
int inplace = 0;
int delay_updates = 0;
long block_size = 0; /* "long" because popt can't set an int32. */
! int read_verify = 0;
/** Network address family. **/
#ifdef INET6
*************** void usage(enum logcode F)
*** 352,357 ****
--- 352,358 ----
rprintf(F," -4, --ipv4 prefer IPv4\n");
rprintf(F," -6, --ipv6 prefer IPv6\n");
#endif
+ rprintf(F," --read-verify perform checksum verification on written files\n");
rprintf(F," --version print version number\n");
rprintf(F," -h, --help show this help screen\n");
*************** static struct poptOption long_options[]
*** 456,462 ****
{"write-batch", 0, POPT_ARG_STRING, &batch_name, OPT_WRITE_BATCH, 0, 0 },
{"only-write-batch", 0, POPT_ARG_STRING, &batch_name, OPT_ONLY_WRITE_BATCH, 0, 0 },
{"files-from", 0, POPT_ARG_STRING, &files_from, 0, 0, 0 },
! {"from0", '0', POPT_ARG_NONE, &eol_nulls, 0, 0, 0},
{"no-implied-dirs", 0, POPT_ARG_VAL, &implied_dirs, 0, 0, 0 },
{"protocol", 0, POPT_ARG_INT, &protocol_version, 0, 0, 0 },
{"checksum-seed", 0, POPT_ARG_INT, &checksum_seed, 0, 0, 0 },
--- 457,463 ----
{"write-batch", 0, POPT_ARG_STRING, &batch_name, OPT_WRITE_BATCH, 0, 0 },
{"only-write-batch", 0, POPT_ARG_STRING, &batch_name, OPT_ONLY_WRITE_BATCH, 0, 0 },
{"files-from", 0, POPT_ARG_STRING, &files_from, 0, 0, 0 },
! {"from0", '0', POPT_ARG_NONE, &eol_nulls, 0, 0, 0 },
{"no-implied-dirs", 0, POPT_ARG_VAL, &implied_dirs, 0, 0, 0 },
{"protocol", 0, POPT_ARG_INT, &protocol_version, 0, 0, 0 },
{"checksum-seed", 0, POPT_ARG_INT, &checksum_seed, 0, 0, 0 },
*************** static struct poptOption long_options[]
*** 464,469 ****
--- 465,471 ----
{"ipv4", '4', POPT_ARG_VAL, &default_af_hint, AF_INET, 0, 0 },
{"ipv6", '6', POPT_ARG_VAL, &default_af_hint, AF_INET6, 0, 0 },
#endif
+ {"read-verify", 0, POPT_ARG_NONE, &read_verify, 0, 0, 0 } ,
/* All these options switch us into daemon-mode option-parsing. */
{"config", 0, POPT_ARG_STRING, 0, OPT_DAEMON, 0, 0 },
{"daemon", 0, POPT_ARG_NONE, 0, OPT_DAEMON, 0, 0 },
*************** void server_options(char **args,int *arg
*** 1480,1485 ****
--- 1482,1490 ----
if (remove_sent_files)
args[ac++] = "--remove-sent-files";
+ if (read_verify)
+ args[ac++] = "--read-verify";
+
*argc = ac;
return;
diff -cp rsync-2.6.6pre1/receiver.c rsync-new/receiver.c
*** rsync-2.6.6pre1/receiver.c 2005-04-14 02:42:13.000000000 +0100
--- rsync-new/receiver.c 2005-07-27 10:48:38.000000000 +0100
*************** extern char *partial_dir;
*** 56,61 ****
--- 56,62 ----
extern char *basis_dir[];
extern struct file_list *the_file_list;
extern struct filter_list_struct server_filter_list;
+ extern int read_verify;
#define SLOT_SIZE (16*1024) /* Desired size in bytes */
#define PER_SLOT_BITS (SLOT_SIZE * 8) /* Number of bits per slot */
*************** int recv_files(int f_in, struct file_lis
*** 649,654 ****
--- 650,667 ----
exit_cleanup(RERR_FILEIO);
}
+ /* Check that the file written to local disk has the same
+ checksum as the file in the originating location. */
+ if (recv_ok && ! am_server && read_verify) {
+ char csum[MD4_SUM_LENGTH + 1];
+ file_checksum (fnametmp, csum, file->length);
+ if (memcmp(csum, file->u.sum, MD4_SUM_LENGTH) != 0) {
+ rprintf (FERROR, "checksum mismatch on %s: does not match remote checksum\n",
+ full_fname (fnametmp));
+ recv_ok = 0;
+ }
+ }
+
if ((recv_ok && (!delay_updates || !partialptr)) || inplace) {
finish_transfer(fname, fnametmp, file, recv_ok, 1);
if (partialptr != fname && fnamecmp == partialptr) {
diff -cp rsync-2.6.6pre1/rsync.yo rsync-new/rsync.yo
*** rsync-2.6.6pre1/rsync.yo 2005-07-07 23:51:08.000000000 +0100
--- rsync-new/rsync.yo 2005-07-27 10:49:27.000000000 +0100
***************
*** 1,5 ****
mailto(rsync-bugs at samba.org)
! manpage(rsync)(1)(7 Jul 2005)()()
manpagename(rsync)(faster, flexible replacement for rcp)
manpagesynopsis()
--- 1,5 ----
mailto(rsync-bugs at samba.org)
! manpage(rsync)(1)(27 Jul 2005)()()
manpagename(rsync)(faster, flexible replacement for rcp)
manpagesynopsis()
*************** to the detailed description below for a
*** 375,380 ****
--- 375,381 ----
--checksum-seed=NUM set block/file checksum seed (advanced)
-4, --ipv4 prefer IPv4
-6, --ipv6 prefer IPv6
+ --read-verify perform checksum verification on written files
--version print version number
-h, --help show this help screen)
*************** explicitly checked on the receiver and a
*** 452,457 ****
--- 453,465 ----
which already exist and have the same checksum and size on the
receiver are not transferred. This option can be quite slow.
+ dit(bf(--read-verify)) Perform a read-verification of files updated or
+ written to the receiver by forcing the sender to checksum all files using
+ a 128-bit MD4 checksum before transfer. Once the file has been written
+ to a temporary file on the receiver, an MD4 checksum of that file will be
+ calculated and compared to the server's sum before overwriting occurs.
+ This option can be quite slow.
+
dit(bf(-a, --archive)) This is equivalent to bf(-rlptgoD). It is a quick
way of saying you want recursion and want to preserve almost
everything. The only exception to this is if bf(--files-from) was
More information about the rsync
mailing list