scan for first existing hard-link file

Wayne Davison wayned at samba.org
Sun Jan 25 10:05:21 GMT 2004


Here's a patch that makes rsync try to find an existing file in a group
of hard-linked files so that it doesn't create the first one in the
group from scratch if a later file could be used instead.

Details:  I decided to avoid having the code do an extra scan down the
list when we encounter the lead file in the list.  This is because it
would be bad to have to do the same scan in the receiver that the
generator just performed, especially since there's no guarantee that it
will get identical results (if a file pops up at the wrong moment).  My
solution just keeps moving the master file in the group down the list,
causing it to be processed in turn as we go through the normal flist
scan.  This ensures that we use the right basis file in the receiver,
and keeps the code simple.  The only complicating factor was that the
hard-link post-processing pass was being done by the receiver, while the
generator is the one that keeps track of the updated master.  To deal
with this, I moved the hard-link post-processing loop and the final
touch-up of the directory permissions into the final work that the
generator does after it gets the "end of phase 2" indicator from the
receiver.

Some simple changes to the hard-link data structures were needed.  I
got rid of the "head" pointer, replacing it with an index into the
hlink_list array.  This lets us update this "first item" pointer to
point to the current master.  I then made the singly-linked list of
hard-linked items circular, and added a flag to mark the last item in
the original list (so we know when to give up our search and just ask
for the file to be created).

..wayne..
-------------- next part --------------
--- backup.c	20 Jan 2004 05:24:07 -0000	1.19
+++ backup.c	25 Jan 2004 09:54:32 -0000
@@ -25,6 +25,7 @@ extern int backup_suffix_len;
 extern int backup_dir_len;
 extern char *backup_suffix;
 extern char *backup_dir;
+extern struct file_struct **hlink_list;
 
 extern int am_root;
 extern int preserve_devices;
@@ -263,7 +264,7 @@ static int keep_backup(char *fname)
 	}
 #endif
 	if (!kept && preserve_hard_links && file->link_u.links
-	    && file->F_HEAD != file) {
+	    && hlink_list[file->F_INDEX] != file) {
 		if (verbose > 1)
 			rprintf(FINFO, "%s is a hard link\n", f_name(file));
 	}
--- generator.c	22 Jan 2004 04:38:18 -0000	1.72
+++ generator.c	25 Jan 2004 09:56:10 -0000
@@ -43,6 +43,7 @@ extern int protocol_version;
 extern int always_checksum;
 extern char *compare_dest;
 extern int link_dest;
+extern struct file_struct **hlink_list;
 
 
 /* choose whether to skip a particular file */
@@ -397,7 +398,8 @@ void recv_generator(char *fname, struct 
 	}
 #endif
 
-	if (preserve_hard_links && file->link_u.links && file->F_HEAD != file) {
+	if (preserve_hard_links && file->link_u.links
+	    && hlink_list[file->F_INDEX] != file) {
 		if (verbose > 1) {
 			rprintf(FINFO, "recv_generator: \"%s\" is a hard link\n",
 				f_name(file));
@@ -438,6 +440,11 @@ void recv_generator(char *fname, struct 
 	}
 
 	if (statret == -1) {
+		if (preserve_hard_links && file->link_u.links
+		    && !(file->flags & FLAG_HLINK_LAST)) {
+			hlink_list[file->F_INDEX] = file->F_NEXT;
+			return;
+		}
 		if (errno == ENOENT) {
 			write_int(f_out,i);
 			if (!dry_run) write_sum_head(f_out, NULL);
@@ -455,6 +462,11 @@ void recv_generator(char *fname, struct 
 		}
 
 		/* now pretend the file didn't exist */
+		if (preserve_hard_links && file->link_u.links
+		    && !(file->flags & FLAG_HLINK_LAST)) {
+			hlink_list[file->F_INDEX] = file->F_NEXT;
+			return;
+		}
 		write_int(f_out,i);
 		if (!dry_run) write_sum_head(f_out, NULL);
 		return;
@@ -496,6 +508,11 @@ void recv_generator(char *fname, struct 
 		rprintf(FERROR, "failed to open %s, continuing: %s\n",
 			full_fname(fnamecmp), strerror(errno));
 		/* pretend the file didn't exist */
+		if (preserve_hard_links && file->link_u.links
+		    && !(file->flags & FLAG_HLINK_LAST)) {
+			hlink_list[file->F_INDEX] = file->F_NEXT;
+			return;
+		}
 		write_int(f_out,i);
 		write_sum_head(f_out, NULL);
 		return;
--- hlink.c	22 Jan 2004 04:38:18 -0000	1.30
+++ hlink.c	25 Jan 2004 09:22:23 -0000
@@ -38,8 +38,8 @@ static int hlink_compare(struct file_str
 	return file_compare(file1, file2);
 }
 
-static struct file_struct **hlink_list;
-static int hlink_count;
+struct file_struct **hlink_list;
+int hlink_count;
 
 #define LINKED(p1,p2) ((p1)->F_DEV == (p2)->F_DEV \
 		    && (p1)->F_INODE == (p2)->F_INODE)
@@ -56,13 +56,14 @@ static void link_idev_data(void)
 		head = hlink_list[start];
 		while (from < hlink_count-1
 		    && LINKED(hlink_list[from], hlink_list[from+1])) {
-			hlink_list[from]->F_HEAD = head;
+			hlink_list[from]->F_INDEX = to;
 			hlink_list[from]->F_NEXT = hlink_list[from+1];
 			from++;
 		}
 		if (from > start) {
-			hlink_list[from]->F_HEAD = head;
-			hlink_list[from]->F_NEXT = NULL;
+			hlink_list[from]->F_INDEX = to;
+			hlink_list[from]->F_NEXT = head;
+			hlink_list[from]->flags |= FLAG_HLINK_LAST;
 			hlink_list[to++] = head;
 		} else {
 			free((char*)head->link_u.idev);
@@ -136,7 +137,7 @@ static void hard_link_one(char *hlink1, 
 void do_hard_links(void)
 {
 #if SUPPORT_HARD_LINKS
-	struct file_struct *file;
+	struct file_struct *file, *first;
 	char hlink1[MAXPATHLEN];
 	char *hlink2;
 	STRUCT_STAT st1, st2;
@@ -146,10 +147,10 @@ void do_hard_links(void)
 		return;
 
 	for (i = 0; i < hlink_count; i++) {
-		file = hlink_list[i];
-		if (link_stat(f_name_to(file, hlink1), &st1) != 0)
+		first = file = hlink_list[i];
+		if (link_stat(f_name_to(first, hlink1), &st1) != 0)
 			continue;
-		while ((file = file->F_NEXT) != NULL) {
+		while ((file = file->F_NEXT) != first) {
 			hlink2 = f_name(file);
 			if (link_stat(hlink2, &st2) == 0) {
 				if (st2.st_dev == st1.st_dev
--- main.c	20 Jan 2004 17:46:31 -0000	1.183
+++ main.c	25 Jan 2004 09:19:45 -0000
@@ -409,6 +409,7 @@ static void do_server_sender(int f_in, i
 
 static int do_recv(int f_in,int f_out,struct file_list *flist,char *local_name)
 {
+	int i;
 	int pid;
 	int status=0;
 	int error_pipe[2];
@@ -463,6 +464,19 @@ static int do_recv(int f_in,int f_out,st
 	generate_files(f_out, flist, local_name);
 
 	get_redo_num(); /* Read final -1, and any prior messages. */
+
+	if (preserve_hard_links)
+		do_hard_links();
+
+	/* now we need to fix any directory permissions that were
+	 * modified during the transfer */
+	for (i = 0; i < flist->count; i++) {
+		struct file_struct *file = flist->files[i];
+		if (!file->basename || !S_ISDIR(file->mode)) continue;
+		recv_generator(local_name ? local_name : f_name(file),
+			       file, i, -1);
+	}
+
 	io_flush(FULL_FLUSH);
 	if (protocol_version >= 24) {
 		/* send a final goodbye message */
--- receiver.c	24 Jan 2004 22:12:58 -0000	1.67
+++ receiver.c	25 Jan 2004 09:23:56 -0000
@@ -479,18 +479,6 @@ int recv_files(int f_in,struct file_list
 	    && flist->count > 0)
 		delete_files(flist);
 
-	if (preserve_hard_links)
-		do_hard_links();
-
-	/* now we need to fix any directory permissions that were
-	 * modified during the transfer */
-	for (i = 0; i < flist->count; i++) {
-		file = flist->files[i];
-		if (!file->basename || !S_ISDIR(file->mode)) continue;
-		recv_generator(local_name ? local_name : f_name_to(file, fbuf),
-			       file, i, -1);
-	}
-
 	if (verbose > 2)
 		rprintf(FINFO,"recv_files finished\n");
 
--- rsync.h	22 Jan 2004 18:37:26 -0000	1.174
+++ rsync.h	25 Jan 2004 09:09:07 -0000
@@ -58,6 +58,11 @@
 /* What flags above are relevant after the transfer of the flist? */
 #define LIVE_FLAGS FLAG_DELETE
 
+/* These flist flags can be set after the flist is transferred. */
+
+/*#define FLAG_DELETE (1<<0) -- from the above list */
+#define FLAG_HLINK_LAST (1<<1)
+
 /* update this if you make incompatible changes */
 #define PROTOCOL_VERSION 28
 
@@ -384,7 +389,7 @@ enum msgcode {
 #define GID_NONE (gid_t) -1
 
 struct hlink {
-	struct file_struct *head;
+	int index;
 	struct file_struct *next;
 };
 
@@ -396,7 +401,7 @@ struct idev {
 #define F_DEV	link_u.idev->dev
 #define F_INODE	link_u.idev->inode
 
-#define F_HEAD	link_u.links->head
+#define F_INDEX	link_u.links->index
 #define F_NEXT	link_u.links->next
 
 struct file_struct {


More information about the rsync mailing list