TODO hardlink performance optimizations
jw schultz
jw at pegasys.ws
Sun Jan 4 13:30:03 GMT 2004
On Sun, Jan 04, 2004 at 06:35:03AM -0600, John Van Essen wrote:
> Lester,
>
> You articulated your situation clearly enough for me. Thanks.
>
> I'll address your issue where rsync, running locally to sync /vol/N
> to /vol/N_mirror, exhausts all of the RAM and swap.
>
> If you haven't read jw schultz's "How Rsync Works" page, here's the link:
>
> http://www.pegasys.ws/how-rsync-works.html
>
> The sender, receiver, and generator each have a full copy of the file
> list (each file's entry uses 100 bytes on average).
>
> Additionally, the --hard-links option creates yet *another* full copy of
> the file list in the receiver, so that's even more memory consumed.
>
> So you are in a world o' hurt rsyncing an entire /vol/N internally
> with --hard-links, since there will be FOUR copies of the file list.
>
> I'd suggest breaking the /vol/N rsync up into separate rsyncs for each
> of the maxdepth 1 hierarchies. If I understand your situation correctly,
> all hard link groups are self contained within each of those hierarchies
> so you will be OK.
>
> I've modified hlink.c to use a list of file struct pointers instead of
> copies of the actual file structs themselves, so that will save memory.
> I'll submit that patch for review in a day or two after I've tested it.
I've just done the same. It reduces the memory requirements
of the hlink list to 1/18th. It is also somewhat faster to
build that way because we don't have to walk the list.
If we built the hlink_list one element at a time, the way we
do the file_list, putting only those files that we might link
in it, it would be smaller, but building it would be slower.
I've only done a little testing, but it seems to be working
and, warnings about theory v. practice aside, it should be
good.
===================================================================
RCS file: /data/cvs/rsync/hlink.c,v
retrieving revision 1.23
diff -p -u -r1.23 hlink.c
--- hlink.c 2 Jan 2004 07:34:49 -0000 1.23
+++ hlink.c 4 Jan 2004 13:21:14 -0000
@@ -24,45 +24,43 @@ extern int dry_run;
extern int verbose;
#if SUPPORT_HARD_LINKS
-static int hlink_compare(struct file_struct *f1, struct file_struct *f2)
+static int hlink_compare(struct file_struct **f1, struct file_struct **f2)
{
- if (!S_ISREG(f1->mode) && !S_ISREG(f2->mode))
+ if (!S_ISREG((*f1)->mode) && !S_ISREG((*f2)->mode))
return 0;
- if (!S_ISREG(f1->mode))
+ if (!S_ISREG((*f1)->mode))
return -1;
- if (!S_ISREG(f2->mode))
+ if (!S_ISREG((*f2)->mode))
return 1;
- if (f1->dev != f2->dev)
- return (int) (f1->dev > f2->dev ? 1 : -1);
+ if ((*f1)->dev != (*f2)->dev)
+ return (int) ((*f1)->dev > (*f2)->dev ? 1 : -1);
- if (f1->inode != f2->inode)
- return (int) (f1->inode > f2->inode ? 1 : -1);
+ if ((*f1)->inode != (*f2)->inode)
+ return (int) ((*f1)->inode > (*f2)->inode ? 1 : -1);
- return file_compare(&f1, &f2);
+
+ return file_compare(f1, f2);
}
-static struct file_struct *hlink_list;
+static struct file_struct **hlink_list;
static int hlink_count;
#endif
void init_hard_links(struct file_list *flist)
{
#if SUPPORT_HARD_LINKS
- int i;
if (flist->count < 2)
return;
if (hlink_list)
free(hlink_list);
- if (!(hlink_list = new_array(struct file_struct, flist->count)))
+ if (!(hlink_list = new_array(struct file_struct *, flist->count)))
out_of_memory("init_hard_links");
-
- for (i = 0; i < flist->count; i++)
- memcpy(&hlink_list[i], flist->files[i],
- sizeof(hlink_list[0]));
+
+ memcpy(hlink_list, flist->files, sizeof(hlink_list[0]) * flist->count);
qsort(hlink_list, flist->count,
sizeof(hlink_list[0]), (int (*)()) hlink_compare);
@@ -84,7 +82,7 @@ int check_hard_link(struct file_struct *
while (low != high) {
int mid = (low + high) / 2;
- ret = hlink_compare(&hlink_list[mid], file);
+ ret = hlink_compare(&hlink_list[mid], &file);
if (ret == 0) {
low = mid;
break;
@@ -97,16 +95,16 @@ int check_hard_link(struct file_struct *
/* XXX: To me this looks kind of dodgy -- why do we use [low]
* here and [low-1] below? -- mbp */
- if (hlink_compare(&hlink_list[low], file) != 0)
+ if (hlink_compare(&hlink_list[low], &file) != 0)
return 0;
if (low > 0 &&
- S_ISREG(hlink_list[low - 1].mode) &&
- file->dev == hlink_list[low - 1].dev &&
- file->inode == hlink_list[low - 1].inode) {
+ S_ISREG(hlink_list[low - 1]->mode) &&
+ file->dev == hlink_list[low - 1]->dev &&
+ file->inode == hlink_list[low - 1]->inode) {
if (verbose >= 2) {
rprintf(FINFO, "check_hard_link: \"%s\" is a hard link to file %d, \"%s\"\n",
- f_name(file), low-1, f_name(&hlink_list[low-1]));
+ f_name(file), low-1, f_name(hlink_list[low-1]));
}
return 1;
}
@@ -120,12 +118,12 @@ int check_hard_link(struct file_struct *
static void hard_link_one(int i)
{
STRUCT_STAT st1, st2;
- char *hlink2, *hlink1 = f_name(&hlink_list[i - 1]);
+ char *hlink2, *hlink1 = f_name(hlink_list[i - 1]);
if (link_stat(hlink1, &st1) != 0)
return;
- hlink2 = f_name(&hlink_list[i]);
+ hlink2 = f_name(hlink_list[i]);
if (link_stat(hlink2, &st2) != 0) {
if (do_link(hlink1, hlink2)) {
if (verbose > 0) {
@@ -166,11 +164,11 @@ void do_hard_links(void)
return;
for (i = 1; i < hlink_count; i++) {
- if (S_ISREG(hlink_list[i].mode) &&
- S_ISREG(hlink_list[i - 1].mode) &&
- hlink_list[i].basename && hlink_list[i - 1].basename &&
- hlink_list[i].dev == hlink_list[i - 1].dev &&
- hlink_list[i].inode == hlink_list[i - 1].inode) {
+ if (S_ISREG(hlink_list[i]->mode) &&
+ S_ISREG(hlink_list[i - 1]->mode) &&
+ hlink_list[i]->basename && hlink_list[i - 1]->basename &&
+ hlink_list[i]->dev == hlink_list[i - 1]->dev &&
+ hlink_list[i]->inode == hlink_list[i - 1]->inode) {
hard_link_one(i);
}
}
More information about the rsync
mailing list