Feature request: preallocation of directories
Theodore Ts'o
tytso at mit.edu
Mon Sep 1 13:33:18 GMT 2008
Hi there,
One of the things that I've been doing for fun is to try to speed up
ext4's fsck time. As you can see here:
http://thunk.org/tytso/blog/2008/08/08/fast-ext4-fsck-times/
Fsck'ing an ext4 filesystem can be between 6-8 times faster than the
equivalent file hierarchy on ext3. In order to speed it up further, the
main place to look is in pass #2, where the directory is scanned, and in
order to do that, I need to address the problem of directory
fragmentation.
In the long run we probably should have an appropriate kernel interface
to support this (probably via the already-existing fallocate interface,
applied to directories, or an extension to the mkdir system call which
gives the anticipated size of the directory), but in the meantime,
userspace can expand a directory simply by creating some zero-length,
large filenames, and then deleting them. But in order to do that,
programs like cpio, tar and rsync (regardless of the short-term
create-large-files-hack or some mkdir extension), need to be involved
since they know how big the filesystem will likely be.
So what I am interested in trying to prototype is a new option to rsync
which, if it needs to create a directory, will take the size of the
directory on the source and fill it with dummy files with large
filenames (which are then deleted after the directory is expanded). I
have a program which does this separately, but while this
makes the directories nicely contiguous on disk, the block
allocation suffers, since the directories aren't created in the correct
block groups given that they are created first, instead of as the
files are copied over.
Unfortunately, being new to rsync's source code, it's not clear to me
what the best way to add this feature is. I was hoping that some rsync
hacker with some free time would either take a tiny bit of code from the
attached copy_dirstruct.c program and add it to rsync in the appropriate
place, or give me some tips about how to add it. I *think* it is in
generator.c, but recv_generator is complicated enough that I'm not
entirely confident how to add it without breaking some other option, or
some part of its functioning.
- Ted
/*
* copy_dirstruct.c - Copy a directory structure
*
* Compile using: cc -o copy_dirstruct copy_dirstruct.c -le2p -lcom_err
*
* Copyright (C) 2008 Theodore Ts'o <tytso at mit.edu>
*
* This file can be redistributed under the terms of the GNU General
* Public License.
*/
#include <sys/types.h>
#include <dirent.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <errno.h>
#include <sys/param.h>
#include <sys/stat.h>
#include "ext2fs/ext2_fs.h"
/*
 * EXT2FS_ATTR(x) expands to a GCC __attribute__ when compiling with a
 * GNU-compatible compiler and to nothing otherwise.
 * NOTE(review): no use of this macro is visible in this file; it may only
 * be needed by the headers included below - confirm before removing.
 */
#ifdef __GNUC__
#define EXT2FS_ATTR(x) __attribute__(x)
#else
#define EXT2FS_ATTR(x)
#endif
/*
 * Fallback definition of S_ISLNK for environments where <sys/stat.h> did
 * not provide one: use the glibc-internal __S_ISTYPE/__S_IFLNK pair when
 * available, otherwise treat nothing as a symbolic link.
 */
#ifndef S_ISLNK /* So we can compile even with gcc-warn */
# ifdef __S_IFLNK
# define S_ISLNK(mode) __S_ISTYPE((mode), __S_IFLNK)
# else
# define S_ISLNK(mode) 0
# endif
#endif
#include "et/com_err.h"
#include "e2p/e2p.h"
/* Name passed to com_err() and printed by usage() to identify this tool. */
static const char * program_name = "copy_dirstruct";
/*
 * LSTAT is the lstat variant to call and STRUCT_STAT is the buffer type
 * that goes with it; the 64-bit pair is selected when _LFS64_LARGEFILE
 * is defined.
 */
#ifdef _LFS64_LARGEFILE
#define LSTAT lstat64
#define STRUCT_STAT struct stat64
#else
#define LSTAT lstat
#define STRUCT_STAT struct stat
#endif
/* Prefix under which the directory skeleton is recreated. */
const char *dest_dir = "/mnt";
/* Root of the source tree; main() stats it and starts the copy here. */
const char *src_dir = "/";
/* Directory size that copy_dir() treats as "nothing to expand". */
int blocksize = 4096;
/* Length of dest_dir in bytes; must be kept equal to strlen(dest_dir). */
int dest_len = 4;
/* When non-zero, copy_dir() suppresses some of its com_err() messages. */
int silent = 0;
/*
 * Device number of src_dir, set by main(); copy_dir() only recurses into
 * directories that live on this device.
 */
dev_t xdev;
/*
 * Print the one-line usage summary to stderr and terminate the process
 * with exit status 1.  Never returns.
 *
 * NOTE(review): the option list in the message does not correspond to
 * anything main() in this file parses (main ignores argv) - it looks
 * inherited from another tool; confirm the intended text.
 */
static void usage(void)
{
	FILE *out = stderr;

	fprintf(out, "Usage: %s [-RVf] [-+=AacDdijsSu] "
		"[-v version] files...\n", program_name);
	exit(1);
}
static int copy_dir_proc(const char *, struct dirent *, void *);
static int copy_dir(const char * name)
{
unsigned long flags;
struct stat st;
char *newname;
off_t size;
char fill[250];
int i, l, fd, num;
int recurse;
if (LSTAT (name, &st) == -1) {
if (!silent)
com_err (program_name, errno,
"while trying to stat %s", name);
return -1;
}
if (!S_ISDIR(st.st_mode))
return 0;
recurse = (st.st_dev == xdev);
newname = malloc(strlen(name) + dest_len + 2);
if (!newname) {
if (!silent)
com_err(program_name, 0, "couldn't allocate newname");
return -1;
}
sprintf(newname, "%s/%s", dest_dir, name);
size = st.st_size;
if (mkdir(newname, st.st_mode) < 0) {
if (errno == EEXIST) {
if (stat(newname, &st) == 0 &&
S_ISDIR(st.st_mode))
goto dir_ok;
errno = EEXIST;
}
if (!silent)
com_err(program_name, errno,
"while trying to mkdir %s", name);
return -1;
}
dir_ok:
if (st.st_size == blocksize)
goto done;
st.st_size = 0;
num = 0;
if (chdir(newname) < 0) {
com_err(program_name, errno, "while trying to chdir to %s",
newname);
return -1;
}
while (st.st_size < size) {
l = sprintf(fill, "Fill-%08x", num++);
memset(fill+l, '=', sizeof(fill)-l-1);
fill[sizeof(fill)-1] = 0;
fd = open(fill, O_CREAT|O_WRONLY, 0600);
if (fd < 0) {
com_err(program_name, errno,
"while trying to create %s", fill);
break;
}
close(fd);
if (stat(".", &st) < 0) {
com_err(program_name, errno,
"while trying to stat '.' in file create loop");
break;
}
}
for (i=0; i < num; i++) {
l = sprintf(fill, "Fill-%08x", i);
memset(fill+l, '=', sizeof(fill)-l-1);
fill[sizeof(fill)-1] = 0;
if (unlink(fill) < 0)
com_err(program_name, errno,
"while trying to unlink %s", fill);
}
done:
if (recurse)
return iterate_on_dir(name, copy_dir_proc, NULL);
return 0;
}
/*
 * Directory-iteration callback: for every entry of dir_name other than
 * "." and "..", build "<dir_name>/<entry>" and hand it to copy_dir().
 *
 * `private` is not used.  Returns 0 for the skipped entries, -1 if the
 * path buffer cannot be allocated, and otherwise whatever copy_dir()
 * returned for the entry.
 */
static int copy_dir_proc(const char * dir_name, struct dirent * de,
			 void * private)
{
	const char *entry = de->d_name;
	size_t need;
	char *full;
	int status;

	/* Never descend into the self and parent links */
	if (!strcmp(entry, ".") || !strcmp(entry, ".."))
		return 0;

	/* directory + '/' + entry + NUL */
	need = strlen(dir_name) + 1 + strlen(entry) + 1;
	full = malloc(need);
	if (full == NULL) {
		fprintf(stderr, "Couldn't allocate path variable "
			"in copy_dir_proc");
		return -1;
	}
	sprintf(full, "%s/%s", dir_name, entry);
	status = copy_dir(full);
	free(full);
	return status;
}
/*
 * Entry point: record the device of src_dir in xdev (so the copy stays on
 * one filesystem) and replicate the directory structure starting there.
 *
 * Command-line arguments are not examined.  Exits with status 1 if
 * src_dir cannot be stat'ed or if copy_dir() reports a failure, and with
 * status 0 otherwise.
 */
int main (int argc, char ** argv)
{
	struct stat st;
	int ret;

	(void) argc;
	(void) argv;

	if (stat(src_dir, &st) < 0) {
		com_err(program_name, errno, "while trying to stat %s",
			src_dir);
		exit(1);
	}
	xdev = st.st_dev;

	/* Propagate failure instead of always exiting successfully */
	ret = copy_dir(src_dir);
	return ret ? 1 : 0;
}
More information about the rsync
mailing list