[PATCH] add pread operation to vfs layer
James Peach
jpeach at sgi.com
Tue Dec 16 04:38:05 GMT 2003
Hi all,
The following diff adds pread/pwrite operations to the VFS layer and makes
use of them in the I/O path. This corresponds to bugzilla bug #889.
There is not much performance advantage in addition to using spinlocks (a
consistent 2% - 7% increase in throughput), but there is a significant
benefit to using p{read,write} with fcntl tdb locking (5% - 30% increase in
throughput). The largest improvements are in high packet rate workloads (i.e.,
small block sizes and meta-data workloads), as you might expect. I can
provide detailed numbers if that would be useful.
For systems without pread/pwrite (are there many of these?), the
p{read,write} operations are emulated by an lseek/lseek/{read,write}/lseek
sequence. The file I/O path for these systems continues to use the old
lseek/read code.
cheers
--
James Peach | jpeach at sgi.com
Index: samba/source/include/smbprofile.h
===================================================================
RCS file: /cvsroot/samba/source/include/smbprofile.h,v
retrieving revision 1.4.2.2
diff -u -r1.4.2.2 smbprofile.h
--- samba/source/include/smbprofile.h 12 May 2003 01:20:12 -0000 1.4.2.2
+++ samba/source/include/smbprofile.h 16 Dec 2003 04:27:25 -0000
@@ -34,7 +34,7 @@
#define PROF_SHMEM_KEY ((key_t)0x07021999)
#define PROF_SHM_MAGIC 0x6349985
-#define PROF_SHM_VERSION 7
+#define PROF_SHM_VERSION 8
/* time values in the following structure are in microseconds */
@@ -60,9 +60,15 @@
unsigned syscall_read_count;
unsigned syscall_read_time;
unsigned syscall_read_bytes; /* bytes read with read syscall */
+ unsigned syscall_pread_count;
+ unsigned syscall_pread_time;
+ unsigned syscall_pread_bytes; /* bytes read with pread syscall */
unsigned syscall_write_count;
unsigned syscall_write_time;
unsigned syscall_write_bytes; /* bytes written with write syscall */
+ unsigned syscall_pwrite_count;
+ unsigned syscall_pwrite_time;
+ unsigned syscall_pwrite_bytes; /* bytes written with pwrite syscall */
unsigned syscall_lseek_count;
unsigned syscall_lseek_time;
unsigned syscall_sendfile_count;
Index: samba/source/include/vfs.h
===================================================================
RCS file: /cvsroot/samba/source/include/vfs.h,v
retrieving revision 1.25.2.10
diff -u -r1.25.2.10 vfs.h
--- samba/source/include/vfs.h 7 Aug 2003 21:47:46 -0000 1.25.2.10
+++ samba/source/include/vfs.h 16 Dec 2003 04:27:25 -0000
@@ -51,7 +51,8 @@
/* Changed to version 7 to include the get_nt_acl info parameter. JRA. */
/* Changed to version 8 includes EA calls. JRA. */
/* Changed to version 9 to include the get_shadow_data call. --metze */
-#define SMB_VFS_INTERFACE_VERSION 9
+/* Changed to version 10 to include pread/pwrite calls. */
+#define SMB_VFS_INTERFACE_VERSION 10
/* to bug old modules witch are trying to compile with the old functions */
@@ -107,7 +108,9 @@
SMB_VFS_OP_OPEN,
SMB_VFS_OP_CLOSE,
SMB_VFS_OP_READ,
+ SMB_VFS_OP_PREAD,
SMB_VFS_OP_WRITE,
+ SMB_VFS_OP_PWRITE,
SMB_VFS_OP_LSEEK,
SMB_VFS_OP_SENDFILE,
SMB_VFS_OP_RENAME,
@@ -213,7 +216,9 @@
int (*open)(struct vfs_handle_struct *handle, struct connection_struct *conn, const char *fname, int flags, mode_t mode);
int (*close)(struct vfs_handle_struct *handle, struct files_struct *fsp, int fd);
ssize_t (*read)(struct vfs_handle_struct *handle, struct files_struct *fsp, int fd, void *data, size_t n);
+ ssize_t (*pread)(struct vfs_handle_struct *handle, struct files_struct *fsp, int fd, void *data, size_t n, SMB_OFF_T offset);
ssize_t (*write)(struct vfs_handle_struct *handle, struct files_struct *fsp, int fd, const void *data, size_t n);
+ ssize_t (*pwrite)(struct vfs_handle_struct *handle, struct files_struct *fsp, int fd, const void *data, size_t n, SMB_OFF_T offset);
SMB_OFF_T (*lseek)(struct vfs_handle_struct *handle, struct files_struct *fsp, int fd, SMB_OFF_T offset, int whence);
ssize_t (*sendfile)(struct vfs_handle_struct *handle, int tofd, files_struct *fsp, int fromfd, const DATA_BLOB *header, SMB_OFF_T offset, size_t count);
int (*rename)(struct vfs_handle_struct *handle, struct connection_struct *conn, const char *old, const char *new);
@@ -311,7 +316,9 @@
struct vfs_handle_struct *open;
struct vfs_handle_struct *close;
struct vfs_handle_struct *read;
+ struct vfs_handle_struct *pread;
struct vfs_handle_struct *write;
+ struct vfs_handle_struct *pwrite;
struct vfs_handle_struct *lseek;
struct vfs_handle_struct *sendfile;
struct vfs_handle_struct *rename;
Index: samba/source/include/vfs_macros.h
===================================================================
RCS file: /cvsroot/samba/source/include/vfs_macros.h,v
retrieving revision 1.1.2.11
diff -u -r1.1.2.11 vfs_macros.h
--- samba/source/include/vfs_macros.h 7 Aug 2003 21:47:46 -0000 1.1.2.11
+++ samba/source/include/vfs_macros.h 16 Dec 2003 04:27:25 -0000
@@ -46,7 +46,9 @@
#define SMB_VFS_OPEN(conn, fname, flags, mode) ((conn)->vfs.ops.open((conn)->vfs.handles.open, (conn), (fname), (flags), (mode)))
#define SMB_VFS_CLOSE(fsp, fd) ((fsp)->conn->vfs.ops.close((fsp)->conn->vfs.handles.close, (fsp), (fd)))
#define SMB_VFS_READ(fsp, fd, data, n) ((fsp)->conn->vfs.ops.read((fsp)->conn->vfs.handles.read, (fsp), (fd), (data), (n)))
+#define SMB_VFS_PREAD(fsp, fd, data, n, off) ((fsp)->conn->vfs.ops.pread((fsp)->conn->vfs.handles.pread, (fsp), (fd), (data), (n), (off)))
#define SMB_VFS_WRITE(fsp, fd, data, n) ((fsp)->conn->vfs.ops.write((fsp)->conn->vfs.handles.write, (fsp), (fd), (data), (n)))
+#define SMB_VFS_PWRITE(fsp, fd, data, n, off) ((fsp)->conn->vfs.ops.pwrite((fsp)->conn->vfs.handles.pwrite, (fsp), (fd), (data), (n), (off)))
#define SMB_VFS_LSEEK(fsp, fd, offset, whence) ((fsp)->conn->vfs.ops.lseek((fsp)->conn->vfs.handles.lseek, (fsp), (fd), (offset), (whence)))
#define SMB_VFS_SENDFILE(tofd, fsp, fromfd, header, offset, count) ((fsp)->conn->vfs.ops.sendfile((fsp)->conn->vfs.handles.sendfile, (tofd), (fsp), (fromfd), (header), (offset), (count)))
#define SMB_VFS_RENAME(conn, old, new) ((conn)->vfs.ops.rename((conn)->vfs.handles.rename, (conn), (old), (new)))
@@ -142,7 +144,9 @@
#define SMB_VFS_OPAQUE_OPEN(conn, fname, flags, mode) ((conn)->vfs_opaque.ops.open((conn)->vfs_opaque.handles.open, (conn), (fname), (flags), (mode)))
#define SMB_VFS_OPAQUE_CLOSE(fsp, fd) ((fsp)->conn->vfs_opaque.ops.close((fsp)->conn->vfs_opaque.handles.close, (fsp), (fd)))
#define SMB_VFS_OPAQUE_READ(fsp, fd, data, n) ((fsp)->conn->vfs_opaque.ops.read((fsp)->conn->vfs_opaque.handles.read, (fsp), (fd), (data), (n)))
+#define SMB_VFS_OPAQUE_PREAD(fsp, fd, data, n, off) ((fsp)->conn->vfs_opaque.ops.pread((fsp)->conn->vfs_opaque.handles.pread, (fsp), (fd), (data), (n), (off)))
#define SMB_VFS_OPAQUE_WRITE(fsp, fd, data, n) ((fsp)->conn->vfs_opaque.ops.write((fsp)->conn->vfs_opaque.handles.write, (fsp), (fd), (data), (n)))
+#define SMB_VFS_OPAQUE_PWRITE(fsp, fd, data, n, off) ((fsp)->conn->vfs_opaque.ops.pwrite((fsp)->conn->vfs_opaque.handles.pwrite, (fsp), (fd), (data), (n), (off)))
#define SMB_VFS_OPAQUE_LSEEK(fsp, fd, offset, whence) ((fsp)->conn->vfs_opaque.ops.lseek((fsp)->conn->vfs_opaque.handles.lseek, (fsp), (fd), (offset), (whence)))
#define SMB_VFS_OPAQUE_SENDFILE(tofd, fsp, fromfd, header, offset, count) ((fsp)->conn->vfs_opaque.ops.sendfile((fsp)->conn->vfs_opaque.handles.sendfile, (tofd), (fsp), (fromfd), (header), (offset), (count)))
#define SMB_VFS_OPAQUE_RENAME(conn, old, new) ((conn)->vfs_opaque.ops.rename((conn)->vfs_opaque.handles.rename, (conn), (old), (new)))
@@ -238,7 +242,9 @@
#define SMB_VFS_NEXT_OPEN(handle, conn, fname, flags, mode) ((handle)->vfs_next.ops.open((handle)->vfs_next.handles.open, (conn), (fname), (flags), (mode)))
#define SMB_VFS_NEXT_CLOSE(handle, fsp, fd) ((handle)->vfs_next.ops.close((handle)->vfs_next.handles.close, (fsp), (fd)))
#define SMB_VFS_NEXT_READ(handle, fsp, fd, data, n) ((handle)->vfs_next.ops.read((handle)->vfs_next.handles.read, (fsp), (fd), (data), (n)))
+#define SMB_VFS_NEXT_PREAD(handle, fsp, fd, data, n, off) ((handle)->vfs_next.ops.pread((handle)->vfs_next.handles.pread, (fsp), (fd), (data), (n), (off)))
#define SMB_VFS_NEXT_WRITE(handle, fsp, fd, data, n) ((handle)->vfs_next.ops.write((handle)->vfs_next.handles.write, (fsp), (fd), (data), (n)))
+#define SMB_VFS_NEXT_PWRITE(handle, fsp, fd, data, n, off) ((handle)->vfs_next.ops.pwrite((handle)->vfs_next.handles.pwrite, (fsp), (fd), (data), (n), (off)))
#define SMB_VFS_NEXT_LSEEK(handle, fsp, fd, offset, whence) ((handle)->vfs_next.ops.lseek((handle)->vfs_next.handles.lseek, (fsp), (fd), (offset), (whence)))
#define SMB_VFS_NEXT_SENDFILE(handle, tofd, fsp, fromfd, header, offset, count) ((handle)->vfs_next.ops.sendfile((handle)->vfs_next.handles.sendfile, (tofd), (fsp), (fromfd), (header), (offset), (count)))
#define SMB_VFS_NEXT_RENAME(handle, conn, old, new) ((handle)->vfs_next.ops.rename((handle)->vfs_next.handles.rename, (conn), (old), (new)))
Index: samba/source/lib/system.c
===================================================================
RCS file: /cvsroot/samba/source/lib/system.c,v
retrieving revision 1.78.2.9
diff -u -r1.78.2.9 system.c
--- samba/source/lib/system.c 1 Oct 2003 17:01:21 -0000 1.78.2.9
+++ samba/source/lib/system.c 16 Dec 2003 04:27:26 -0000
@@ -100,6 +100,47 @@
return ret;
}
+
+/*******************************************************************
+A pread wrapper that will deal with EINTR and 64-bit file offsets.
+
+fd, buf, count and off are passed straight through to pread64() when
+HAVE_EXPLICIT_LARGEFILE_SUPPORT, HAVE_OFF64_T and HAVE_PREAD64 are all
+defined, and to pread() otherwise. The call is repeated for as long as
+it fails with errno == EINTR; any other result (including a short read
+or another error) is returned to the caller unchanged.
+
+Only compiled when HAVE_PREAD or HAVE_PREAD64 is defined.
+********************************************************************/
+
+#if defined(HAVE_PREAD) || defined(HAVE_PREAD64)
+ssize_t sys_pread(int fd, void *buf, size_t count, SMB_OFF_T off)
+{
+ ssize_t ret;
+
+ do {
+#if defined(HAVE_EXPLICIT_LARGEFILE_SUPPORT) && defined(HAVE_OFF64_T) && defined(HAVE_PREAD64)
+ ret = pread64(fd, buf, count, off);
+#else
+ ret = pread(fd, buf, count, off);
+#endif
+ } while (ret == -1 && errno == EINTR); /* retry only when interrupted */
+ return ret;
+}
+#endif
+
+/*******************************************************************
+A pwrite wrapper that will deal with EINTR and 64-bit file offsets.
+
+fd, buf, count and off are passed straight through to pwrite64() when
+HAVE_EXPLICIT_LARGEFILE_SUPPORT, HAVE_OFF64_T and HAVE_PWRITE64 are all
+defined, and to pwrite() otherwise. The call is repeated for as long as
+it fails with errno == EINTR; any other result (including a short write
+or another error) is returned to the caller unchanged.
+
+Only compiled when HAVE_PWRITE or HAVE_PWRITE64 is defined.
+********************************************************************/
+
+#if defined(HAVE_PWRITE) || defined(HAVE_PWRITE64)
+ssize_t sys_pwrite(int fd, const void *buf, size_t count, SMB_OFF_T off)
+{
+ ssize_t ret;
+
+ do {
+#if defined(HAVE_EXPLICIT_LARGEFILE_SUPPORT) && defined(HAVE_OFF64_T) && defined(HAVE_PWRITE64)
+ ret = pwrite64(fd, buf, count, off);
+#else
+ ret = pwrite(fd, buf, count, off);
+#endif
+ } while (ret == -1 && errno == EINTR); /* retry only when interrupted */
+ return ret;
+}
+#endif
+
/*******************************************************************
A send wrapper that will deal with EINTR.
********************************************************************/
Index: samba/source/smbd/fileio.c
===================================================================
RCS file: /cvsroot/samba/source/smbd/fileio.c,v
retrieving revision 1.40.2.11
diff -u -r1.40.2.11 fileio.c
--- samba/source/smbd/fileio.c 2 Nov 2003 17:10:12 -0000 1.40.2.11
+++ samba/source/smbd/fileio.c 16 Dec 2003 04:27:26 -0000
@@ -95,16 +95,22 @@
flush_write_cache(fsp, READ_FLUSH);
+#if !defined(HAVE_PREAD) && !defined(HAVE_PREAD64)
if (seek_file(fsp,pos) == -1) {
DEBUG(3,("read_file: Failed to seek to %.0f\n",(double)pos));
return(ret);
}
-
+#endif
+
if (n > 0) {
#ifdef DMF_FIX
int numretries = 3;
tryagain:
+#if defined(HAVE_PREAD) || defined(HAVE_PREAD64)
+ readret = SMB_VFS_PREAD(fsp,fsp->fd,data,n,pos);
+#else
readret = SMB_VFS_READ(fsp,fsp->fd,data,n);
+#endif
if (readret == -1) {
if ((errno == EAGAIN) && numretries) {
DEBUG(3,("read_file EAGAIN retry in 10 seconds\n"));
@@ -115,7 +121,11 @@
return -1;
}
#else /* NO DMF fix. */
+#if defined(HAVE_PREAD) || defined(HAVE_PREAD64)
+ readret = SMB_VFS_PREAD(fsp,fsp->fd,data,n,pos);
+#else
readret = SMB_VFS_READ(fsp,fsp->fd,data,n);
+#endif
if (readret == -1)
return -1;
#endif
@@ -143,10 +153,17 @@
{
ssize_t ret;
+#if defined(HAVE_PREAD) || defined(HAVE_PREAD64)
+ if (pos == -1)
+ ret = vfs_write_data(fsp, data, n);
+ else
+ ret = vfs_pwrite_data(fsp, data, n, pos);
+#else
if ((pos != -1) && (seek_file(fsp,pos) == -1))
return -1;
ret = vfs_write_data(fsp,data,n);
+#endif
DEBUG(10,("real_write_file (%s): pos = %.0f, size = %lu, returned %ld\n",
fsp->fsp_name, (double)pos, (unsigned long)n, (long)ret ));
Index: samba/source/smbd/vfs-wrap.c
===================================================================
RCS file: /cvsroot/samba/source/smbd/vfs-wrap.c,v
retrieving revision 1.37.2.13
diff -u -r1.37.2.13 vfs-wrap.c
--- samba/source/smbd/vfs-wrap.c 21 Nov 2003 23:01:37 -0000 1.37.2.13
+++ samba/source/smbd/vfs-wrap.c 16 Dec 2003 04:27:27 -0000
@@ -190,6 +190,49 @@
return result;
}
+/*
+ * Default VFS pread operation: read n bytes into data from fd at the
+ * given offset.
+ *
+ * With HAVE_PREAD or HAVE_PREAD64 the read is done by sys_pread(),
+ * bracketed by the syscall_pread profiling macros. If that fails with
+ * ESPIPE the request is retried through SMB_VFS_READ(), which ignores
+ * the offset.
+ *
+ * Without either macro the call is emulated: the current position is
+ * fetched with SMB_VFS_LSEEK(SEEK_CUR), the file is positioned at
+ * offset, SMB_VFS_READ() is issued, and the original position is put
+ * back. errno as left by the read is preserved across the final seek.
+ *
+ * Returns the result of the read call, or -1 if either of the first two
+ * seeks in the emulated path fails. The handle argument is not used here.
+ */
+ssize_t vfswrap_pread(vfs_handle_struct * handle,
+ files_struct * fsp,
+ int fd,
+ void * data,
+ size_t n,
+ SMB_OFF_T offset)
+{
+ ssize_t result;
+
+#if defined(HAVE_PREAD) || defined(HAVE_PREAD64)
+ START_PROFILE_BYTES(syscall_pread, n);
+ result = sys_pread(fd, data, n, offset);
+ END_PROFILE(syscall_pread);
+
+ if (result == -1 && errno == ESPIPE) {
+ /* Maintain the fiction that pipes can be seeked (sought?) on. */
+ result = SMB_VFS_READ(fsp, fd, data, n);
+ }
+
+#else /* HAVE_PREAD */
+ SMB_OFF_T curr;
+ int lerrno;
+
+ /* Remember where the file pointer is so it can be restored below. */
+ /* NOTE(review): unlike the native path there is no ESPIPE fallback */
+ /* here, so an unseekable fd fails at this first lseek - confirm */
+ /* whether that difference is intended. */
+ curr = SMB_VFS_LSEEK(fsp, fd, 0, SEEK_CUR);
+ if (curr == -1) {
+ return -1;
+ }
+
+ if (SMB_VFS_LSEEK(fsp, fd, offset, SEEK_SET) == -1) {
+ return -1;
+ }
+
+ result = SMB_VFS_READ(fsp, fd, data, n);
+ lerrno = errno;
+
+ /* Put the file pointer back; the result of this seek is not checked. */
+ SMB_VFS_LSEEK(fsp, fd, curr, SEEK_SET);
+ /* Report the errno left by the read, not by the restoring seek. */
+ errno = lerrno;
+
+#endif /* HAVE_PREAD */
+
+ return result;
+}
+
ssize_t vfswrap_write(vfs_handle_struct *handle, files_struct *fsp, int fd, const void *data, size_t n)
{
ssize_t result;
@@ -197,6 +240,49 @@
START_PROFILE_BYTES(syscall_write, n);
result = sys_write(fd, data, n);
END_PROFILE(syscall_write);
+ return result;
+}
+
+/*
+ * Default VFS pwrite operation: write n bytes from data to fd at the
+ * given offset.
+ *
+ * With HAVE_PWRITE or HAVE_PWRITE64 (the same condition that guards the
+ * definition of sys_pwrite()) the write is done by sys_pwrite(),
+ * bracketed by the syscall_pwrite profiling macros. If that fails with
+ * ESPIPE the request is retried through SMB_VFS_WRITE(), which ignores
+ * the offset.
+ *
+ * Without either macro the call is emulated: the current position is
+ * fetched with SMB_VFS_LSEEK(SEEK_CUR), the file is positioned at
+ * offset, SMB_VFS_WRITE() is issued, and the original position is put
+ * back. errno as left by the write is preserved across the final seek.
+ *
+ * Returns the result of the write call, or -1 if either of the first
+ * two seeks in the emulated path fails. The handle argument is not used
+ * here.
+ */
+ssize_t vfswrap_pwrite(vfs_handle_struct * handle,
+ files_struct * fsp,
+ int fd,
+ const void * data,
+ size_t n,
+ SMB_OFF_T offset)
+{
+ ssize_t result;
+
+#if defined(HAVE_PWRITE) || defined(HAVE_PWRITE64)
+ START_PROFILE_BYTES(syscall_pwrite, n);
+ result = sys_pwrite(fd, data, n, offset);
+ END_PROFILE(syscall_pwrite);
+
+ if (result == -1 && errno == ESPIPE) {
+ /* Maintain the fiction that pipes can be sought on. */
+ result = SMB_VFS_WRITE(fsp, fd, data, n);
+ }
+
+#else /* HAVE_PWRITE */
+ SMB_OFF_T curr;
+ int lerrno;
+
+ /* Remember where the file pointer is so it can be restored below. */
+ curr = SMB_VFS_LSEEK(fsp, fd, 0, SEEK_CUR);
+ if (curr == -1) {
+ return -1;
+ }
+
+ if (SMB_VFS_LSEEK(fsp, fd, offset, SEEK_SET) == -1) {
+ return -1;
+ }
+
+ result = SMB_VFS_WRITE(fsp, fd, data, n);
+ lerrno = errno;
+
+ /* Put the file pointer back; the result of this seek is not checked. */
+ SMB_VFS_LSEEK(fsp, fd, curr, SEEK_SET);
+ /* Report the errno left by the write, not by the restoring seek. */
+ errno = lerrno;
+
+#endif /* HAVE_PWRITE */
+
return result;
}
Index: samba/source/smbd/vfs.c
===================================================================
RCS file: /cvsroot/samba/source/smbd/vfs.c,v
retrieving revision 1.57.2.22
diff -u -r1.57.2.22 vfs.c
--- samba/source/smbd/vfs.c 5 Sep 2003 19:59:55 -0000 1.57.2.22
+++ samba/source/smbd/vfs.c 16 Dec 2003 04:27:27 -0000
@@ -71,7 +71,9 @@
vfswrap_open,
vfswrap_close,
vfswrap_read,
+ vfswrap_pread,
vfswrap_write,
+ vfswrap_pwrite,
vfswrap_lseek,
vfswrap_sendfile,
vfswrap_rename,
@@ -429,6 +431,28 @@
return (ssize_t)total;
}
+/*
+ * Read byte_count bytes into buf from fsp->fd starting at offset,
+ * issuing SMB_VFS_PREAD() repeatedly until the request is satisfied.
+ *
+ * Each call asks for the bytes still missing, at offset advanced by the
+ * number of bytes already read. A return of 0 from SMB_VFS_PREAD() ends
+ * the loop early and the short count is returned. A -1 with errno ==
+ * EINTR is retried; any other -1 makes the function return -1, even if
+ * some bytes had already been read.
+ *
+ * Returns the number of bytes read (possibly fewer than byte_count),
+ * or -1 on error.
+ */
+ssize_t vfs_pread_data(files_struct *fsp, char *buf,
+ size_t byte_count, SMB_OFF_T offset)
+{
+ size_t total=0;
+
+ while (total < byte_count)
+ {
+ ssize_t ret = SMB_VFS_PREAD(fsp, fsp->fd, buf + total,
+ byte_count - total, offset + total);
+
+ if (ret == 0) return total; /* nothing more to read: return short count */
+ if (ret == -1) {
+ if (errno == EINTR)
+ continue;
+ else
+ return -1;
+ }
+ total += ret;
+ }
+ return (ssize_t)total;
+}
+
/****************************************************************************
Write data to a fd on the vfs.
****************************************************************************/
@@ -451,6 +475,25 @@
return (ssize_t)total;
}
+/*
+ * Write N bytes from buffer to fsp->fd starting at offset, issuing
+ * SMB_VFS_PWRITE() repeatedly until everything has been written.
+ *
+ * Each call submits the bytes still outstanding, at offset advanced by
+ * the number of bytes already written. A return of -1 from
+ * SMB_VFS_PWRITE() makes the function return -1, even if some bytes had
+ * already been written. A return of 0 ends the loop early and the short
+ * count is returned.
+ *
+ * Returns the number of bytes written (possibly fewer than N), or -1 on
+ * error.
+ */
+ssize_t vfs_pwrite_data(files_struct *fsp,const char *buffer,
+ size_t N, SMB_OFF_T offset)
+{
+ size_t total=0;
+ ssize_t ret;
+
+ while (total < N) {
+ ret = SMB_VFS_PWRITE(fsp, fsp->fd, buffer + total,
+ N - total, offset + total);
+
+ if (ret == -1)
+ return -1;
+ if (ret == 0)
+ return total; /* no progress: return short count */
+
+ total += ret;
+ }
+ return (ssize_t)total;
+}
/****************************************************************************
An allocate file space call using the vfs interface.
Allocates space for a file from a filedescriptor.
Index: samba/source/utils/status.c
===================================================================
RCS file: /cvsroot/samba/source/utils/status.c,v
retrieving revision 1.72.2.15
diff -u -r1.72.2.15 status.c
--- samba/source/utils/status.c 12 Nov 2003 22:35:50 -0000 1.72.2.15
+++ samba/source/utils/status.c 16 Dec 2003 04:27:27 -0000
@@ -197,6 +197,12 @@
d_printf("write_count: %u\n", profile_p->syscall_write_count);
d_printf("write_time: %u\n", profile_p->syscall_write_time);
d_printf("write_bytes: %u\n", profile_p->syscall_write_bytes);
+ d_printf("pread_count: %u\n", profile_p->syscall_pread_count);
+ d_printf("pread_time: %u\n", profile_p->syscall_pread_time);
+ d_printf("pread_bytes: %u\n", profile_p->syscall_pread_bytes);
+ d_printf("pwrite_count: %u\n", profile_p->syscall_pwrite_count);
+ d_printf("pwrite_time: %u\n", profile_p->syscall_pwrite_time);
+ d_printf("pwrite_bytes: %u\n", profile_p->syscall_pwrite_bytes);
#ifdef WITH_SENDFILE
d_printf("sendfile_count: %u\n", profile_p->syscall_sendfile_count);
d_printf("sendfile_time: %u\n", profile_p->syscall_sendfile_time);
More information about the samba-technical
mailing list