[PATCH] add pread operation to vfs layer

James Peach jpeach at sgi.com
Tue Dec 16 04:38:05 GMT 2003


Hi all,

The following diff adds pread/pwrite operations to the VFS layer and makes
use of them in the I/O path. This corresponds to bugzilla bug #889.

There is not much additional performance advantage when using spinlocks (a
consistent 2% - 7% increase in throughput), but there is a significant
benefit to using p{read,write} with fcntl tdb locking (5% - 30% increase in
throughput). The largest improvements are in high packet rate workloads (i.e.,
small block sizes and meta-data workloads), as you might expect. I can
provide detailed numbers if that would be useful.

For systems without pread/pwrite (are there many of these?), the
p{read,write} operations are emulated by a lseek/lseek/{read,write}/lseek
sequence. The file I/O path for these systems continues to use the old
lseek/read code.

cheers

--
James Peach | jpeach at sgi.com

Index: samba/source/include/smbprofile.h
===================================================================
RCS file: /cvsroot/samba/source/include/smbprofile.h,v
retrieving revision 1.4.2.2
diff -u -r1.4.2.2 smbprofile.h
--- samba/source/include/smbprofile.h	12 May 2003 01:20:12 -0000	1.4.2.2
+++ samba/source/include/smbprofile.h	16 Dec 2003 04:27:25 -0000
@@ -34,7 +34,7 @@
 
 #define PROF_SHMEM_KEY ((key_t)0x07021999)
 #define PROF_SHM_MAGIC 0x6349985
-#define PROF_SHM_VERSION 7
+#define PROF_SHM_VERSION 8
 
 /* time values in the following structure are in microseconds */
 
@@ -60,9 +60,15 @@
 	unsigned syscall_read_count;
 	unsigned syscall_read_time;
 	unsigned syscall_read_bytes;	/* bytes read with read syscall */
+	unsigned syscall_pread_count;
+	unsigned syscall_pread_time;
+	unsigned syscall_pread_bytes;	/* bytes read with pread syscall */
 	unsigned syscall_write_count;
 	unsigned syscall_write_time;
 	unsigned syscall_write_bytes;	/* bytes written with write syscall */
+	unsigned syscall_pwrite_count;
+	unsigned syscall_pwrite_time;
+	unsigned syscall_pwrite_bytes;	/* bytes written with pwrite syscall */
 	unsigned syscall_lseek_count;
 	unsigned syscall_lseek_time;
 	unsigned syscall_sendfile_count;
Index: samba/source/include/vfs.h
===================================================================
RCS file: /cvsroot/samba/source/include/vfs.h,v
retrieving revision 1.25.2.10
diff -u -r1.25.2.10 vfs.h
--- samba/source/include/vfs.h	7 Aug 2003 21:47:46 -0000	1.25.2.10
+++ samba/source/include/vfs.h	16 Dec 2003 04:27:25 -0000
@@ -51,7 +51,8 @@
 /* Changed to version 7 to include the get_nt_acl info parameter. JRA. */
 /* Changed to version 8 includes EA calls. JRA. */
 /* Changed to version 9 to include the get_shadow_data call. --metze */
-#define SMB_VFS_INTERFACE_VERSION 9
+/* Changed to version 10 to include pread/pwrite calls. */
+#define SMB_VFS_INTERFACE_VERSION 10
 
 
 /* to bug old modules witch are trying to compile with the old functions */
@@ -107,7 +108,9 @@
 	SMB_VFS_OP_OPEN,
 	SMB_VFS_OP_CLOSE,
 	SMB_VFS_OP_READ,
+	SMB_VFS_OP_PREAD,
 	SMB_VFS_OP_WRITE,
+	SMB_VFS_OP_PWRITE,
 	SMB_VFS_OP_LSEEK,
 	SMB_VFS_OP_SENDFILE,
 	SMB_VFS_OP_RENAME,
@@ -213,7 +216,9 @@
 		int (*open)(struct vfs_handle_struct *handle, struct connection_struct *conn, const char *fname, int flags, mode_t mode);
 		int (*close)(struct vfs_handle_struct *handle, struct files_struct *fsp, int fd);
 		ssize_t (*read)(struct vfs_handle_struct *handle, struct files_struct *fsp, int fd, void *data, size_t n);
+		ssize_t (*pread)(struct vfs_handle_struct *handle, struct files_struct *fsp, int fd, void *data, size_t n, SMB_OFF_T offset);
 		ssize_t (*write)(struct vfs_handle_struct *handle, struct files_struct *fsp, int fd, const void *data, size_t n);
+		ssize_t (*pwrite)(struct vfs_handle_struct *handle, struct files_struct *fsp, int fd, const void *data, size_t n, SMB_OFF_T offset);
 		SMB_OFF_T (*lseek)(struct vfs_handle_struct *handle, struct files_struct *fsp, int fd, SMB_OFF_T offset, int whence);
 		ssize_t (*sendfile)(struct vfs_handle_struct *handle, int tofd, files_struct *fsp, int fromfd, const DATA_BLOB *header, SMB_OFF_T offset, size_t count);
 		int (*rename)(struct vfs_handle_struct *handle, struct connection_struct *conn, const char *old, const char *new);
@@ -311,7 +316,9 @@
 		struct vfs_handle_struct *open;
 		struct vfs_handle_struct *close;
 		struct vfs_handle_struct *read;
+		struct vfs_handle_struct *pread;
 		struct vfs_handle_struct *write;
+		struct vfs_handle_struct *pwrite;
 		struct vfs_handle_struct *lseek;
 		struct vfs_handle_struct *sendfile;
 		struct vfs_handle_struct *rename;
Index: samba/source/include/vfs_macros.h
===================================================================
RCS file: /cvsroot/samba/source/include/vfs_macros.h,v
retrieving revision 1.1.2.11
diff -u -r1.1.2.11 vfs_macros.h
--- samba/source/include/vfs_macros.h	7 Aug 2003 21:47:46 -0000	1.1.2.11
+++ samba/source/include/vfs_macros.h	16 Dec 2003 04:27:25 -0000
@@ -46,7 +46,9 @@
 #define SMB_VFS_OPEN(conn, fname, flags, mode) ((conn)->vfs.ops.open((conn)->vfs.handles.open, (conn), (fname), (flags), (mode)))
 #define SMB_VFS_CLOSE(fsp, fd) ((fsp)->conn->vfs.ops.close((fsp)->conn->vfs.handles.close, (fsp), (fd)))
 #define SMB_VFS_READ(fsp, fd, data, n) ((fsp)->conn->vfs.ops.read((fsp)->conn->vfs.handles.read, (fsp), (fd), (data), (n)))
+#define SMB_VFS_PREAD(fsp, fd, data, n, off) ((fsp)->conn->vfs.ops.pread((fsp)->conn->vfs.handles.pread, (fsp), (fd), (data), (n), (off)))
 #define SMB_VFS_WRITE(fsp, fd, data, n) ((fsp)->conn->vfs.ops.write((fsp)->conn->vfs.handles.write, (fsp), (fd), (data), (n)))
+#define SMB_VFS_PWRITE(fsp, fd, data, n, off) ((fsp)->conn->vfs.ops.pwrite((fsp)->conn->vfs.handles.pwrite, (fsp), (fd), (data), (n), (off)))
 #define SMB_VFS_LSEEK(fsp, fd, offset, whence) ((fsp)->conn->vfs.ops.lseek((fsp)->conn->vfs.handles.lseek, (fsp), (fd), (offset), (whence)))
 #define SMB_VFS_SENDFILE(tofd, fsp, fromfd, header, offset, count) ((fsp)->conn->vfs.ops.sendfile((fsp)->conn->vfs.handles.sendfile, (tofd), (fsp), (fromfd), (header), (offset), (count)))
 #define SMB_VFS_RENAME(conn, old, new) ((conn)->vfs.ops.rename((conn)->vfs.handles.rename, (conn), (old), (new)))
@@ -142,7 +144,9 @@
 #define SMB_VFS_OPAQUE_OPEN(conn, fname, flags, mode) ((conn)->vfs_opaque.ops.open((conn)->vfs_opaque.handles.open, (conn), (fname), (flags), (mode)))
 #define SMB_VFS_OPAQUE_CLOSE(fsp, fd) ((fsp)->conn->vfs_opaque.ops.close((fsp)->conn->vfs_opaque.handles.close, (fsp), (fd)))
 #define SMB_VFS_OPAQUE_READ(fsp, fd, data, n) ((fsp)->conn->vfs_opaque.ops.read((fsp)->conn->vfs_opaque.handles.read, (fsp), (fd), (data), (n)))
+#define SMB_VFS_OPAQUE_PREAD(fsp, fd, data, n, off) ((fsp)->conn->vfs_opaque.ops.pread((fsp)->conn->vfs_opaque.handles.pread, (fsp), (fd), (data), (n), (off)))
 #define SMB_VFS_OPAQUE_WRITE(fsp, fd, data, n) ((fsp)->conn->vfs_opaque.ops.write((fsp)->conn->vfs_opaque.handles.write, (fsp), (fd), (data), (n)))
+#define SMB_VFS_OPAQUE_PWRITE(fsp, fd, data, n, off) ((fsp)->conn->vfs_opaque.ops.pwrite((fsp)->conn->vfs_opaque.handles.pwrite, (fsp), (fd), (data), (n), (off)))
 #define SMB_VFS_OPAQUE_LSEEK(fsp, fd, offset, whence) ((fsp)->conn->vfs_opaque.ops.lseek((fsp)->conn->vfs_opaque.handles.lseek, (fsp), (fd), (offset), (whence)))
 #define SMB_VFS_OPAQUE_SENDFILE(tofd, fsp, fromfd, header, offset, count) ((fsp)->conn->vfs_opaque.ops.sendfile((fsp)->conn->vfs_opaque.handles.sendfile, (tofd), (fsp), (fromfd), (header), (offset), (count)))
 #define SMB_VFS_OPAQUE_RENAME(conn, old, new) ((conn)->vfs_opaque.ops.rename((conn)->vfs_opaque.handles.rename, (conn), (old), (new)))
@@ -238,7 +242,9 @@
 #define SMB_VFS_NEXT_OPEN(handle, conn, fname, flags, mode) ((handle)->vfs_next.ops.open((handle)->vfs_next.handles.open, (conn), (fname), (flags), (mode)))
 #define SMB_VFS_NEXT_CLOSE(handle, fsp, fd) ((handle)->vfs_next.ops.close((handle)->vfs_next.handles.close, (fsp), (fd)))
 #define SMB_VFS_NEXT_READ(handle, fsp, fd, data, n) ((handle)->vfs_next.ops.read((handle)->vfs_next.handles.read, (fsp), (fd), (data), (n)))
+#define SMB_VFS_NEXT_PREAD(handle, fsp, fd, data, n, off) ((handle)->vfs_next.ops.pread((handle)->vfs_next.handles.pread, (fsp), (fd), (data), (n), (off)))
 #define SMB_VFS_NEXT_WRITE(handle, fsp, fd, data, n) ((handle)->vfs_next.ops.write((handle)->vfs_next.handles.write, (fsp), (fd), (data), (n)))
+#define SMB_VFS_NEXT_PWRITE(handle, fsp, fd, data, n, off) ((handle)->vfs_next.ops.pwrite((handle)->vfs_next.handles.pwrite, (fsp), (fd), (data), (n), (off)))
 #define SMB_VFS_NEXT_LSEEK(handle, fsp, fd, offset, whence) ((handle)->vfs_next.ops.lseek((handle)->vfs_next.handles.lseek, (fsp), (fd), (offset), (whence)))
 #define SMB_VFS_NEXT_SENDFILE(handle, tofd, fsp, fromfd, header, offset, count) ((handle)->vfs_next.ops.sendfile((handle)->vfs_next.handles.sendfile, (tofd), (fsp), (fromfd), (header), (offset), (count)))
 #define SMB_VFS_NEXT_RENAME(handle, conn, old, new) ((handle)->vfs_next.ops.rename((handle)->vfs_next.handles.rename, (conn), (old), (new)))
Index: samba/source/lib/system.c
===================================================================
RCS file: /cvsroot/samba/source/lib/system.c,v
retrieving revision 1.78.2.9
diff -u -r1.78.2.9 system.c
--- samba/source/lib/system.c	1 Oct 2003 17:01:21 -0000	1.78.2.9
+++ samba/source/lib/system.c	16 Dec 2003 04:27:26 -0000
@@ -100,6 +100,47 @@
 	return ret;
 }
 
+
+/*******************************************************************
+A pread wrapper that will deal with EINTR and 64-bit file offsets.
+********************************************************************/
+
+#if defined(HAVE_PREAD) || defined(HAVE_PREAD64)
+ssize_t sys_pread(int fd, void *buf, size_t count, SMB_OFF_T off)
+{
+	ssize_t ret; /* value returned by the underlying pread call */
+
+	do {
+#if defined(HAVE_EXPLICIT_LARGEFILE_SUPPORT) && defined(HAVE_OFF64_T) && defined(HAVE_PREAD64)
+		ret = pread64(fd, buf, count, off); /* explicit 64-bit offset variant */
+#else
+		ret = pread(fd, buf, count, off);
+#endif
+	} while (ret == -1 && errno == EINTR); /* reissue the call if it failed with EINTR */
+	return ret;
+}
+#endif /* HAVE_PREAD || HAVE_PREAD64 */
+
+/*******************************************************************
+A pwrite wrapper that will deal with EINTR and 64-bit file offsets.
+********************************************************************/
+
+#if defined(HAVE_PWRITE) || defined(HAVE_PWRITE64)
+ssize_t sys_pwrite(int fd, const void *buf, size_t count, SMB_OFF_T off)
+{
+	ssize_t ret; /* value returned by the underlying pwrite call */
+
+	do {
+#if defined(HAVE_EXPLICIT_LARGEFILE_SUPPORT) && defined(HAVE_OFF64_T) && defined(HAVE_PWRITE64)
+		ret = pwrite64(fd, buf, count, off); /* explicit 64-bit offset variant */
+#else
+		ret = pwrite(fd, buf, count, off);
+#endif
+	} while (ret == -1 && errno == EINTR); /* reissue the call if it failed with EINTR */
+	return ret;
+}
+#endif /* HAVE_PWRITE || HAVE_PWRITE64 */
+
 /*******************************************************************
 A send wrapper that will deal with EINTR.
 ********************************************************************/
Index: samba/source/smbd/fileio.c
===================================================================
RCS file: /cvsroot/samba/source/smbd/fileio.c,v
retrieving revision 1.40.2.11
diff -u -r1.40.2.11 fileio.c
--- samba/source/smbd/fileio.c	2 Nov 2003 17:10:12 -0000	1.40.2.11
+++ samba/source/smbd/fileio.c	16 Dec 2003 04:27:26 -0000
@@ -95,16 +95,22 @@
 
 	flush_write_cache(fsp, READ_FLUSH);
 
+#if !defined(HAVE_PREAD) && !defined(HAVE_PREAD64)
 	if (seek_file(fsp,pos) == -1) {
 		DEBUG(3,("read_file: Failed to seek to %.0f\n",(double)pos));
 		return(ret);
 	}
-  
+#endif
+
 	if (n > 0) {
 #ifdef DMF_FIX
 		int numretries = 3;
 tryagain:
+#if defined(HAVE_PREAD) || defined(HAVE_PREAD64)
+		readret = SMB_VFS_PREAD(fsp,fsp->fd,data,n,pos);
+#else
 		readret = SMB_VFS_READ(fsp,fsp->fd,data,n);
+#endif
 		if (readret == -1) {
 			if ((errno == EAGAIN) && numretries) {
 				DEBUG(3,("read_file EAGAIN retry in 10 seconds\n"));
@@ -115,7 +121,11 @@
 			return -1;
 		}
 #else /* NO DMF fix. */
+#if defined(HAVE_PREAD) || defined(HAVE_PREAD64)
+		readret = SMB_VFS_PREAD(fsp,fsp->fd,data,n,pos);
+#else
 		readret = SMB_VFS_READ(fsp,fsp->fd,data,n);
+#endif
 		if (readret == -1)
 			return -1;
 #endif
@@ -143,10 +153,17 @@
 {
 	ssize_t ret;
 
+#if defined(HAVE_PREAD) || defined(HAVE_PREAD64)
+        if (pos == -1)
+                ret = vfs_write_data(fsp, data, n);
+        else
+                ret = vfs_pwrite_data(fsp, data, n, pos);
+#else
 	if ((pos != -1) && (seek_file(fsp,pos) == -1))
 		return -1;
 
 	ret = vfs_write_data(fsp,data,n);
+#endif
 
 	DEBUG(10,("real_write_file (%s): pos = %.0f, size = %lu, returned %ld\n",
 		fsp->fsp_name, (double)pos, (unsigned long)n, (long)ret ));
Index: samba/source/smbd/vfs-wrap.c
===================================================================
RCS file: /cvsroot/samba/source/smbd/vfs-wrap.c,v
retrieving revision 1.37.2.13
diff -u -r1.37.2.13 vfs-wrap.c
--- samba/source/smbd/vfs-wrap.c	21 Nov 2003 23:01:37 -0000	1.37.2.13
+++ samba/source/smbd/vfs-wrap.c	16 Dec 2003 04:27:27 -0000
@@ -190,6 +190,49 @@
     return result;
 }
 
+ssize_t vfswrap_pread(vfs_handle_struct *       handle, /* not referenced in this function */
+                      files_struct *            fsp,    /* passed to the SMB_VFS_READ/LSEEK fallbacks */
+                      int                       fd,     /* descriptor to read from */
+                      void *                    data,   /* destination buffer */
+                      size_t                    n,      /* number of bytes requested */
+                      SMB_OFF_T                 offset) /* file offset to read at */
+{
+    ssize_t result; /* value returned to the caller; -1 signals failure */
+
+#if defined(HAVE_PREAD) || defined(HAVE_PREAD64)
+    START_PROFILE_BYTES(syscall_pread, n);
+    result = sys_pread(fd, data, n, offset);
+    END_PROFILE(syscall_pread);
+ 
+    if (result == -1 && errno == ESPIPE) {
+	/* Maintain the fiction that pipes can be seeked (sought?) on. */
+	result = SMB_VFS_READ(fsp, fd, data, n); /* offset is ignored on this path */
+    }
+
+#else /* HAVE_PREAD */
+    SMB_OFF_T   curr;   /* position before the emulated pread, restored afterwards */
+    int         lerrno; /* errno saved from the read */
+   
+    curr = SMB_VFS_LSEEK(fsp, fd, 0, SEEK_CUR); /* remember the current position */
+    if (curr == -1) {
+            return -1;
+    }
+
+    if (SMB_VFS_LSEEK(fsp, fd, offset, SEEK_SET) == -1) { /* move to the requested offset */
+            return -1;
+    }
+
+    result = SMB_VFS_READ(fsp, fd, data, n);
+    lerrno = errno;
+
+    SMB_VFS_LSEEK(fsp, fd, curr, SEEK_SET); /* put the position back; its result is not checked */
+    errno = lerrno; /* report the read's errno, not the restoring lseek's */
+
+#endif /* HAVE_PREAD */
+
+    return result;
+}
+
 ssize_t vfswrap_write(vfs_handle_struct *handle, files_struct *fsp, int fd, const void *data, size_t n)
 {
     ssize_t result;
@@ -197,6 +240,49 @@
     START_PROFILE_BYTES(syscall_write, n);
     result = sys_write(fd, data, n);
     END_PROFILE(syscall_write);
+    return result;
+}
+
+ssize_t vfswrap_pwrite(vfs_handle_struct *	handle, /* not referenced in this function */
+                      files_struct *            fsp,    /* passed to the SMB_VFS_WRITE/LSEEK fallbacks */
+                      int                       fd,     /* descriptor to write to */
+                      const void *              data,   /* bytes to write */
+                      size_t                    n,      /* number of bytes to write */
+                      SMB_OFF_T                 offset) /* file offset to write at */
+{
+    ssize_t result; /* value returned to the caller; -1 signals failure */
+
+#if defined(HAVE_PWRITE) || defined(HAVE_PWRITE64) /* same condition that guards sys_pwrite */
+    START_PROFILE_BYTES(syscall_pwrite, n);
+    result = sys_pwrite(fd, data, n, offset);
+    END_PROFILE(syscall_pwrite);
+
+    if (result == -1 && errno == ESPIPE) {
+	/* Maintain the fiction that pipes can be sought on. */
+	result = SMB_VFS_WRITE(fsp, fd, data, n); /* offset is ignored on this path */
+    }
+
+#else /* HAVE_PWRITE */
+    SMB_OFF_T   curr;   /* position before the emulated pwrite, restored afterwards */
+    int         lerrno; /* errno saved from the write */
+
+    curr = SMB_VFS_LSEEK(fsp, fd, 0, SEEK_CUR); /* remember the current position */
+    if (curr == -1) {
+            return -1;
+    }
+
+    if (SMB_VFS_LSEEK(fsp, fd, offset, SEEK_SET) == -1) { /* move to the requested offset */
+            return -1;
+    }
+
+    result = SMB_VFS_WRITE(fsp, fd, data, n);
+    lerrno = errno;
+
+    SMB_VFS_LSEEK(fsp, fd, curr, SEEK_SET); /* put the position back; its result is not checked */
+    errno = lerrno; /* report the write's errno, not the restoring lseek's */
+
+#endif /* HAVE_PWRITE */
+
     return result;
 }
 
Index: samba/source/smbd/vfs.c
===================================================================
RCS file: /cvsroot/samba/source/smbd/vfs.c,v
retrieving revision 1.57.2.22
diff -u -r1.57.2.22 vfs.c
--- samba/source/smbd/vfs.c	5 Sep 2003 19:59:55 -0000	1.57.2.22
+++ samba/source/smbd/vfs.c	16 Dec 2003 04:27:27 -0000
@@ -71,7 +71,9 @@
 		vfswrap_open,
 		vfswrap_close,
 		vfswrap_read,
+		vfswrap_pread,
 		vfswrap_write,
+		vfswrap_pwrite,
 		vfswrap_lseek,
 		vfswrap_sendfile,
 		vfswrap_rename,
@@ -429,6 +431,28 @@
 	return (ssize_t)total;
 }
 
+ssize_t vfs_pread_data(files_struct *fsp, char *buf, /* read into buf via SMB_VFS_PREAD on fsp->fd */
+                size_t byte_count, SMB_OFF_T offset) /* up to byte_count bytes, starting at offset */
+{
+	size_t total=0; /* bytes read so far */
+
+	while (total < byte_count)
+	{
+		ssize_t ret = SMB_VFS_PREAD(fsp, fsp->fd, buf + total,
+					byte_count - total, offset + total); /* resume after the bytes already read */
+
+		if (ret == 0) return total; /* nothing more was read: return the short count */
+		if (ret == -1) {
+			if (errno == EINTR) /* EINTR: retry the same request */
+				continue;
+			else
+				return -1; /* any other error fails the whole call */
+		}
+		total += ret;
+	}
+	return (ssize_t)total;
+}
+
 /****************************************************************************
  Write data to a fd on the vfs.
 ****************************************************************************/
@@ -451,6 +475,25 @@
 	return (ssize_t)total;
 }
 
+ssize_t vfs_pwrite_data(files_struct *fsp,const char *buffer, /* write buffer via SMB_VFS_PWRITE on fsp->fd */
+                size_t N, SMB_OFF_T offset) /* N bytes in total, starting at offset */
+{
+	size_t total=0; /* bytes written so far */
+	ssize_t ret; /* result of the latest SMB_VFS_PWRITE call */
+
+	while (total < N) {
+		ret = SMB_VFS_PWRITE(fsp, fsp->fd, buffer + total,
+                                N - total, offset + total); /* resume after the bytes already written */
+
+		if (ret == -1) /* any error fails the whole call, even after partial progress */
+			return -1;
+		if (ret == 0) /* nothing was written: return the short count */
+			return total;
+
+		total += ret;
+	}
+	return (ssize_t)total;
+}
 /****************************************************************************
  An allocate file space call using the vfs interface.
  Allocates space for a file from a filedescriptor.
Index: samba/source/utils/status.c
===================================================================
RCS file: /cvsroot/samba/source/utils/status.c,v
retrieving revision 1.72.2.15
diff -u -r1.72.2.15 status.c
--- samba/source/utils/status.c	12 Nov 2003 22:35:50 -0000	1.72.2.15
+++ samba/source/utils/status.c	16 Dec 2003 04:27:27 -0000
@@ -197,6 +197,12 @@
 	d_printf("write_count:                    %u\n", profile_p->syscall_write_count);
 	d_printf("write_time:                     %u\n", profile_p->syscall_write_time);
 	d_printf("write_bytes:                    %u\n", profile_p->syscall_write_bytes);
+	d_printf("pread_count:                    %u\n", profile_p->syscall_pread_count);
+	d_printf("pread_time:                     %u\n", profile_p->syscall_pread_time);
+	d_printf("pread_bytes:                    %u\n", profile_p->syscall_pread_bytes);
+	d_printf("pwrite_count:                   %u\n", profile_p->syscall_pwrite_count);
+	d_printf("pwrite_time:                    %u\n", profile_p->syscall_pwrite_time);
+	d_printf("pwrite_bytes:                   %u\n", profile_p->syscall_pwrite_bytes);
 #ifdef WITH_SENDFILE
 	d_printf("sendfile_count:                 %u\n", profile_p->syscall_sendfile_count);
 	d_printf("sendfile_time:                  %u\n", profile_p->syscall_sendfile_time);



More information about the samba-technical mailing list