[PATCH 5/5] vfs_default: add copy_file_range support for copy chunk

Björn Jacke bj at sernet.de
Thu Jan 19 23:01:43 UTC 2017


We try to use the copy_file_range syscall first, which can ideally be a
zero-copy operation. We fall back to userspace read/write if copy_file_range
is not available or fails with EXDEV, ENOTSUP or ENOSYS.

Signed-off-by: Bjoern Jacke <bj at sernet.de>
---
 source3/modules/vfs_default.c | 81 ++++++++++++++++++++++++++++---------------
 1 file changed, 53 insertions(+), 28 deletions(-)

diff --git a/source3/modules/vfs_default.c b/source3/modules/vfs_default.c
index d4610f7..abb2ee3 100644
--- a/source3/modules/vfs_default.c
+++ b/source3/modules/vfs_default.c
@@ -33,6 +33,9 @@
 #include "lib/util/tevent_ntstatus.h"
 #include "lib/util/sys_rw.h"
 #include "lib/pthreadpool/pthreadpool_tevent.h"
+#ifdef HAVE_SYS_SYSCALL_H
+#include <sys/syscall.h>
+#endif
 
 #undef DBGC_CLASS
 #define DBGC_CLASS DBGC_VFS
@@ -1608,9 +1611,6 @@ static struct tevent_req *vfswrap_copy_chunk_send(struct vfs_handle_struct *hand
 	struct vfs_cc_state *vfs_cc_state;
 	NTSTATUS status;
 
-	DEBUG(10, ("performing server side copy chunk of length %lu\n",
-		   (unsigned long)num));
-
 	req = tevent_req_create(mem_ctx, &vfs_cc_state, struct vfs_cc_state);
 	if (req == NULL) {
 		return NULL;
@@ -1641,7 +1641,6 @@ static struct tevent_req *vfswrap_copy_chunk_send(struct vfs_handle_struct *hand
 		return tevent_req_post(req, ev);
 	}
 
-	/* could use 2.6.33+ sendfile here to do this in kernel */
 	while (vfs_cc_state->copied < num) {
 		ssize_t ret;
 		struct lock_struct lck;
@@ -1666,27 +1665,6 @@ static struct tevent_req *vfswrap_copy_chunk_send(struct vfs_handle_struct *hand
 			return tevent_req_post(req, ev);
 		}
 
-		ret = SMB_VFS_PREAD(src_fsp, vfs_cc_state->buf,
-				    this_num, src_off);
-		if (ret == -1) {
-			saved_errno = errno;
-		}
-
-		SMB_VFS_STRICT_UNLOCK(src_fsp->conn, src_fsp, &lck);
-
-		if (ret == -1) {
-			errno = saved_errno;
-			tevent_req_nterror(req, map_nt_error_from_unix(errno));
-			return tevent_req_post(req, ev);
-		}
-		if (ret != this_num) {
-			/* zero tolerance for short reads */
-			tevent_req_nterror(req, NT_STATUS_IO_DEVICE_ERROR);
-			return tevent_req_post(req, ev);
-		}
-
-		src_off += ret;
-
 		if (dest_fsp->op == NULL) {
 			tevent_req_nterror(req, NT_STATUS_INTERNAL_ERROR);
 			return tevent_req_post(req, ev);
@@ -1704,25 +1682,72 @@ static struct tevent_req *vfswrap_copy_chunk_send(struct vfs_handle_struct *hand
 			return tevent_req_post(req, ev);
 		}
 
+#ifdef HAVE_SYSCALL_COPY_FILE_RANGE
+		/* use copy_file_range syscall if possible (Linux 4.5+) */
+		DEBUG(5, ("performing server side copy chunk "
+			  "(copy_file_range) of length %lu at offsets "
+			  " %lu/%lu\n", (unsigned long)num,
+			  (unsigned long)src_off, (unsigned long)dest_off));
+		SMB_VFS_LSEEK(src_fsp, src_off, SEEK_SET);
+		SMB_VFS_LSEEK(dest_fsp, dest_off, SEEK_SET);
+		ret = syscall(SYS_copy_file_range,
+				src_fsp->fh->fd, NULL,
+				dest_fsp->fh->fd, NULL,
+				this_num, 0);
+		if (ret == -1) {
+			DEBUG(0, ("failed copy_file_range: %s\n",
+				  strerror(errno) ));
+			if (errno != EXDEV &&
+			    errno != ENOTSUP &&
+			    errno != ENOSYS) {
+				/* fail for sure - no rw fallback path */
+				saved_errno = errno;
+				goto out;
+			} /* else continue rw fallback path */
+		} else {
+			/* well done, let's go do the tidy up... */
+			goto out;
+		}
+#endif
+		DEBUG(5, ("performing server side copy chunk (rw) "
+			  "of length %lu at offsets %lu/%lu\n",
+			  (unsigned long)num, (unsigned long)src_off,
+			  (unsigned long)dest_off));
+
+		ret = SMB_VFS_PREAD(src_fsp, vfs_cc_state->buf,
+				    this_num, src_off);
+
+		if (ret == -1) {
+			saved_errno = errno;
+			goto out;
+		}
+		if (ret != this_num) {
+			goto out;
+		}
+
+
 		ret = SMB_VFS_PWRITE(dest_fsp, vfs_cc_state->buf,
 				     this_num, dest_off);
 		if (ret == -1) {
 			saved_errno = errno;
 		}
-
+out:
+		SMB_VFS_STRICT_UNLOCK(src_fsp->conn, src_fsp, &lck);
 		SMB_VFS_STRICT_UNLOCK(dest_fsp->conn, dest_fsp, &lck);
 
+		src_off += ret;
+		dest_off += ret;
+
 		if (ret == -1) {
 			errno = saved_errno;
 			tevent_req_nterror(req, map_nt_error_from_unix(errno));
 			return tevent_req_post(req, ev);
 		}
 		if (ret != this_num) {
-			/* zero tolerance for short writes */
+			/* zero tolerance for short reads/writes */
 			tevent_req_nterror(req, NT_STATUS_IO_DEVICE_ERROR);
 			return tevent_req_post(req, ev);
 		}
-		dest_off += ret;
 
 		vfs_cc_state->copied += this_num;
 	}
-- 
2.7.4




More information about the samba-technical mailing list