[PATCH] Fix FreeBSD/Darwin and Linux sendfile
Jesse Miller
jmiller at jmiller.com
Mon Jan 30 21:11:18 UTC 2017
https://github.com/samba-team/samba/pull/76
Use poll() to emulate a blocking sendfile().
This avoids spinning on EAGAIN/EWOULDBLOCK in the Linux implementation,
which greatly reduces CPU usage.
(https://lists.samba.org/archive/samba/2013-September/175838.html)
This also fixes the FreeBSD/Darwin implementation, which in most cases did
not work: it would break out of its loop early whenever nwritten == 0, which
happened any time it hit EAGAIN after writing 0 bytes.
(https://forums.freebsd.org/threads/55017/)
I believe the sys_sendfile() code for other platforms will also spin, but I don't have access to those platforms for testing, so that code remains unchanged.
Jesse
diff --git a/source3/lib/sendfile.c b/source3/lib/sendfile.c
index 3d457bd6f13..f2b18b511cb 100644
--- a/source3/lib/sendfile.c
+++ b/source3/lib/sendfile.c
@@ -25,64 +25,122 @@
#include "includes.h"
+#if defined(FREEBSD_SENDFILE_API) || defined(DARWIN_SENDFILE_API) || defined(LINUX_SENDFILE_API)
+
+#include <sys/uio.h>
+#include <poll.h>
+
+ssize_t sys_sendfile_native(int tofd, int fromfd, off_t offset, size_t count, struct iovec *hv);
+
+/*
+ * Emulate a blocking sendfile() on top of sys_sendfile_native().
+ *
+ * Sends the optional header blob followed by count bytes of fromfd,
+ * starting at offset, to tofd. Between attempts the function sleeps in
+ * poll() until tofd is writable instead of spinning on
+ * EAGAIN/EWOULDBLOCK.
+ *
+ * Returns the number of header plus file bytes written, which is less
+ * than header->length + count only if the file ended early, or -1 with
+ * errno set by the failing call.
+ */
+ssize_t sys_sendfile(int tofd, int fromfd, const DATA_BLOB *header, off_t offset, size_t count)
+{
+	struct iovec hv;
+	struct pollfd pfd;
+	size_t hdr_len = 0;
+	size_t total = 0;
+	size_t want;
+
+	/* hv always describes the part of the header that is still unsent. */
+	hv.iov_base = NULL;
+	hv.iov_len = 0;
+	if (header != NULL) {
+		hv.iov_base = (void *)header->data;
+		hv.iov_len = header->length;
+		hdr_len = header->length;
+	}
+	want = count + hdr_len;
+
+	pfd.fd = tofd;
+	pfd.events = POLLOUT;
+	pfd.revents = 0;
+
+	while (total < want) {
+		/* Split the running total into header and file bytes. */
+		size_t hdr_sent = hdr_len - hv.iov_len;
+		size_t file_sent = total - hdr_sent;
+		ssize_t nwritten;
+
+		/*
+		 * Clear errno so that a zero-byte return can be classified
+		 * below: the helper leaves EINTR/EAGAIN in errno when it
+		 * returns 0 because the socket would block.
+		 * NOTE(review): assumes the helper's successful system calls
+		 * leave errno untouched - confirm on each platform.
+		 */
+		errno = 0;
+		nwritten = sys_sendfile_native(tofd, fromfd, offset + (off_t)file_sent, count - file_sent, &hv);
+		if (nwritten < 0) {
+			return -1;
+		}
+
+		if (nwritten == 0 && errno == 0) {
+			/*
+			 * Nothing written and no error: the file has no more
+			 * data at this offset. Return a short count rather
+			 * than polling and retrying forever.
+			 */
+			break;
+		}
+
+		total += (size_t)nwritten;
+		if (total >= want) {
+			break;
+		}
+
+		/*
+		 * Advance the header by what THIS call wrote. Comparing the
+		 * remaining header length with the cumulative total would
+		 * declare the header finished too early after a second
+		 * partial header write.
+		 */
+		if (hv.iov_len > 0) {
+			if (hv.iov_len <= (size_t)nwritten) {
+				hv.iov_len = 0;
+			} else {
+				hv.iov_len -= (size_t)nwritten;
+				hv.iov_base = ((uint8_t *)hv.iov_base) + nwritten;
+			}
+		}
+
+		/* Sleep until the socket is writable; a signal just retries. */
+		if (poll(&pfd, 1, -1) == -1 && errno != EINTR) {
+			return -1;
+		}
+	}
+
+	return (ssize_t)total;
+}
+
+#endif
+
#if defined(LINUX_SENDFILE_API)
+#include <sys/types.h>
+#include <sys/socket.h>
#include <sys/sendfile.h>
+#include <unistd.h>
#ifndef MSG_MORE
#define MSG_MORE 0x8000
#endif
-ssize_t sys_sendfile(int tofd, int fromfd, const DATA_BLOB *header, off_t offset, size_t count)
+/*
+ * Linux helper: make one attempt to send the unsent header bytes
+ * described by *hv (send() with MSG_MORE) followed by up to count bytes
+ * of fromfd starting at offset (sendfile()).
+ *
+ * Returns the number of header plus file bytes written by this call,
+ * which is 0 when nothing could be written because of
+ * EINTR/EAGAIN/EWOULDBLOCK, or -1 with errno set on a hard error.
+ * *hv is only read here; it is not advanced.
+ */
+ssize_t sys_sendfile_native(int tofd, int fromfd, off_t offset, size_t count, struct iovec *hv)
{
- size_t total=0;
- ssize_t ret;
- size_t hdr_len = 0;
+ /* Initialised so the header-less path adds 0 to total below. */
+ ssize_t nwritten = 0;
+ ssize_t total = 0;
/*
* Send the header first.
* Use MSG_MORE to cork the TCP output until sendfile is called.
*/
- if (header) {
- hdr_len = header->length;
- while (total < hdr_len) {
- ret = sys_send(tofd, header->data + total,hdr_len - total, MSG_MORE);
- if (ret == -1)
+ if (hv->iov_len > 0) {
+ if((nwritten = send(tofd, hv->iov_base, hv->iov_len, MSG_MORE)) == -1) {
+ /* Only errors other than "try again later" are fatal. */
+ if(errno != EINTR && errno != EAGAIN && errno != EWOULDBLOCK) {
return -1;
- total += ret;
+ } else {
+ return 0;
+ }
+ }
+
+ /* Partial header: report it and let the caller retry the rest. */
+ if((size_t)nwritten < hv->iov_len) {
+ return nwritten;
}
}
- total = count;
- while (total) {
- ssize_t nwritten;
- do {
- nwritten = sendfile(tofd, fromfd, &offset, total);
- } while (nwritten == -1 && (errno == EINTR || errno == EAGAIN || errno == EWOULDBLOCK));
- if (nwritten == -1) {
- if (errno == ENOSYS || errno == EINVAL) {
+ total += nwritten;
+
+ if((nwritten = sendfile(tofd, fromfd, &offset, count)) == -1) {
+ if (errno == ENOSYS || errno == EINVAL) {
/* Ok - we're in a world of pain here. We just sent
- * the header, but the sendfile failed. We have to
- * emulate the sendfile at an upper layer before we
- * disable it's use. So we do something really ugly.
- * We set the errno to a strange value so we can detect
- * this at the upper level and take care of it without
- * layer violation. JRA.
- */
- errno = EINTR; /* Normally we can never return this. */
- }
- return -1;
+ * the header, but the sendfile failed. We have to
+ * emulate the sendfile at an upper layer before we
+ * disable it's use. So we do something really ugly.
+ * We set the errno to a strange value so we can detect
+ * this at the upper level and take care of it without
+ * layer violation. JRA.
+ */
+ errno = EINTR; /* Normally we can never return this. */
+ /* Fail here so this marker is not taken for a retryable EINTR below. */
+ return -1;
}
- if (nwritten == 0) {
- /*
- * EOF, return a short read
- */
- return hdr_len + (count - total);
+ if (errno != EINTR && errno != EAGAIN && errno != EWOULDBLOCK) {
+ return -1;
}
- total -= nwritten;
+ /* Transient failure: no file bytes went out in this call. */
+ nwritten = 0;
}
- return count + hdr_len;
+
+ total += nwritten;
+
+ return total;
}
#elif defined(SOLARIS_SENDFILE_API)
@@ -240,63 +298,32 @@ ssize_t sys_sendfile(int tofd, int fromfd, const DATA_BLOB *header, off_t offset
#include <sys/types.h>
#include <sys/socket.h>
-#include <sys/uio.h>
-ssize_t sys_sendfile(int tofd, int fromfd,
- const DATA_BLOB *header, off_t offset, size_t count)
+ssize_t sys_sendfile_native(int tofd, int fromfd, off_t offset, size_t count, struct iovec *hv)
{
struct sf_hdtr sf_header = {0};
- struct iovec io_header = {0};
- off_t nwritten;
+ off_t nwritten;
int ret;
- if (header) {
- sf_header.headers = &io_header;
+ if (hv->iov_len > 0) {
+ sf_header.headers = hv;
sf_header.hdr_cnt = 1;
- io_header.iov_base = header->data;
- io_header.iov_len = header->length;
sf_header.trailers = NULL;
sf_header.trl_cnt = 0;
}
- while (count != 0) {
-
- nwritten = count;
+ nwritten = count;
#if defined(DARWIN_SENDFILE_API)
- /* Darwin recycles nwritten as a value-result parameter, apart from that this
- sendfile implementation is quite the same as the FreeBSD one */
- ret = sendfile(fromfd, tofd, offset, &nwritten, &sf_header, 0);
+ /* Darwin recycles nwritten as a value-result parameter, apart from that this
+ sendfile implementation is quite the same as the FreeBSD one */
+ ret = sendfile(fromfd, tofd, offset, &nwritten, &sf_header, 0);
#else
- ret = sendfile(fromfd, tofd, offset, count, &sf_header, &nwritten, 0);
+ ret = sendfile(fromfd, tofd, offset, count, &sf_header, &nwritten, 0);
#endif
- if (ret == -1 && errno != EINTR && errno != EAGAIN && errno != EWOULDBLOCK) {
- /* Send failed, we are toast. */
- return -1;
- }
- if (nwritten == 0) {
- /* EOF of offset is after EOF. */
- break;
- }
-
- if (sf_header.hdr_cnt) {
- if (io_header.iov_len <= nwritten) {
- /* Entire header was sent. */
- sf_header.headers = NULL;
- sf_header.hdr_cnt = 0;
- nwritten -= io_header.iov_len;
- } else {
- /* Partial header was sent. */
- io_header.iov_len -= nwritten;
- io_header.iov_base =
- ((uint8_t *)io_header.iov_base) + nwritten;
- nwritten = 0;
- }
- }
-
- offset += nwritten;
- count -= nwritten;
+ if (ret == -1 && errno != EINTR && errno != EAGAIN) {
+ return -1;
}
return nwritten;
More information about the samba-technical
mailing list