[PATCH] Change tevent "standard" backend to fall back from epoll -> poll
Jeremy Allison
jra at samba.org
Fri Feb 15 12:33:33 MST 2013
On Fri, Feb 15, 2013 at 04:59:46PM +0100, Stefan (metze) Metzmacher wrote:
>
> The HACK patches actually discovered some problems with the fallback...
>
> I spend the day with debugging and fixing...
>
> I'll do some more testing with the attached patches next week.
Ok, here are the epoll read/write multiplex patches
rebased on top of this complete patchset.
I'm running tests now with all the debugging enabled
(and one minor fix to avoid the "smbXcli" debug output
being interpreted as an smbclient prompt, see below :-).
Should help the complete review !
Cheers,
Jeremy
------------------hack fix for tests with debug output---------------
diff --git a/source3/script/tests/test_smbclient_s3.sh b/source3/script/tests/test_smbclient_s3.sh
index 596cd42..78f0b04 100755
--- a/source3/script/tests/test_smbclient_s3.sh
+++ b/source3/script/tests/test_smbclient_s3.sh
@@ -33,7 +33,7 @@ failed=0
# Test that a noninteractive smbclient does not prompt
test_noninteractive_no_prompt()
{
- prompt="smb"
+ prompt="smb:"
cmd='echo du | $SMBCLIENT "$@" -U$USERNAME%$PASSWORD //$SERVER/tmp -I $SERVER_IP $ADDARGS 2>&1'
eval echo "$cmd"
@@ -60,7 +60,7 @@ test_noninteractive_no_prompt()
# Test that an interactive smbclient prompts to stdout
test_interactive_prompt_stdout()
{
- prompt="smb"
+ prompt="smb:"
tmpfile=$PREFIX/smbclient_interactive_prompt_commands
cat > $tmpfile <<EOF
------------------hack fix for tests with debug output---------------
-------------- next part --------------
From c36ab91dfdfc6d41c37c7bc90a94c2f446cddc0f Mon Sep 17 00:00:00 2001
From: Jeremy Allison <jra at samba.org>
Date: Thu, 14 Feb 2013 13:50:56 -0800
Subject: [PATCH 1/7] Start to fix the epoll backend to support 2 fd events on
the same fd correctly.
Add a utility function epoll_add_duplicate_fd() and
a new flag EPOLL_ADDITIONAL_FD_WAS_MULTIPLEXED.
This will be called by epoll_add_event() to merge two
fde events with the same file descriptor.
Signed-off-by: Jeremy Allison <jra at samba.org>
---
lib/tevent/tevent_epoll.c | 67 +++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 67 insertions(+)
diff --git a/lib/tevent/tevent_epoll.c b/lib/tevent/tevent_epoll.c
index dcbfc16..abf5a26 100644
--- a/lib/tevent/tevent_epoll.c
+++ b/lib/tevent/tevent_epoll.c
@@ -5,6 +5,7 @@
Copyright (C) Andrew Tridgell 2003-2005
Copyright (C) Stefan Metzmacher 2005-2009
+ Copyright (C) Jeremy Allison 2013
** NOTE! The following LGPL license applies to the tevent
** library. This does NOT imply that all of Samba is released
@@ -237,6 +238,72 @@ static void epoll_check_reopen(struct epoll_event_context *epoll_ev)
#define EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT (1<<0)
#define EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR (1<<1)
#define EPOLL_ADDITIONAL_FD_FLAG_GOT_ERROR (1<<2)
+#define EPOLL_ADDITIONAL_FD_WAS_MULTIPLEXED (1<<3)
+
+/*
+ epoll cannot add the same file descriptor twice, once
+ with read, once with write which is allowed by the
+ tevent backend. Multiplex the existing fde, flag it
+ as such so we can search for the correct fde on
+ event triggering.
+*/
+
+static int epoll_add_duplicate_fd(struct epoll_event_context *epoll_ev,
+ struct tevent_fd *add_fde)
+{
+ struct epoll_event event;
+ struct tevent_fd *orig_fde;
+ int ret;
+
+ /* Find the existing fde that caused the EEXIST error. */
+ for (orig_fde = epoll_ev->ev->fd_events; orig_fde; orig_fde = orig_fde->next) {
+ if (orig_fde == add_fde) {
+ continue;
+ }
+ /*
+ * The multiplex fde must have the same fd, and also
+ * already have an epoll event attached.
+ */
+ if (orig_fde->fd == add_fde->fd &&
+ (orig_fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT)) {
+ break;
+ }
+ }
+ if (orig_fde == NULL) {
+ tevent_debug(epoll_ev->ev, TEVENT_DEBUG_FATAL,
+ "can't find multiplex fde");
+ return -1;
+ }
+
+ if (orig_fde->additional_flags & EPOLL_ADDITIONAL_FD_WAS_MULTIPLEXED) {
+ /* Logic error. Can't have more than 2 multiplexed fde's. */
+ tevent_debug(epoll_ev->ev, TEVENT_DEBUG_FATAL,
+ "multiplex fde has bad additional_flags");
+ return -1;
+ }
+
+ /* Modify the orig_fde to add in the new flags. */
+ ZERO_STRUCT(event);
+ event.events = epoll_map_flags(orig_fde->flags) |
+ epoll_map_flags(add_fde->flags);
+ event.data.ptr = orig_fde;
+ ret = epoll_ctl(epoll_ev->epoll_fd, EPOLL_CTL_MOD, orig_fde->fd, &event);
+ if (ret != 0) {
+ return ret;
+ }
+
+ /* Now flag both fde's as being multiplexed. */
+ orig_fde->additional_flags |= EPOLL_ADDITIONAL_FD_WAS_MULTIPLEXED;
+ add_fde->additional_flags |= EPOLL_ADDITIONAL_FD_WAS_MULTIPLEXED;
+ /*
+ * Make each fde->additional_data pointers point at each other
+ * so we can look them up from each other. They are now paired.
+ */
+ orig_fde->additional_data = (struct tevent_fd *)add_fde;
+ add_fde->additional_data = (struct tevent_fd *)orig_fde;
+
+ return 0;
+}
/*
add the epoll event to the given fd_event
--
1.8.1.3
From 23a67e7ccb0ee66fbf163a6849b843a54c24bf48 Mon Sep 17 00:00:00 2001
From: Jeremy Allison <jra at samba.org>
Date: Thu, 14 Feb 2013 13:52:41 -0800
Subject: [PATCH 2/7] If epoll_ctl(..EPOLL_CTL_ADD,..) failes with EEXIST,
merge the two fde's into one epoll event.
Signed-off-by: Jeremy Allison <jra at samba.org>
---
lib/tevent/tevent_epoll.c | 12 ++++++++++--
1 file changed, 10 insertions(+), 2 deletions(-)
diff --git a/lib/tevent/tevent_epoll.c b/lib/tevent/tevent_epoll.c
index abf5a26..a8e9b47 100644
--- a/lib/tevent/tevent_epoll.c
+++ b/lib/tevent/tevent_epoll.c
@@ -323,8 +323,16 @@ static void epoll_add_event(struct epoll_event_context *epoll_ev, struct tevent_
event.events = epoll_map_flags(fde->flags);
event.data.ptr = fde;
if (epoll_ctl(epoll_ev->epoll_fd, EPOLL_CTL_ADD, fde->fd, &event) != 0) {
- epoll_panic(epoll_ev, "EPOLL_CTL_ADD failed", false);
- return;
+ if (errno == EEXIST) {
+ if (epoll_add_duplicate_fd(epoll_ev, fde) != 0) {
+ epoll_panic(epoll_ev, "epoll_add_duplicate_fd "
+ "failed", false);
+ return;
+ }
+ } else {
+ epoll_panic(epoll_ev, "EPOLL_CTL_ADD failed", false);
+ return;
+ }
}
fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
--
1.8.1.3
From 7593b64cdde1b96527c7129cf926df9ca11add9d Mon Sep 17 00:00:00 2001
From: Jeremy Allison <jra at samba.org>
Date: Fri, 15 Feb 2013 10:16:44 -0800
Subject: [PATCH 3/7] Fix up epoll_del_event to cope with deleting a
multiplexed fde event.
Signed-off-by: Jeremy Allison <jra at samba.org>
---
lib/tevent/tevent_epoll.c | 31 ++++++++++++++++++++++++++++---
1 file changed, 28 insertions(+), 3 deletions(-)
diff --git a/lib/tevent/tevent_epoll.c b/lib/tevent/tevent_epoll.c
index a8e9b47..be17c07 100644
--- a/lib/tevent/tevent_epoll.c
+++ b/lib/tevent/tevent_epoll.c
@@ -348,6 +348,7 @@ static void epoll_add_event(struct epoll_event_context *epoll_ev, struct tevent_
static void epoll_del_event(struct epoll_event_context *epoll_ev, struct tevent_fd *fde)
{
struct epoll_event event;
+ int op = EPOLL_CTL_DEL;
if (epoll_ev->epoll_fd == -1) return;
@@ -356,14 +357,38 @@ static void epoll_del_event(struct epoll_event_context *epoll_ev, struct tevent_
/* if there's no epoll_event, we don't need to delete it */
if (!(fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT)) return;
+ /* Ensure we remove the flag on the correct event, even if paired. */
+ fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
+
+ if (fde->additional_flags & EPOLL_ADDITIONAL_FD_WAS_MULTIPLEXED) {
+ /*
+ * If this is a multiplexed fde, we need to replace it
+ * with the remaining paired event.
+ */
+ struct tevent_fd *mpx_fde = (struct tevent_fd *)fde->additional_data;
+ if (mpx_fde == NULL) {
+ epoll_panic(epoll_ev, "fde->additional_data==NULL", false);
+ return;
+ }
+ /* Remove multiplexed flag from both events. */
+ fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_WAS_MULTIPLEXED;
+ mpx_fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_WAS_MULTIPLEXED;
+ /* And unpair the two events. */
+ fde->additional_data = NULL;
+ mpx_fde->additional_data = NULL;
+
+ /* Modify, don't delete the remaining event. */
+ op = EPOLL_CTL_MOD;
+ fde = mpx_fde;
+ }
+
ZERO_STRUCT(event);
event.events = epoll_map_flags(fde->flags);
event.data.ptr = fde;
- if (epoll_ctl(epoll_ev->epoll_fd, EPOLL_CTL_DEL, fde->fd, &event) != 0) {
- epoll_panic(epoll_ev, "EPOLL_CTL_DEL failed", false);
+ if (epoll_ctl(epoll_ev->epoll_fd, op, fde->fd, &event) != 0) {
+ epoll_panic(epoll_ev, "epoll_ctl failed", false);
return;
}
- fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
}
/*
--
1.8.1.3
From e6de18833fc2c26602d72d709d5291581d6869f7 Mon Sep 17 00:00:00 2001
From: Jeremy Allison <jra at samba.org>
Date: Thu, 14 Feb 2013 14:14:50 -0800
Subject: [PATCH 4/7] Fix epoll_mod_event() to cope with modifying a
multiplexed fde event.
Signed-off-by: Jeremy Allison <jra at samba.org>
---
lib/tevent/tevent_epoll.c | 12 ++++++++++++
1 file changed, 12 insertions(+)
diff --git a/lib/tevent/tevent_epoll.c b/lib/tevent/tevent_epoll.c
index be17c07..2661bc2 100644
--- a/lib/tevent/tevent_epoll.c
+++ b/lib/tevent/tevent_epoll.c
@@ -403,6 +403,18 @@ static void epoll_mod_event(struct epoll_event_context *epoll_ev, struct tevent_
ZERO_STRUCT(event);
event.events = epoll_map_flags(fde->flags);
+ if (fde->additional_flags & EPOLL_ADDITIONAL_FD_WAS_MULTIPLEXED) {
+ /*
+ * This is a multiplexed fde, we need to include both
+ * flags in the modified event.
+ */
+ struct tevent_fd *mpx_fde = (struct tevent_fd *)fde->additional_data;
+ if (mpx_fde == NULL) {
+ epoll_panic(epoll_ev, "fde->additional_data==NULL", false);
+ return;
+ }
+ event.events |= epoll_map_flags(mpx_fde->flags);
+ }
event.data.ptr = fde;
if (epoll_ctl(epoll_ev->epoll_fd, EPOLL_CTL_MOD, fde->fd, &event) != 0) {
epoll_panic(epoll_ev, "EPOLL_CTL_MOD failed", false);
--
1.8.1.3
From 016158beed91f3eb55988421a5f68636577c20ae Mon Sep 17 00:00:00 2001
From: Jeremy Allison <jra at samba.org>
Date: Thu, 14 Feb 2013 15:53:38 -0800
Subject: [PATCH 5/7] Add utility function epoll_handle_hup_or_err()
We'll use this to handle the EPOLL_ADDITIONAL_FD_FLAG_GOT_ERROR
and EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR flags with multiplexed
events in the event loop.
Signed-off-by: Jeremy Allison <jra at samba.org>
---
lib/tevent/tevent_epoll.c | 31 +++++++++++++++++++++++++++++++
1 file changed, 31 insertions(+)
diff --git a/lib/tevent/tevent_epoll.c b/lib/tevent/tevent_epoll.c
index 2661bc2..3ea3ec0 100644
--- a/lib/tevent/tevent_epoll.c
+++ b/lib/tevent/tevent_epoll.c
@@ -461,6 +461,37 @@ static void epoll_change_event(struct epoll_event_context *epoll_ev, struct teve
}
/*
+ Cope with epoll returning EPOLLHUP|EPOLLERR on an event.
+ Return true if there's nothing else to do, false if
+ this event needs further handling.
+*/
+static bool epoll_handle_hup_or_err(struct epoll_event_context *epoll_ev,
+ struct tevent_fd *fde)
+{
+ if (fde == NULL) {
+ /* Nothing to do if no event. */
+ return true;
+ }
+
+ fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_GOT_ERROR;
+ /*
+ * if we only wait for TEVENT_FD_WRITE, we should not tell the
+ * event handler about it, and remove the epoll_event,
+ * as we only report errors when waiting for read events,
+ * to match the select() behavior
+ */
+ if (!(fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR)) {
+ epoll_del_event(epoll_ev, fde);
+ /* Do the same as the poll backend and
+ remove the writeable flag. */
+ fde->flags &= ~TEVENT_FD_WRITE;
+ return true;
+ }
+ /* This has TEVENT_FD_READ set, we're not finished. */
+ return false;
+}
+
+/*
event loop handling using epoll
*/
static int epoll_event_loop(struct epoll_event_context *epoll_ev, struct timeval *tvalp)
--
1.8.1.3
From 09bac3ee8476ba0b77bed7daf0e746b5353cabbb Mon Sep 17 00:00:00 2001
From: Jeremy Allison <jra at samba.org>
Date: Thu, 14 Feb 2013 15:59:40 -0800
Subject: [PATCH 6/7] In epoll_event_loop() ensure we trigger the right handler
for a multiplexed fde event.
Signed-off-by: Jeremy Allison <jra at samba.org>
---
lib/tevent/tevent_epoll.c | 38 ++++++++++++++++++++++++++++++--------
1 file changed, 30 insertions(+), 8 deletions(-)
diff --git a/lib/tevent/tevent_epoll.c b/lib/tevent/tevent_epoll.c
index 3ea3ec0..cbddc08 100644
--- a/lib/tevent/tevent_epoll.c
+++ b/lib/tevent/tevent_epoll.c
@@ -540,27 +540,49 @@ static int epoll_event_loop(struct epoll_event_context *epoll_ev, struct timeval
struct tevent_fd *fde = talloc_get_type(events[i].data.ptr,
struct tevent_fd);
uint16_t flags = 0;
+ struct tevent_fd *mpx_fde = NULL;
if (fde == NULL) {
epoll_panic(epoll_ev, "epoll_wait() gave bad data", true);
return -1;
}
- if (events[i].events & (EPOLLHUP|EPOLLERR)) {
- fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_GOT_ERROR;
+ if (fde->additional_flags & EPOLL_ADDITIONAL_FD_WAS_MULTIPLEXED) {
/*
- * if we only wait for TEVENT_FD_WRITE, we should not tell the
- * event handler about it, and remove the epoll_event,
- * as we only report errors when waiting for read events,
- * to match the select() behavior
+ * Save off the multiplexed event in case we need
+ * to use it to call the handler function.
*/
- if (!(fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR)) {
- epoll_del_event(epoll_ev, fde);
+ mpx_fde = (struct tevent_fd *)fde->additional_data;
+ if (mpx_fde == NULL) {
+ epoll_panic(epoll_ev, "logic error", true);
+ return -1;
+ }
+ }
+ if (events[i].events & (EPOLLHUP|EPOLLERR)) {
+ bool handled_fde = epoll_handle_hup_or_err(epoll_ev, fde);
+ bool handled_mpx = epoll_handle_hup_or_err(epoll_ev, mpx_fde);
+
+ if (handled_fde && handled_mpx) {
+ /* Fully dealt with this event. */
continue;
}
+ if (!handled_mpx) {
+ /*
+ * If the mpx event was the one that needs
+ * further handling, it's the TEVENT_FD_READ
+ * event so switch over and call that handler.
+ */
+ fde = mpx_fde;
+ }
flags |= TEVENT_FD_READ;
}
if (events[i].events & EPOLLIN) flags |= TEVENT_FD_READ;
if (events[i].events & EPOLLOUT) flags |= TEVENT_FD_WRITE;
+ if (mpx_fde) {
+ /* Ensure we got the right fde. */
+ if ((flags & fde->flags) == 0) {
+ fde = mpx_fde;
+ }
+ }
if (flags) {
fde->handler(epoll_ev->ev, fde, flags, fde->private_data);
break;
--
1.8.1.3
From e7c948e9fd950b62f21da825a657ae19db73467f Mon Sep 17 00:00:00 2001
From: Jeremy Allison <jra at samba.org>
Date: Thu, 14 Feb 2013 14:16:31 -0800
Subject: [PATCH 7/7] Regression test to ensure that a tevent backend can cope
with separate read/write events on a single fd.
This tests the multiplex fd changes to the epoll backend to
ensure they work correctly.
Signed-off-by: Jeremy Allison <jra at samba.org>
---
lib/tevent/testsuite.c | 52 ++++++++++++++++++++++++++++++++++----------------
1 file changed, 36 insertions(+), 16 deletions(-)
diff --git a/lib/tevent/testsuite.c b/lib/tevent/testsuite.c
index 3d2a79a..a6ab583 100644
--- a/lib/tevent/testsuite.c
+++ b/lib/tevent/testsuite.c
@@ -4,6 +4,7 @@
testing of the events subsystem
Copyright (C) Stefan Metzmacher 2006-2009
+ Copyright (C) Jeremy Allison 2013
** NOTE! The following LGPL license applies to the tevent
** library. This does NOT imply that all of Samba is released
@@ -34,19 +35,26 @@
#endif
static int fde_count;
+static int fde_wcount;
-static void fde_handler(struct tevent_context *ev_ctx, struct tevent_fd *f,
+static void fde_handler_readwrite(struct tevent_context *ev_ctx, struct tevent_fd *f,
uint16_t flags, void *private_data)
{
int *fd = (int *)private_data;
- char c;
+ char c = 0;
#ifdef SA_SIGINFO
kill(getpid(), SIGUSR1);
#endif
kill(getpid(), SIGALRM);
- read(fd[0], &c, 1);
- write(fd[1], &c, 1);
- fde_count++;
+
+ if ((flags & TEVENT_FD_WRITE) && (fde_wcount - fde_count < 256)) {
+ /* Don't fill the pipe and block... */
+ write(fd[1], &c, 1);
+ fde_wcount++;
+ } else {
+ read(fd[0], &c, 1);
+ fde_count++;
+ }
}
static void finished_handler(struct tevent_context *ev_ctx, struct tevent_timer *te,
@@ -70,7 +78,10 @@ static bool test_event_context(struct torture_context *test,
int fd[2] = { -1, -1 };
const char *backend = (const char *)test_data;
int alarm_count=0, info_count=0;
- struct tevent_fd *fde;
+ struct tevent_fd *fde_read;
+ struct tevent_fd *fde_read_1;
+ struct tevent_fd *fde_write;
+ struct tevent_fd *fde_write_1;
#ifdef SA_RESTART
struct tevent_signal *se1 = NULL;
#endif
@@ -80,7 +91,6 @@ static bool test_event_context(struct torture_context *test,
#endif
int finished=0;
struct timeval t;
- char c = 0;
ev_ctx = tevent_context_init_byname(test, backend);
if (ev_ctx == NULL) {
@@ -92,13 +102,23 @@ static bool test_event_context(struct torture_context *test,
/* reset globals */
fde_count = 0;
+ fde_wcount = 0;
/* create a pipe */
pipe(fd);
- fde = tevent_add_fd(ev_ctx, ev_ctx, fd[0], TEVENT_FD_READ,
- fde_handler, fd);
- tevent_fd_set_auto_close(fde);
+ fde_read = tevent_add_fd(ev_ctx, ev_ctx, fd[0], TEVENT_FD_READ,
+ fde_handler_readwrite, fd);
+ fde_write_1 = tevent_add_fd(ev_ctx, ev_ctx, fd[0], TEVENT_FD_WRITE,
+ fde_handler_readwrite, fd);
+
+ fde_write = tevent_add_fd(ev_ctx, ev_ctx, fd[1], TEVENT_FD_WRITE,
+ fde_handler_readwrite, fd);
+ fde_read_1 = tevent_add_fd(ev_ctx, ev_ctx, fd[1], TEVENT_FD_READ,
+ fde_handler_readwrite, fd);
+
+ tevent_fd_set_auto_close(fde_read);
+ tevent_fd_set_auto_close(fde_write);
tevent_add_timer(ev_ctx, ev_ctx, timeval_current_ofs(2,0),
finished_handler, &finished);
@@ -113,8 +133,6 @@ static bool test_event_context(struct torture_context *test,
se3 = tevent_add_signal(ev_ctx, ev_ctx, SIGUSR1, SA_SIGINFO, count_handler, &info_count);
#endif
- write(fd[1], &c, 1);
-
t = timeval_current();
while (!finished) {
errno = 0;
@@ -124,8 +142,10 @@ static bool test_event_context(struct torture_context *test,
}
}
- talloc_free(fde);
- close(fd[1]);
+ talloc_free(fde_read);
+ talloc_free(fde_write);
+ talloc_free(fde_read_1);
+ talloc_free(fde_write_1);
while (alarm_count < fde_count+1) {
if (tevent_loop_once(ev_ctx) == -1) {
@@ -139,11 +159,11 @@ static bool test_event_context(struct torture_context *test,
talloc_free(se1);
#endif
- torture_assert_int_equal(test, alarm_count, 1+fde_count, "alarm count mismatch");
+ torture_assert_int_equal(test, alarm_count, 1+fde_count+fde_wcount, "alarm count mismatch");
#ifdef SA_SIGINFO
talloc_free(se3);
- torture_assert_int_equal(test, info_count, fde_count, "info count mismatch");
+ torture_assert_int_equal(test, info_count, fde_count+fde_wcount, "info count mismatch");
#endif
talloc_free(ev_ctx);
--
1.8.1.3
More information about the samba-technical
mailing list