samba_dnsupdate timeouts (was Re: [PATCH] python indent bugfix in dns_hub.py)

Volker Lendecke Volker.Lendecke at SerNet.DE
Thu Feb 7 17:03:07 UTC 2019


On Thu, Feb 07, 2019 at 03:09:03PM +0100, Stefan Metzmacher wrote:

> The key is that we have this:
> 
> while (true) {
>    msg_ctx = messaging_init();
>    messaging_register(msg_ctx, NULL, MSG_PONG, pong_message);
>    messaging_send(msg_ctx, MSG_PING, ...)
>    TALLOC_FREE(msg_ctx);
> }
> 
> messaging_init() calls messaging_dgm_init() = > socket()/bind()
> and TALLOC_CTX() calls unlink()
> 
> As discussed catching ECONNREFUSED and retry would also be fix for the
> problem, them my samba_dnsupdate patch is just an optimization.

Attached find a patchset that *should* fix this issue. gitlab ci
running.

Volker

-- 
SerNet GmbH, Bahnhofsallee 1b, 37081 Göttingen
phone: 0551-370000-0, mailto:kontakt at sernet.de
Gesch.F.: Dr. Johannes Loxen und Reinhild Jung
AG Göttingen: HR-B 2816 - http://www.sernet.de
-------------- next part --------------
From a2f49c92d7dffd8ed884039c316dea2bece77139 Mon Sep 17 00:00:00 2001
From: Volker Lendecke <vl at samba.org>
Date: Thu, 7 Feb 2019 15:57:06 +0100
Subject: [PATCH 1/3] messages_dgm: Use saved errno value

In this case this is just a cleanup, the value has just been set by
messaging_dgm_sendmsg. But as that already saves errno into a local
variable, use that.

Signed-off-by: Volker Lendecke <vl at samba.org>
---
 source3/lib/messages_dgm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/source3/lib/messages_dgm.c b/source3/lib/messages_dgm.c
index 37eefeb0a4a..cb0c17d6c24 100644
--- a/source3/lib/messages_dgm.c
+++ b/source3/lib/messages_dgm.c
@@ -553,7 +553,7 @@ static void messaging_dgm_out_threaded_job(void *private_data)
 		if (state->sent != -1) {
 			return;
 		}
-		if (errno != ENOBUFS) {
+		if (state->err != ENOBUFS) {
 			return;
 		}
 
-- 
2.11.0


From d93d0b481c7d0e5d419e3f4c309a7619cc82edaa Mon Sep 17 00:00:00 2001
From: Volker Lendecke <vl at samba.org>
Date: Thu, 7 Feb 2019 16:15:46 +0100
Subject: [PATCH 2/3] messages_dgm: Properly handle receiver re-initialization

This only properly covers the small-message nonblocking case. Covering
the large-message and the blocking case is a much larger effort assuming
we want to re-send the failed message if parts of the message has gone
through properly. Don't do that for now.

Signed-off-by: Volker Lendecke <vl at samba.org>
---
 source3/lib/messages_dgm.c | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/source3/lib/messages_dgm.c b/source3/lib/messages_dgm.c
index cb0c17d6c24..aaafcc10307 100644
--- a/source3/lib/messages_dgm.c
+++ b/source3/lib/messages_dgm.c
@@ -1419,6 +1419,7 @@ int messaging_dgm_send(pid_t pid,
 	struct messaging_dgm_context *ctx = global_dgm_context;
 	struct messaging_dgm_out *out;
 	int ret;
+	unsigned retries = 0;
 
 	if (ctx == NULL) {
 		return ENOTCONN;
@@ -1426,6 +1427,7 @@ int messaging_dgm_send(pid_t pid,
 
 	messaging_dgm_validate(ctx);
 
+again:
 	ret = messaging_dgm_out_get(ctx, pid, &out);
 	if (ret != 0) {
 		return ret;
@@ -1435,6 +1437,20 @@ int messaging_dgm_send(pid_t pid,
 
 	ret = messaging_dgm_out_send_fragmented(ctx->ev, out, iov, iovlen,
 						fds, num_fds);
+	if (ret == ECONNREFUSED) {
+		/*
+		 * We cache outgoing sockets. If the receiver has
+		 * closed and re-opened the socket since our last
+		 * message, we get connection refused. Retry.
+		 */
+
+		TALLOC_FREE(out);
+
+		if (retries < 5) {
+			retries += 1;
+			goto again;
+		}
+	}
 	return ret;
 }
 
-- 
2.11.0


From d22cb9b62523b960a271af5d61a0b53233b5e715 Mon Sep 17 00:00:00 2001
From: Volker Lendecke <vl at samba.org>
Date: Thu, 7 Feb 2019 17:48:34 +0100
Subject: [PATCH 3/3] torture3: Extend read3 for the "messaging target
 re-inits" failure

Do ping_pong a hundred times, re-initializing the msg_ctx every time.

Signed-off-by: Volker Lendecke <vl at samba.org>
---
 source3/torture/test_messaging_read.c | 40 +++++++++++++++++------------------
 1 file changed, 20 insertions(+), 20 deletions(-)

diff --git a/source3/torture/test_messaging_read.c b/source3/torture/test_messaging_read.c
index d3e4079074b..ae75e83b1f0 100644
--- a/source3/torture/test_messaging_read.c
+++ b/source3/torture/test_messaging_read.c
@@ -250,14 +250,13 @@ fail:
 }
 
 struct msg_pingpong_state {
-	uint8_t dummy;
+	struct messaging_context *msg_ctx;
 };
 
 static void msg_pingpong_done(struct tevent_req *subreq);
 
 static struct tevent_req *msg_pingpong_send(TALLOC_CTX *mem_ctx,
 					    struct tevent_context *ev,
-					    struct messaging_context *msg_ctx,
 					    struct server_id dst)
 {
 	struct tevent_req *req, *subreq;
@@ -269,13 +268,19 @@ static struct tevent_req *msg_pingpong_send(TALLOC_CTX *mem_ctx,
 		return NULL;
 	}
 
-	status = messaging_send_buf(msg_ctx, dst, MSG_PING, NULL, 0);
+	state->msg_ctx = messaging_init(state, ev);
+	if (tevent_req_nomem(state->msg_ctx, req)) {
+		return tevent_req_post(req, ev);
+	}
+
+	status = messaging_send_buf(state->msg_ctx, dst, MSG_PING, NULL, 0);
 	if (!NT_STATUS_IS_OK(status)) {
+		DBG_DEBUG("messaging_send_buf failed: %s\n", nt_errstr(status));
 		tevent_req_error(req, map_errno_from_nt_status(status));
 		return tevent_req_post(req, ev);
 	}
 
-	subreq = messaging_read_send(state, ev, msg_ctx, MSG_PONG);
+	subreq = messaging_read_send(state, ev, state->msg_ctx, MSG_PONG);
 	if (tevent_req_nomem(subreq, req)) {
 		return tevent_req_post(req, ev);
 	}
@@ -308,18 +313,17 @@ static int msg_pingpong_recv(struct tevent_req *req)
 	return 0;
 }
 
-static int msg_pingpong(struct messaging_context *msg_ctx,
-			struct server_id dst)
+static int msg_pingpong(struct server_id dst)
 {
 	struct tevent_context *ev;
 	struct tevent_req *req;
 	int ret = ENOMEM;
 
-	ev = tevent_context_init(msg_ctx);
+	ev = tevent_context_init(talloc_tos());
 	if (ev == NULL) {
 		goto fail;
 	}
-	req = msg_pingpong_send(ev, ev, msg_ctx, dst);
+	req = msg_pingpong_send(ev, ev, dst);
 	if (req == NULL) {
 		goto fail;
 	}
@@ -398,7 +402,7 @@ bool run_messaging_read3(int dummy)
 	pid_t child;
 	int ready_pipe[2];
 	int exit_pipe[2];
-	int ret;
+	int i, ret;
 	char c;
 	struct server_id dst;
 	ssize_t written;
@@ -433,19 +437,15 @@ bool run_messaging_read3(int dummy)
 		fprintf(stderr, "tevent_context_init failed\n");
 		goto fail;
 	}
-	msg_ctx = messaging_init(ev, ev);
-	if (msg_ctx == NULL) {
-		fprintf(stderr, "messaging_init failed\n");
-		goto fail;
-	}
 
-	dst = messaging_server_id(msg_ctx);
-	dst.pid = child;
+	dst = (struct server_id){ .pid = child, .vnn = NONCLUSTER_VNN, };
 
-	ret = msg_pingpong(msg_ctx, dst);
-	if (ret != 0){
-		fprintf(stderr, "msg_pingpong failed\n");
-		goto fail;
+	for (i=0; i<100; i++) {
+		ret = msg_pingpong(dst);
+		if (ret != 0){
+			fprintf(stderr, "msg_pingpong failed\n");
+			goto fail;
+		}
 	}
 
 	printf("Parent: telling child to exit\n");
-- 
2.11.0



More information about the samba-technical mailing list