[PATCH] s3-messaging/ctdb is broken in master

Ralph Boehme slow at samba.org
Sun Jul 10 12:40:08 UTC 2016


On Sun, Jul 10, 2016 at 02:04:35PM +0200, Volker Lendecke wrote:
> On Sun, Jul 10, 2016 at 01:36:31PM +0200, Ralph Boehme wrote:
> > It seems s3-messaging/ctdb is broken in master. After an smbd is
> > forked to handle a session it crashed when trying to do messaging with
> > ctdb but the IPC fd gives EBADF.
> > 
> > Turns out the commits
> > 
> >   3fe3226daa8488e0fa787c40359c3401b6f05fc0 and
> >   3fe3226daa8488e0fa787c40359c3401b6f05fc0^
> > 
> > introduced a regression.
> 
> ooops...

sorry for missing this in the review, the patches by itself looked
good.

Isn't the lack of ctdb coverage in autobuild really frightening? Iirc
obnox has worked on it somewhat recently, maybe we should make this an
coordinated effort.

> > Please review and push if ok.
> 
> Of course it looks good, however -- why do you remove the error
> information by always doing "return EIO;" instead of "return ret;"?

sorry, obviously wrong, updated patchset attached.

Cheerio!
-slow
-------------- next part --------------
From f9c74dc57c4c3bf197a29cbf53bb8b8f0ca76304 Mon Sep 17 00:00:00 2001
From: Ralph Boehme <slow at samba.org>
Date: Sat, 9 Jul 2016 08:48:49 +0200
Subject: [PATCH 1/5] ctdbd_conn: split ctdbd_init_connection()

Split ctdbd_init_connection() into an internal function that does the
connection setup and only keep the conn object allocation in
ctdbd_init_connection().

This is in preperation of adding ctdbd_reinit_connection() which will
use the new internal function as well.

Signed-off-by: Ralph Boehme <slow at samba.org>
---
 source3/lib/ctdbd_conn.c | 46 ++++++++++++++++++++++++++++++----------------
 1 file changed, 30 insertions(+), 16 deletions(-)

diff --git a/source3/lib/ctdbd_conn.c b/source3/lib/ctdbd_conn.c
index d073c72..a86fe7e 100644
--- a/source3/lib/ctdbd_conn.c
+++ b/source3/lib/ctdbd_conn.c
@@ -405,20 +405,13 @@ static int ctdbd_connection_destructor(struct ctdbd_connection *c)
  * Get us a ctdbd connection
  */
 
-int ctdbd_init_connection(TALLOC_CTX *mem_ctx,
-			  const char *sockname, int timeout,
-			  struct ctdbd_connection **pconn)
+static int ctdbd_init_connection_internal(TALLOC_CTX *mem_ctx,
+					  const char *sockname, int timeout,
+					  struct ctdbd_connection *conn)
 {
-	struct ctdbd_connection *conn;
 	int ret;
 
-	if (!(conn = talloc_zero(mem_ctx, struct ctdbd_connection))) {
-		DEBUG(0, ("talloc failed\n"));
-		return ENOMEM;
-	}
-
 	conn->timeout = timeout;
-
 	if (conn->timeout == 0) {
 		conn->timeout = -1;
 	}
@@ -426,31 +419,52 @@ int ctdbd_init_connection(TALLOC_CTX *mem_ctx,
 	ret = ctdbd_connect(sockname, &conn->fd);
 	if (ret != 0) {
 		DEBUG(1, ("ctdbd_connect failed: %s\n", strerror(ret)));
-		goto fail;
+		return ret;
 	}
 	talloc_set_destructor(conn, ctdbd_connection_destructor);
 
 	ret = get_cluster_vnn(conn, &conn->our_vnn);
-
 	if (ret != 0) {
 		DEBUG(10, ("get_cluster_vnn failed: %s\n", strerror(ret)));
-		goto fail;
+		return ret;
 	}
 
 	if (!ctdbd_working(conn, conn->our_vnn)) {
 		DEBUG(2, ("Node is not working, can not connect\n"));
-		ret = EIO;
-		goto fail;
+		return EIO;
 	}
 
 	generate_random_buffer((unsigned char *)&conn->rand_srvid,
 			       sizeof(conn->rand_srvid));
 
 	ret = register_with_ctdbd(conn, conn->rand_srvid, NULL, NULL);
-
 	if (ret != 0) {
 		DEBUG(5, ("Could not register random srvid: %s\n",
 			  strerror(ret)));
+		return ret;
+	}
+
+	return 0;
+}
+
+int ctdbd_init_connection(TALLOC_CTX *mem_ctx,
+			  const char *sockname, int timeout,
+			  struct ctdbd_connection **pconn)
+{
+	struct ctdbd_connection *conn;
+	int ret;
+
+	if (!(conn = talloc_zero(mem_ctx, struct ctdbd_connection))) {
+		DEBUG(0, ("talloc failed\n"));
+		return ENOMEM;
+	}
+
+	ret = ctdbd_init_connection_internal(mem_ctx,
+					     sockname,
+					     timeout,
+					     conn);
+	if (ret != 0) {
+		DEBUG(0, ("ctdbd_init_connection_internal failed\n"));
 		goto fail;
 	}
 
-- 
2.5.0


From fac2e2d08b794c59479f2fe2ace614211243c1a3 Mon Sep 17 00:00:00 2001
From: Ralph Boehme <slow at samba.org>
Date: Sat, 9 Jul 2016 08:59:09 +0200
Subject: [PATCH 2/5] ctdbd_conn: add ctdbd_reinit_connection()

Signed-off-by: Ralph Boehme <slow at samba.org>
---
 source3/include/ctdbd_conn.h |  3 +++
 source3/lib/ctdbd_conn.c     | 24 ++++++++++++++++++++++++
 2 files changed, 27 insertions(+)

diff --git a/source3/include/ctdbd_conn.h b/source3/include/ctdbd_conn.h
index 11e71ba..bbebbce 100644
--- a/source3/include/ctdbd_conn.h
+++ b/source3/include/ctdbd_conn.h
@@ -33,6 +33,9 @@ struct messaging_rec;
 int ctdbd_init_connection(TALLOC_CTX *mem_ctx,
 			  const char *sockname, int timeout,
 			  struct ctdbd_connection **pconn);
+int ctdbd_reinit_connection(TALLOC_CTX *mem_ctx,
+			    const char *sockname, int timeout,
+			    struct ctdbd_connection *conn);
 
 uint32_t ctdbd_vnn(const struct ctdbd_connection *conn);
 
diff --git a/source3/lib/ctdbd_conn.c b/source3/lib/ctdbd_conn.c
index a86fe7e..32cbd3a 100644
--- a/source3/lib/ctdbd_conn.c
+++ b/source3/lib/ctdbd_conn.c
@@ -476,6 +476,30 @@ int ctdbd_init_connection(TALLOC_CTX *mem_ctx,
 	return ret;
 }
 
+int ctdbd_reinit_connection(TALLOC_CTX *mem_ctx,
+			    const char *sockname, int timeout,
+			    struct ctdbd_connection *conn)
+{
+	int ret;
+
+	ret = ctdbd_connection_destructor(conn);
+	if (ret != 0) {
+		DBG_ERR("ctdbd_connection_destructor failed\n");
+		return ret;
+	}
+
+	ret = ctdbd_init_connection_internal(mem_ctx,
+					     sockname,
+					     timeout,
+					     conn);
+	if (ret != 0) {
+		DBG_ERR("ctdbd_init_connection_internal failed\n");
+		return ret;
+	}
+
+	return 0;
+}
+
 int ctdbd_conn_get_fd(struct ctdbd_connection *conn)
 {
 	return conn->fd;
-- 
2.5.0


From 171b36a1800c2a67d5c88a85304586eb51575214 Mon Sep 17 00:00:00 2001
From: Ralph Boehme <slow at samba.org>
Date: Sat, 9 Jul 2016 13:20:01 +0200
Subject: [PATCH 3/5] s3-messaging/ctdb: split messaging_ctdbd_init()

Split out and internal function from messaging_ctdbd_init() that does
the connection setup. Keep the conn object allocation in
messaging_ctdbd_init().

This is in preperation of adding messaging_ctdbd_reinit() which will use
the new internal function as well.

messaging_ctdbd_init_internal() has a new reinit flag,
messaging_ctdbd_init() calls with reinit=false resulting in unmodified
behaviour.

Signed-off-by: Ralph Boehme <slow at samba.org>
---
 source3/lib/messages_ctdbd.c | 80 +++++++++++++++++++++++++++++---------------
 1 file changed, 53 insertions(+), 27 deletions(-)

diff --git a/source3/lib/messages_ctdbd.c b/source3/lib/messages_ctdbd.c
index 1645ccf..5a9fefc 100644
--- a/source3/lib/messages_ctdbd.c
+++ b/source3/lib/messages_ctdbd.c
@@ -174,41 +174,40 @@ static void messaging_ctdbd_readable(struct tevent_context *ev,
 	ctdbd_socket_readable(conn);
 }
 
-int messaging_ctdbd_init(struct messaging_context *msg_ctx,
-			 TALLOC_CTX *mem_ctx,
-			 struct messaging_backend **presult)
+static int messaging_ctdbd_init_internal(struct messaging_context *msg_ctx,
+					 TALLOC_CTX *mem_ctx,
+					 struct messaging_ctdbd_context *ctx,
+					 bool reinit)
 {
-	struct messaging_backend *result;
-	struct messaging_ctdbd_context *ctx;
 	struct tevent_context *ev;
 	int ret, ctdb_fd;
 
-	if (!(result = talloc(mem_ctx, struct messaging_backend))) {
-		DEBUG(0, ("talloc failed\n"));
-		return ENOMEM;
-	}
-
-	if (!(ctx = talloc(result, struct messaging_ctdbd_context))) {
-		DEBUG(0, ("talloc failed\n"));
-		TALLOC_FREE(result);
-		return ENOMEM;
-	}
-
-	ret = ctdbd_init_connection(ctx, lp_ctdbd_socket(),
-				    lp_ctdb_timeout(), &ctx->conn);
-
-	if (ret != 0) {
-		DBG_DEBUG("ctdbd_init_connection failed: %s\n",
-			  strerror(ret));
-		TALLOC_FREE(result);
-		return ret;
+	if (reinit) {
+		ret = ctdbd_reinit_connection(ctx,
+					      lp_ctdbd_socket(),
+					      lp_ctdb_timeout(),
+					      ctx->conn);
+		if (ret != 0) {
+			DBG_ERR("ctdbd_reinit_connection failed: %s\n",
+				strerror(ret));
+			return ret;
+		}
+	} else {
+		ret = ctdbd_init_connection(ctx,
+					    lp_ctdbd_socket(),
+					    lp_ctdb_timeout(),
+					    &ctx->conn);
+		if (ret != 0) {
+			DBG_ERR("ctdbd_init_connection failed: %s\n",
+				strerror(ret));
+			return ret;
+		}
 	}
 
 	ret = register_with_ctdbd(ctx->conn, MSG_SRVID_SAMBA, NULL, NULL);
 	if (ret != 0) {
 		DBG_DEBUG("Could not register MSG_SRVID_SAMBA: %s\n",
 			  strerror(ret));
-		TALLOC_FREE(result);
 		return ret;
 	}
 
@@ -217,7 +216,6 @@ int messaging_ctdbd_init(struct messaging_context *msg_ctx,
 	if (ret != 0) {
 		DEBUG(10, ("register_with_ctdbd failed: %s\n",
 			   strerror(ret)));
-		TALLOC_FREE(result);
 		return ret;
 	}
 
@@ -227,7 +225,6 @@ int messaging_ctdbd_init(struct messaging_context *msg_ctx,
 	ctx->fde = tevent_add_fd(ev, ctx, ctdb_fd, TEVENT_FD_READ,
 				 messaging_ctdbd_readable, ctx->conn);
 	if (ctx->fde == NULL) {
-		TALLOC_FREE(result);
 		return ENOMEM;
 	}
 
@@ -237,6 +234,35 @@ int messaging_ctdbd_init(struct messaging_context *msg_ctx,
 
 	set_my_vnn(ctdbd_vnn(ctx->conn));
 
+	return 0;
+}
+
+int messaging_ctdbd_init(struct messaging_context *msg_ctx,
+			 TALLOC_CTX *mem_ctx,
+			 struct messaging_backend **presult)
+{
+	struct messaging_backend *result;
+	struct messaging_ctdbd_context *ctx;
+	struct tevent_context *ev;
+	int ret, ctdb_fd;
+
+	if (!(result = talloc(mem_ctx, struct messaging_backend))) {
+		DEBUG(0, ("talloc failed\n"));
+		return ENOMEM;
+	}
+
+	if (!(ctx = talloc(result, struct messaging_ctdbd_context))) {
+		DEBUG(0, ("talloc failed\n"));
+		TALLOC_FREE(result);
+		return ENOMEM;
+	}
+
+	ret = messaging_ctdbd_init_internal(msg_ctx, mem_ctx, ctx, false);
+	if (ret != 0) {
+		TALLOC_FREE(result);
+		return ret;
+	}
+
 	result->send_fn = messaging_ctdb_send;
 	result->private_data = (void *)ctx;
 
-- 
2.5.0


From e4492c3511a957e35fa8327b7523c27cd30de7a4 Mon Sep 17 00:00:00 2001
From: Ralph Boehme <slow at samba.org>
Date: Sat, 9 Jul 2016 14:30:35 +0200
Subject: [PATCH 4/5] s3-messaging/ctdb: add messaging_ctdbd_reinit()

Signed-off-by: Ralph Boehme <slow at samba.org>
---
 source3/include/messages.h   |  3 +++
 source3/lib/ctdb_dummy.c     |  7 +++++++
 source3/lib/messages_ctdbd.c | 16 ++++++++++++++++
 3 files changed, 26 insertions(+)

diff --git a/source3/include/messages.h b/source3/include/messages.h
index 8bbe026..2eaf146 100644
--- a/source3/include/messages.h
+++ b/source3/include/messages.h
@@ -79,6 +79,9 @@ struct messaging_backend {
 int messaging_ctdbd_init(struct messaging_context *msg_ctx,
 			 TALLOC_CTX *mem_ctx,
 			 struct messaging_backend **presult);
+int messaging_ctdbd_reinit(struct messaging_context *msg_ctx,
+			   TALLOC_CTX *mem_ctx,
+			   struct messaging_backend *backend);
 struct ctdbd_connection *messaging_ctdbd_connection(void);
 
 bool message_send_all(struct messaging_context *msg_ctx,
diff --git a/source3/lib/ctdb_dummy.c b/source3/lib/ctdb_dummy.c
index ec0bcc4..8b617ba 100644
--- a/source3/lib/ctdb_dummy.c
+++ b/source3/lib/ctdb_dummy.c
@@ -83,6 +83,13 @@ int messaging_ctdbd_init(struct messaging_context *msg_ctx,
 	return ENOSYS;
 }
 
+int messaging_ctdbd_reinit(struct messaging_context *msg_ctx,
+			   TALLOC_CTX *mem_ctx,
+			   struct messaging_backend *backend)
+{
+	return ENOSYS;
+}
+
 struct ctdbd_connection *messaging_ctdbd_connection(void)
 {
 	return NULL;
diff --git a/source3/lib/messages_ctdbd.c b/source3/lib/messages_ctdbd.c
index 5a9fefc..519f4e6 100644
--- a/source3/lib/messages_ctdbd.c
+++ b/source3/lib/messages_ctdbd.c
@@ -269,3 +269,19 @@ int messaging_ctdbd_init(struct messaging_context *msg_ctx,
 	*presult = result;
 	return 0;
 }
+
+int messaging_ctdbd_reinit(struct messaging_context *msg_ctx,
+			   TALLOC_CTX *mem_ctx,
+			   struct messaging_backend *backend)
+{
+	struct messaging_ctdbd_context *ctx = talloc_get_type_abort(
+		backend->private_data, struct messaging_ctdbd_context);
+	int ret;
+
+	ret = messaging_ctdbd_init_internal(msg_ctx, mem_ctx, ctx, true);
+	if (ret != 0) {
+		return ret;
+	}
+
+	return 0;
+}
-- 
2.5.0


From 70a3b637fbc5f70405f33c3c6961f8feca1fca52 Mon Sep 17 00:00:00 2001
From: Ralph Boehme <slow at samba.org>
Date: Sat, 9 Jul 2016 14:33:52 +0200
Subject: [PATCH 5/5] s3-messaging: use messaging_ctdbd_reinit() in
 messaging_reinit()

This is the last step to fix a regression introduced by

  3fe3226daa8488e0fa787c40359c3401b6f05fc0 and
  3fe3226daa8488e0fa787c40359c3401b6f05fc0^

where we pass the ctdb-messaging object conn to db_open() and add a
reference to it to the private db_ctdb_ctx for later use. Unfortunately
reinit_after_fork() destroys conn, leaving us with an invalid reference.

The previous patches added new lower level functions
messaging_ctdbd_reinit() and ctdbd_reinit_connection(), finally use them
them from messaging_reinit(). They preserve the conn object and simply
reinitialize the IPC fd.

Signed-off-by: Ralph Boehme <slow at samba.org>
---
 source3/lib/messages.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/source3/lib/messages.c b/source3/lib/messages.c
index 65e975e..5d90947 100644
--- a/source3/lib/messages.c
+++ b/source3/lib/messages.c
@@ -416,11 +416,9 @@ NTSTATUS messaging_reinit(struct messaging_context *msg_ctx)
 		return map_nt_error_from_unix(ret);
 	}
 
-	TALLOC_FREE(msg_ctx->remote);
-
 	if (lp_clustering()) {
-		ret = messaging_ctdbd_init(msg_ctx, msg_ctx,
-					   &msg_ctx->remote);
+		ret = messaging_ctdbd_reinit(msg_ctx, msg_ctx,
+					     msg_ctx->remote);
 
 		if (ret != 0) {
 			DEBUG(1, ("messaging_ctdbd_init failed: %s\n",
-- 
2.5.0



More information about the samba-technical mailing list