[PATCHES] Cleanup dead records in messages.tdb

Christof Schmitt cs at samba.org
Wed Apr 2 18:21:51 MDT 2014


On Wed, Apr 02, 2014 at 04:57:03PM -0700, Jeremy Allison wrote:
> On Wed, Apr 02, 2014 at 10:51:47AM +0200, Volker Lendecke wrote:
> > On Tue, Apr 01, 2014 at 01:49:13PM -0700, Christof Schmitt wrote:
> > > When a smbd process dies, pending messages.tdb records for this process
> > > will not get cleaned up. I have seen one case where the messages.tdb
> > > file grew very large after I/O requests were delayed and smbd child
> > > processes died.
> > > 
> > > These patches implement cleanup of messages.tdb records that is
> > > triggered after a smbd dies; any record with an invalid key or a
> > > non-existing PID will get deleted.
> > 
> > Can we avoid the traverse in the normal case? Is it really
> > that the smbds in your case left corrupt keys behind? In
> > remove_child_pid we know which child died, so we could
> > directly check and delete its record. Traverses always scare
> > me :-)
> 
> OK, here is a version that doesn't traverse, but
> only cleans up the records for the affected
> pid.
> 
> Christof, it's based on your idea and Volker's
> comments. Can you review and let me know what
> you think ?

I assume that the !procid_is_local check is just a defensive
programming, the function should never get called for a remote PID. A
minor question would be why you have messaging_tdb_cleanup return a
status, only to ignore it in the caller.

Overall it looks good:
Reviewed-by: Christof Schmitt <cs at samba.org>

> 
> Thanks,
> 
>         Jeremy.

> From 58b3923b4e2b285f3ad039cda791326da6ba6e0a Mon Sep 17 00:00:00 2001
> From: Jeremy Allison <jra at samba.org>
> Date: Wed, 2 Apr 2014 16:45:25 -0700
> Subject: [PATCH] s3: messages: Implement cleanup of dead records.
> 
> When a smbd process dies, pending messages.tdb records for this process
> might not get cleaned up. Implement a cleanup for dead records that is
> triggered after a smbd dies uncleanly; the records for that PID are
> deleted.
> 
> Based on a patchset from Christof Schmitt <cs at samba.org>.
> 
> Signed-off-by: Jeremy Allison <jra at samba.org>
> ---
>  source3/include/messages.h   |  6 ++++++
>  source3/lib/messages.c       | 17 +++++++++++++++++
>  source3/lib/messages_local.c | 38 ++++++++++++++++++++++++++++++++++++++
>  source3/smbd/server.c        |  7 +++++++
>  4 files changed, 68 insertions(+)
> 
> diff --git a/source3/include/messages.h b/source3/include/messages.h
> index d7a2853..47c5f7a 100644
> --- a/source3/include/messages.h
> +++ b/source3/include/messages.h
> @@ -97,6 +97,9 @@ NTSTATUS messaging_tdb_init(struct messaging_context *msg_ctx,
>  
>  bool messaging_tdb_parent_init(TALLOC_CTX *mem_ctx);
>  
> +NTSTATUS messaging_tdb_cleanup(struct messaging_context *msg_ctx,
> +			struct server_id pid);
> +
>  NTSTATUS messaging_ctdbd_init(struct messaging_context *msg_ctx,
>  			      TALLOC_CTX *mem_ctx,
>  			      struct messaging_backend **presult);
> @@ -143,6 +146,9 @@ struct tevent_req *messaging_read_send(TALLOC_CTX *mem_ctx,
>  int messaging_read_recv(struct tevent_req *req, TALLOC_CTX *mem_ctx,
>  			struct messaging_rec **presult);
>  
> +void messaging_cleanup_server(struct messaging_context *msg_ctx,
> +				struct server_id pid);
> +
>  #include "librpc/gen_ndr/ndr_messaging.h"
>  
>  #endif
> diff --git a/source3/lib/messages.c b/source3/lib/messages.c
> index 96b6b88..4ff933d 100644
> --- a/source3/lib/messages.c
> +++ b/source3/lib/messages.c
> @@ -567,4 +567,21 @@ void messaging_dispatch_rec(struct messaging_context *msg_ctx,
>  	return;
>  }
>  
> +/*
> +  Call when a process has terminated abnormally.
> +*/
> +void messaging_cleanup_server(struct messaging_context *msg_ctx,
> +				struct server_id server)
> +{
> +	if (server_id_is_disconnected(&server)) {
> +		return;
> +	}
> +
> +	if (!procid_is_local(&server)) {
> +		return;
> +	}
> +
> +	(void)messaging_tdb_cleanup(msg_ctx, server);
> +
> +}
>  /** @} **/
> diff --git a/source3/lib/messages_local.c b/source3/lib/messages_local.c
> index 1fe89c3..d535df1 100644
> --- a/source3/lib/messages_local.c
> +++ b/source3/lib/messages_local.c
> @@ -45,6 +45,7 @@
>  #include "includes.h"
>  #include "system/filesys.h"
>  #include "messages.h"
> +#include "serverid.h"
>  #include "lib/tdb_wrap/tdb_wrap.h"
>  #include "lib/param/param.h"
>  
> @@ -221,6 +222,43 @@ static TDB_DATA message_key_pid(TALLOC_CTX *mem_ctx, struct server_id pid)
>  	return kbuf;
>  }
>  
> +/*******************************************************************
> + Called when a process has terminated abnormally. Remove all messages
> + pending for it.
> +******************************************************************/
> +
> +NTSTATUS messaging_tdb_cleanup(struct messaging_context *msg_ctx,
> +				struct server_id pid)
> +{
> +	struct messaging_tdb_context *ctx = talloc_get_type(
> +					msg_ctx->local->private_data,
> +					struct messaging_tdb_context);
> +	struct tdb_wrap *tdb = ctx->tdb;
> +	TDB_DATA key;
> +	TALLOC_CTX *frame = talloc_stackframe();
> +
> +	key = message_key_pid(frame, pid);
> +	/*
> +	 * We have to lock the key to avoid
> +	 * races in case the server_id was
> +	 * re-used and is active (a remote
> +	 * possibility, true). We only
> +	 * clean up the database if we
> +	 * know server_id doesn't exist
> +	 * while checked under the chainlock.
> +	 */
> +	if (tdb_chainlock(tdb->tdb, key) != 0) {
> +		TALLOC_FREE(frame);
> +		return NT_STATUS_LOCK_NOT_GRANTED;
> +	}
> +	if (!serverid_exists(&pid)) {
> +		(void)tdb_delete(tdb->tdb, key);
> +	}
> +	tdb_chainunlock(tdb->tdb, key);
> +	TALLOC_FREE(frame);
> +	return NT_STATUS_OK;
> +}
> +
>  /*
>    Fetch the messaging array for a process
>   */
> diff --git a/source3/smbd/server.c b/source3/smbd/server.c
> index 29e688d..bc9d293 100644
> --- a/source3/smbd/server.c
> +++ b/source3/smbd/server.c
> @@ -481,6 +481,13 @@ static void remove_child_pid(struct smbd_parent_context *parent,
>  						parent);
>  			DEBUG(1,("Scheduled cleanup of brl and lock database after unclean shutdown\n"));
>  		}
> +
> +		/*
> +		 * Ensure we flush any stored messages
> +		 * queued for the child process that
> +		 * terminated uncleanly.
> +		 */
> +		messaging_cleanup_server(parent->msg_ctx, child_id);
>  	}
>  
>  	if (!serverid_deregister(child_id)) {
> -- 
> 1.9.1.423.g4596e3a
> 



More information about the samba-technical mailing list