[SCM] CTDB repository - branch 1.3 updated - ctdb-1.9.1-320-gef6cd76

Ronnie Sahlberg sahlberg at samba.org
Tue Feb 22 22:23:38 MST 2011


The branch, 1.3 has been updated
       via  ef6cd76866bfaff3f462ef71f9dba028fac4f3ae (commit)
       via  1f7c408e04b67145757d24b86b30a52865a8403b (commit)
       via  b1a5d2abafa69861cda29b694b35998a310ccc3e (commit)
       via  40cc810b184970d1ef20ad5078f30132db233cee (commit)
       via  6163e62da18767b65bbb524b7d351b38b373c8bf (commit)
      from  771e31fbfafed5986aba01c1392385dc4086eccd (commit)

http://gitweb.samba.org/?p=sahlberg/ctdb.git;a=shortlog;h=1.3


- Log -----------------------------------------------------------------
commit ef6cd76866bfaff3f462ef71f9dba028fac4f3ae
Author: Ronnie Sahlberg <ronniesahlberg at gmail.com>
Date:   Wed Feb 23 15:46:36 2011 +1100

    Deferred attach : at early startup, defer any db attach calls until we are out of recovery.

commit 1f7c408e04b67145757d24b86b30a52865a8403b
Author: Ronnie Sahlberg <ronniesahlberg at gmail.com>
Date:   Mon Feb 21 16:48:18 2011 +1100

    new version 1.3.1

commit b1a5d2abafa69861cda29b694b35998a310ccc3e
Author: Ronnie Sahlberg <ronniesahlberg at gmail.com>
Date:   Mon Feb 21 16:47:45 2011 +1100

    50.samba   run the smbcontrol in the background. no need to block waiting for it.

commit 40cc810b184970d1ef20ad5078f30132db233cee
Merge: 771e31fbfafed5986aba01c1392385dc4086eccd 6163e62da18767b65bbb524b7d351b38b373c8bf
Author: Ronnie Sahlberg <sahlberg at lenovo-laptop.(none)>
Date:   Mon Feb 21 16:19:18 2011 +1100

    Merge branch '1.3' of 10.1.1.27:/shared/ctdb/ctdb-git into 1.3

commit 6163e62da18767b65bbb524b7d351b38b373c8bf
Author: Ronnie Sahlberg <ronniesahlberg at gmail.com>
Date:   Fri Feb 18 11:21:19 2011 +1100

    ctdb_req_dmaster from non-master
    
    If we find a situation where we get a stray packet with the wrong
    dmaster, don't suicide with ctdb_fatal() since this is too disruptive.
    Just drop the stray packet and force a recovery to make sure all is good again.
    
    CQ S1022004

-----------------------------------------------------------------------

Summary of changes:
 config/events.d/50.samba   |    2 +-
 include/ctdb_private.h     |   11 ++++-
 packaging/RPM/ctdb.spec.in |    5 ++-
 server/ctdb_call.c         |    4 +-
 server/ctdb_control.c      |    4 +-
 server/ctdb_ltdb_server.c  |  106 +++++++++++++++++++++++++++++++++++++++-----
 server/ctdb_recover.c      |   10 ++++
 server/ctdb_tunables.c     |    3 +-
 8 files changed, 127 insertions(+), 18 deletions(-)


Changeset truncated at 500 lines:

diff --git a/config/events.d/50.samba b/config/events.d/50.samba
index 415b89f..e8e3366 100755
--- a/config/events.d/50.samba
+++ b/config/events.d/50.samba
@@ -275,7 +275,7 @@ case "$1" in
 	ip=$3
 	maskbits=$4
 
-	smbcontrol winbindd ip-dropped $ip >/dev/null 2>/dev/null
+	smbcontrol winbindd ip-dropped $ip >/dev/null 2>/dev/null &
 	;;
     *)
 	ctdb_standard_event_handler "$@"
diff --git a/include/ctdb_private.h b/include/ctdb_private.h
index 4dcf9a5..c1499b4 100644
--- a/include/ctdb_private.h
+++ b/include/ctdb_private.h
@@ -118,6 +118,7 @@ struct ctdb_tunable {
 	uint32_t use_status_events_for_monitoring;
 	uint32_t allow_unhealthy_db_read;
 	uint32_t stat_history_interval;
+	uint32_t deferred_attach_timeout;
 };
 
 /*
@@ -488,6 +489,9 @@ struct ctdb_context {
 
 	/* used in the recovery daemon to remember the ip allocation */
 	struct trbt_tree *ip_tree;
+
+	/* Used to defer db attach requests while in recovery mode */
+	struct ctdb_deferred_attach_context *deferred_attach;
 };
 
 struct ctdb_db_context {
@@ -799,7 +803,10 @@ int ctdb_daemon_send_control(struct ctdb_context *ctdb, uint32_t destnode,
 			     void *private_data);
 
 int32_t ctdb_control_db_attach(struct ctdb_context *ctdb, TDB_DATA indata, 
-			       TDB_DATA *outdata, uint64_t tdb_flags, bool persistent);
+			       TDB_DATA *outdata, uint64_t tdb_flags,
+			       bool persistent, uint32_t client_id,
+			       struct ctdb_req_control *c,
+			       bool *async_reply);
 
 int ctdb_daemon_set_call(struct ctdb_context *ctdb, uint32_t db_id,
 			 ctdb_fn_t fn, int id);
@@ -1362,4 +1369,6 @@ int32_t ctdb_control_get_stat_history(struct ctdb_context *ctdb,
 
 int ctdb_deferred_drop_all_ips(struct ctdb_context *ctdb);
 
+int ctdb_process_deferred_attach(struct ctdb_context *ctdb);
+
 #endif
diff --git a/packaging/RPM/ctdb.spec.in b/packaging/RPM/ctdb.spec.in
index e7a5d71..41a0712 100644
--- a/packaging/RPM/ctdb.spec.in
+++ b/packaging/RPM/ctdb.spec.in
@@ -3,7 +3,7 @@ Name: ctdb
 Summary: Clustered TDB
 Vendor: Samba Team
 Packager: Samba Team <samba at samba.org>
-Version: 1.3.0
+Version: 1.3.1
 Release: 1GITHASH
 Epoch: 0
 License: GNU GPL version 3
@@ -143,6 +143,9 @@ development libraries for ctdb
 %{_libdir}/libctdb.a
 
 %changelog
+* Mon Feb 21 2011 : Version 1.3.1
+ - Trigger recovery instead of ctdb_fatal() when we get an invalid dmaster. CQ 1022004
+ - Ping winbindd on all ip reallocations, not just natgw related ones. CQ 1021636
 * Fri Feb 18 2011 : Version 1.3.0
  - Initial branch for 1.3
 * Tue Feb 8 2011 : Version 1.2.20
diff --git a/server/ctdb_call.c b/server/ctdb_call.c
index d6c0866..be6e8f9 100644
--- a/server/ctdb_call.c
+++ b/server/ctdb_call.c
@@ -369,7 +369,9 @@ void ctdb_request_dmaster(struct ctdb_context *ctdb, struct ctdb_req_header *hdr
 			 (unsigned long long)c->rsn, (unsigned long long)header.rsn, c->hdr.reqid,
 			 (key.dsize >= 4)?(*(uint32_t *)key.dptr):0));
 		if (header.rsn != 0 || header.dmaster != ctdb->pnn) {
-			ctdb_fatal(ctdb, "ctdb_req_dmaster from non-master");
+			DEBUG(DEBUG_ERR,("ctdb_req_dmaster from non-master. Force a recovery.\n"));
+
+			ctdb->recovery_mode = CTDB_RECOVERY_ACTIVE;
 			return;
 		}
 	}
diff --git a/server/ctdb_control.c b/server/ctdb_control.c
index 90900c9..69724e3 100644
--- a/server/ctdb_control.c
+++ b/server/ctdb_control.c
@@ -221,10 +221,10 @@ static int32_t ctdb_control_dispatch(struct ctdb_context *ctdb,
 	}
 
 	case CTDB_CONTROL_DB_ATTACH:
-		return ctdb_control_db_attach(ctdb, indata, outdata, srvid, false);
+	  return ctdb_control_db_attach(ctdb, indata, outdata, srvid, false, client_id, c, async_reply);
 
 	case CTDB_CONTROL_DB_ATTACH_PERSISTENT:
-		return ctdb_control_db_attach(ctdb, indata, outdata, srvid, true);
+	  return ctdb_control_db_attach(ctdb, indata, outdata, srvid, true, client_id, c, async_reply);
 
 	case CTDB_CONTROL_SET_CALL: {
 		struct ctdb_control_set_call *sc = 
diff --git a/server/ctdb_ltdb_server.c b/server/ctdb_ltdb_server.c
index ba2a9cb..3e90b2d 100644
--- a/server/ctdb_ltdb_server.c
+++ b/server/ctdb_ltdb_server.c
@@ -745,33 +745,117 @@ again:
 }
 
 
+struct ctdb_deferred_attach_context {
+	struct ctdb_deferred_attach_context *next, *prev;
+	struct ctdb_context *ctdb;
+	struct ctdb_req_control *c;
+};
+
+
+static int ctdb_deferred_attach_destructor(struct ctdb_deferred_attach_context *da_ctx)
+{
+	DLIST_REMOVE(da_ctx->ctdb->deferred_attach, da_ctx);
+
+	return 0;
+}
+
+static void ctdb_deferred_attach_timeout(struct event_context *ev, struct timed_event *te, struct timeval t, void *private_data)
+{
+	struct ctdb_deferred_attach_context *da_ctx = talloc_get_type(private_data, struct ctdb_deferred_attach_context);
+	struct ctdb_context *ctdb = da_ctx->ctdb;
+
+	ctdb_request_control_reply(ctdb, da_ctx->c, NULL, -1, NULL);
+	talloc_free(da_ctx);
+}
+
+static void ctdb_deferred_attach_callback(struct event_context *ev, struct timed_event *te, struct timeval t, void *private_data)
+{
+	struct ctdb_deferred_attach_context *da_ctx = talloc_get_type(private_data, struct ctdb_deferred_attach_context);
+	struct ctdb_context *ctdb = da_ctx->ctdb;
+
+	/* This talloc-steals the packet ->c */
+	ctdb_input_pkt(ctdb, (struct ctdb_req_header *)da_ctx->c);
+	talloc_free(da_ctx);
+}
+
+int ctdb_process_deferred_attach(struct ctdb_context *ctdb)
+{
+	struct ctdb_deferred_attach_context *da_ctx;
+
+	/* call it from the main event loop as soon as the current event 
+	   finishes.
+	 */
+	while ((da_ctx = ctdb->deferred_attach) != NULL) {
+		DLIST_REMOVE(ctdb->deferred_attach, da_ctx);
+		event_add_timed(ctdb->ev, ctdb, timeval_current_ofs(1,0), ctdb_deferred_attach_callback, da_ctx);
+	}
+
+	return 0;
+}
+
 /*
   a client has asked to attach a new database
  */
 int32_t ctdb_control_db_attach(struct ctdb_context *ctdb, TDB_DATA indata,
 			       TDB_DATA *outdata, uint64_t tdb_flags, 
-			       bool persistent)
+			       bool persistent, uint32_t client_id,
+			       struct ctdb_req_control *c,
+			       bool *async_reply)
 {
 	const char *db_name = (const char *)indata.dptr;
 	struct ctdb_db_context *db;
 	struct ctdb_node *node = ctdb->nodes[ctdb->pnn];
 
+	/* dont allow any local clients to attach while we are in recovery mode
+	 * except for the recovery daemon.
+	 * allow all attach from the network since these are always from remote
+	 * recovery daemons.
+	 */
+	if (client_id != 0) {
+		struct ctdb_client *client = ctdb_reqid_find(ctdb, client_id, struct ctdb_client);
+
+		if (client == NULL) {
+			DEBUG(DEBUG_ERR,("DB Attach to database %s refused. Can not match clientid:%d to a client structure.\n", db_name, client_id));
+			return -1;
+		}
+
+		/* If the node is inactive it is not part of the cluster
+		   and we should not allow clients to attach to any
+		   databases
+		*/
+		if (node->flags & NODE_FLAGS_INACTIVE) {
+			DEBUG(DEBUG_ERR,("DB Attach to database %s refused since node is inactive (disconnected or banned)\n", db_name));
+			return -1;
+		}
+
+		if (ctdb->recovery_mode == CTDB_RECOVERY_ACTIVE
+		 && client->pid != ctdb->recoverd_pid) {
+			struct ctdb_deferred_attach_context *da_ctx = talloc(client, struct ctdb_deferred_attach_context);
+
+			if (da_ctx == NULL) {
+				DEBUG(DEBUG_ERR,("DB Attach to database %s deferral for client with pid:%d failed due to OOM.\n", db_name, client->pid));
+				return -1;
+			}
+
+			da_ctx->ctdb = ctdb;
+			da_ctx->c = talloc_steal(da_ctx, c);
+			talloc_set_destructor(da_ctx, ctdb_deferred_attach_destructor);
+			DLIST_ADD(ctdb->deferred_attach, da_ctx);
+
+			event_add_timed(ctdb->ev, da_ctx, timeval_current_ofs(ctdb->tunable.deferred_attach_timeout, 0), ctdb_deferred_attach_timeout, da_ctx);
+
+			DEBUG(DEBUG_ERR,("DB Attach to database %s deferred for client with pid:%d since node is in recovery mode.\n", db_name, client->pid));
+			*async_reply = true;
+			return 0;
+		}
+	}
+
 	/* the client can optionally pass additional tdb flags, but we
 	   only allow a subset of those on the database in ctdb. Note
 	   that tdb_flags is passed in via the (otherwise unused)
 	   srvid to the attach control */
 	tdb_flags &= (TDB_NOSYNC|TDB_INCOMPATIBLE_HASH);
 
-	/* If the node is inactive it is not part of the cluster
-	   and we should not allow clients to attach to any
-	   databases
-	*/
-	if (node->flags & NODE_FLAGS_INACTIVE) {
-		DEBUG(DEBUG_ERR,("DB Attach to database %s refused since node is inactive (disconnected or banned)\n", db_name));
-		return -1;
-	}
-
-
 	/* see if we already have this name */
 	db = ctdb_db_handle(ctdb, db_name);
 	if (db) {
diff --git a/server/ctdb_recover.c b/server/ctdb_recover.c
index 4db4d97..8f79f84 100644
--- a/server/ctdb_recover.c
+++ b/server/ctdb_recover.c
@@ -630,6 +630,11 @@ static void set_recmode_handler(struct event_context *ev, struct fd_event *fde,
 
 	state->ctdb->recovery_mode = state->recmode;
 
+	/* release any deferred attach calls from clients */
+	if (state->recmode == CTDB_RECOVERY_NORMAL) {
+		ctdb_process_deferred_attach(state->ctdb);
+	}
+
 	ctdb_request_control_reply(state->ctdb, state->c, NULL, 0, NULL);
 	talloc_free(state);
 	return;
@@ -716,6 +721,11 @@ int32_t ctdb_control_set_recmode(struct ctdb_context *ctdb,
 	state->fd[0] = -1;
 	state->fd[1] = -1;
 
+	/* release any deferred attach calls from clients */
+	if (recmode == CTDB_RECOVERY_NORMAL) {
+		ctdb_process_deferred_attach(ctdb);
+	}
+
 	if (ctdb->tunable.verify_recovery_lock == 0) {
 		/* dont need to verify the reclock file */
 		ctdb->recovery_mode = recmode;
diff --git a/server/ctdb_tunables.c b/server/ctdb_tunables.c
index 4cd1b45..0f8d7c8 100644
--- a/server/ctdb_tunables.c
+++ b/server/ctdb_tunables.c
@@ -65,7 +65,8 @@ static const struct {
 	{ "MaxQueueDropMsg",  1000000, offsetof(struct ctdb_tunable, max_queue_depth_drop_msg) },
 	{ "UseStatusEvents",     0,  offsetof(struct ctdb_tunable, use_status_events_for_monitoring) },
 	{ "AllowUnhealthyDBRead", 0,  offsetof(struct ctdb_tunable, allow_unhealthy_db_read) },
-	{ "StatHistoryInterval",  1,  offsetof(struct ctdb_tunable, stat_history_interval) }
+	{ "StatHistoryInterval",  1,  offsetof(struct ctdb_tunable, stat_history_interval) },
+	{ "DeferredAttachTO",  120,  offsetof(struct ctdb_tunable, deferred_attach_timeout) }
 };
 
 /*


-- 
CTDB repository


More information about the samba-cvs mailing list