[SCM] CTDB repository - branch master updated - ctdb-1.0.114-315-gc386f2c

Ronnie Sahlberg sahlberg at samba.org
Mon Sep 13 23:49:17 MDT 2010


The branch, master has been updated
       via  c386f2c62f06f1c60047b7d4b1ec7a9eec11873c (commit)
       via  80b8889267339b870868841ff077e850bc5b52e2 (commit)
       via  93df096773c89f21f77b3bcf9aa90bf28881b852 (commit)
       via  942f44123350d4d0c4ad7f3fcd5ff2d0d175739b (commit)
      from  1261f3d9702800a4e59550c881350daf479f00ef (commit)

http://gitweb.samba.org/?p=sahlberg/ctdb.git;a=shortlog;h=master


- Log -----------------------------------------------------------------
commit c386f2c62f06f1c60047b7d4b1ec7a9eec11873c
Author: Stefan Metzmacher <metze at samba.org>
Date:   Tue Aug 31 09:28:34 2010 +0200

    server/banning: also release all ips if we're banning ourself
    
    metze

commit 80b8889267339b870868841ff077e850bc5b52e2
Author: Stefan Metzmacher <metze at samba.org>
Date:   Mon Aug 30 18:25:28 2010 +0200

    server/recoverd: if we can't get the recovery lock, ban ourself
    
    metze

commit 93df096773c89f21f77b3bcf9aa90bf28881b852
Author: Stefan Metzmacher <metze at samba.org>
Date:   Tue Aug 31 08:42:32 2010 +0200

    server/recoverd: do takeover_run after verifying the reclock file
    
    metze

commit 942f44123350d4d0c4ad7f3fcd5ff2d0d175739b
Author: Stefan Metzmacher <metze at samba.org>
Date:   Tue Aug 24 09:22:49 2010 +0200

    server/monitor: ask for a takeoverrun after propagating our new flags
    
    metze

-----------------------------------------------------------------------

Summary of changes:
 include/ctdb_private.h |    1 +
 server/ctdb_banning.c  |   30 +++++++++++++++++++++++++++++-
 server/ctdb_monitor.c  |   48 +++++++++++++++---------------------------------
 server/ctdb_recoverd.c |   15 +++++++++------
 4 files changed, 54 insertions(+), 40 deletions(-)


Changeset truncated at 500 lines:

diff --git a/include/ctdb_private.h b/include/ctdb_private.h
index b707afd..89b8f08 100644
--- a/include/ctdb_private.h
+++ b/include/ctdb_private.h
@@ -1291,6 +1291,7 @@ int ctdb_vacuum_init(struct ctdb_db_context *ctdb_db);
 int32_t ctdb_control_enable_script(struct ctdb_context *ctdb, TDB_DATA indata);
 int32_t ctdb_control_disable_script(struct ctdb_context *ctdb, TDB_DATA indata);
 
+int32_t ctdb_local_node_got_banned(struct ctdb_context *ctdb);
 int32_t ctdb_control_set_ban_state(struct ctdb_context *ctdb, TDB_DATA indata);
 int32_t ctdb_control_get_ban_state(struct ctdb_context *ctdb, TDB_DATA *outdata);
 int32_t ctdb_control_set_db_priority(struct ctdb_context *ctdb, TDB_DATA indata);
diff --git a/server/ctdb_banning.c b/server/ctdb_banning.c
index 3d5f216..5684907 100644
--- a/server/ctdb_banning.c
+++ b/server/ctdb_banning.c
@@ -42,6 +42,31 @@ ctdb_ban_node_event(struct event_context *ev, struct timed_event *te,
 	}
 }
 
+int32_t ctdb_local_node_got_banned(struct ctdb_context *ctdb)
+{
+	uint32_t i;
+
+	/* make sure we are frozen */
+	DEBUG(DEBUG_NOTICE,("This node has been banned - forcing freeze and recovery\n"));
+
+	/* Reset the generation id to 1 to make us ignore any
+	   REQ/REPLY CALL/DMASTER someone sends to us.
+	   We are now banned so we shouldnt service database calls
+	   anymore.
+	*/
+	ctdb->vnn_map->generation = INVALID_GENERATION;
+
+	for (i=1; i<=NUM_DB_PRIORITIES; i++) {
+		if (ctdb_start_freeze(ctdb, i) != 0) {
+			DEBUG(DEBUG_ERR,(__location__ " Failed to freeze db priority %u\n", i));
+		}
+	}
+	ctdb_release_all_ips(ctdb);
+	ctdb->recovery_mode = CTDB_RECOVERY_ACTIVE;
+
+	return 0;
+}
+
 int32_t ctdb_control_set_ban_state(struct ctdb_context *ctdb, TDB_DATA indata)
 {
 	struct ctdb_ban_time *bantime = (struct ctdb_ban_time *)indata.dptr;
@@ -96,7 +121,10 @@ int32_t ctdb_control_set_ban_state(struct ctdb_context *ctdb, TDB_DATA indata)
 	ctdb->nodes[bantime->pnn]->flags |= NODE_FLAGS_BANNED;
 
 	event_add_timed(ctdb->ev, ctdb->banning_ctx, timeval_current_ofs(bantime->time,0), ctdb_ban_node_event, ctdb);
-	
+	if (bantime->pnn == ctdb->pnn) {
+		return ctdb_local_node_got_banned(ctdb);
+	}
+
 	return 0;
 }
 
diff --git a/server/ctdb_monitor.c b/server/ctdb_monitor.c
index 7f5da5c..dff6f42 100644
--- a/server/ctdb_monitor.c
+++ b/server/ctdb_monitor.c
@@ -114,6 +114,7 @@ static void ctdb_health_callback(struct ctdb_context *ctdb, int status, void *p)
 	int ret;
 	TDB_DATA rddata;
 	struct takeover_run_reply rd;
+	const char *state_str = NULL;
 
 	c.pnn = ctdb->pnn;
 	c.old_flags = node->flags;
@@ -141,28 +142,12 @@ static void ctdb_health_callback(struct ctdb_context *ctdb, int status, void *p)
 		ctdb->monitor->next_interval = 5;
 
 		ctdb_run_notification_script(ctdb, "unhealthy");
-
-		/* ask the recmaster to reallocate all addresses */
-		DEBUG(DEBUG_ERR,("Node became UNHEALTHY. Ask recovery master %u to perform ip reallocation\n", ctdb->recovery_master));
-		ret = ctdb_daemon_send_message(ctdb, ctdb->recovery_master, CTDB_SRVID_TAKEOVER_RUN, rddata);
-		if (ret != 0) {
-			DEBUG(DEBUG_ERR,(__location__ " Failed to send ip takeover run request message to %u\n", ctdb->recovery_master));
-		}
-
 	} else if (status == 0 && (node->flags & NODE_FLAGS_UNHEALTHY)) {
 		DEBUG(DEBUG_NOTICE,("monitor event OK - node re-enabled\n"));
 		node->flags &= ~NODE_FLAGS_UNHEALTHY;
 		ctdb->monitor->next_interval = 5;
 
 		ctdb_run_notification_script(ctdb, "healthy");
-
-		/* ask the recmaster to reallocate all addresses */
-		DEBUG(DEBUG_ERR,("Node became HEALTHY. Ask recovery master %u to perform ip reallocation\n", ctdb->recovery_master));
-		ret = ctdb_daemon_send_message(ctdb, ctdb->recovery_master, CTDB_SRVID_TAKEOVER_RUN, rddata);
-		if (ret != 0) {
-			DEBUG(DEBUG_ERR,(__location__ " Failed to send ip takeover run request message to %u\n", ctdb->recovery_master));
-		}
-
 	}
 
 after_change_status:
@@ -190,6 +175,19 @@ after_change_status:
 	ctdb_daemon_send_message(ctdb, ctdb->pnn,
 				 CTDB_SRVID_PUSH_NODE_FLAGS, data);
 
+	if (c.new_flags & NODE_FLAGS_UNHEALTHY) {
+		state_str = "UNHEALTHY";
+	} else {
+		state_str = "HEALTHY";
+	}
+
+	/* ask the recmaster to reallocate all addresses */
+	DEBUG(DEBUG_ERR,("Node became %s. Ask recovery master %u to perform ip reallocation\n",
+			 state_str, ctdb->recovery_master));
+	ret = ctdb_daemon_send_message(ctdb, ctdb->recovery_master, CTDB_SRVID_TAKEOVER_RUN, rddata);
+	if (ret != 0) {
+		DEBUG(DEBUG_ERR,(__location__ " Failed to send ip takeover run request message to %u\n", ctdb->recovery_master));
+	}
 }
 
 
@@ -433,7 +431,6 @@ int32_t ctdb_control_modflags(struct ctdb_context *ctdb, TDB_DATA indata)
 	struct ctdb_node_flag_change *c = (struct ctdb_node_flag_change *)indata.dptr;
 	struct ctdb_node *node;
 	uint32_t old_flags;
-	int i;
 
 	if (c->pnn >= ctdb->num_nodes) {
 		DEBUG(DEBUG_ERR,(__location__ " Node %d is invalid, num_nodes :%d\n", c->pnn, ctdb->num_nodes));
@@ -483,22 +480,7 @@ int32_t ctdb_control_modflags(struct ctdb_context *ctdb, TDB_DATA indata)
 
 	/* if we have become banned, we should go into recovery mode */
 	if ((node->flags & NODE_FLAGS_BANNED) && !(c->old_flags & NODE_FLAGS_BANNED) && (node->pnn == ctdb->pnn)) {
-		/* make sure we are frozen */
-		DEBUG(DEBUG_NOTICE,("This node has been banned - forcing freeze and recovery\n"));
-		/* Reset the generation id to 1 to make us ignore any
-		   REQ/REPLY CALL/DMASTER someone sends to us.
-		   We are now banned so we shouldnt service database calls
-		   anymore.
-		*/
-		ctdb->vnn_map->generation = INVALID_GENERATION;
-
-		for (i=1; i<=NUM_DB_PRIORITIES; i++) {
-			if (ctdb_start_freeze(ctdb, i) != 0) {
-				DEBUG(DEBUG_ERR,(__location__ " Failed to freeze db priority %u\n", i));
-			}
-		}
-		ctdb_release_all_ips(ctdb);
-		ctdb->recovery_mode = CTDB_RECOVERY_ACTIVE;
+		return ctdb_local_node_got_banned(ctdb);
 	}
 	
 	return 0;
diff --git a/server/ctdb_recoverd.c b/server/ctdb_recoverd.c
index 437e4cb..30c34b3 100644
--- a/server/ctdb_recoverd.c
+++ b/server/ctdb_recoverd.c
@@ -1382,8 +1382,10 @@ static int do_recovery(struct ctdb_recoverd *rec,
 		DEBUG(DEBUG_ERR,("Taking out recovery lock from recovery daemon\n"));
 		start_time = timeval_current();
 		if (!ctdb_recovery_lock(ctdb, true)) {
-			ctdb_set_culprit(rec, pnn);
-			DEBUG(DEBUG_ERR,("Unable to get recovery lock - aborting recovery\n"));
+			DEBUG(DEBUG_ERR,("Unable to get recovery lock - aborting recovery "
+					 "and ban ourself for %u seconds\n",
+					 ctdb->tunable.recovery_ban_period));
+			ctdb_ban_node(rec, pnn, ctdb->tunable.recovery_ban_period);
 			return -1;
 		}
 		ctdb_ctrl_report_recd_lock_latency(ctdb, CONTROL_TIMEOUT(), timeval_elapsed(&start_time));
@@ -3009,10 +3011,6 @@ static void main_loop(struct ctdb_context *ctdb, struct ctdb_recoverd *rec,
 			rec->reallocate_callers = NULL;
 		}
 	}
-	/* if there are takeovers requested, perform it and notify the waiters */
-	if (rec->reallocate_callers) {
-		process_ipreallocate_requests(ctdb, rec);
-	}
 
 	if (rec->recmaster == (uint32_t)-1) {
 		DEBUG(DEBUG_NOTICE,(__location__ " Initial recovery master set - forcing election\n"));
@@ -3199,6 +3197,11 @@ static void main_loop(struct ctdb_context *ctdb, struct ctdb_recoverd *rec,
 		}
 	}
 
+	/* if there are takeovers requested, perform it and notify the waiters */
+	if (rec->reallocate_callers) {
+		process_ipreallocate_requests(ctdb, rec);
+	}
+
 	/* get the nodemap for all active remote nodes
 	 */
 	remote_nodemaps = talloc_array(mem_ctx, struct ctdb_node_map *, nodemap->num);


-- 
CTDB repository


More information about the samba-cvs mailing list