[SCM] CTDB repository - branch master updated - ctdb-1.0.114-315-gc386f2c
Ronnie Sahlberg
sahlberg at samba.org
Mon Sep 13 23:49:17 MDT 2010
The branch, master, has been updated
via c386f2c62f06f1c60047b7d4b1ec7a9eec11873c (commit)
via 80b8889267339b870868841ff077e850bc5b52e2 (commit)
via 93df096773c89f21f77b3bcf9aa90bf28881b852 (commit)
via 942f44123350d4d0c4ad7f3fcd5ff2d0d175739b (commit)
from 1261f3d9702800a4e59550c881350daf479f00ef (commit)
http://gitweb.samba.org/?p=sahlberg/ctdb.git;a=shortlog;h=master
- Log -----------------------------------------------------------------
commit c386f2c62f06f1c60047b7d4b1ec7a9eec11873c
Author: Stefan Metzmacher <metze at samba.org>
Date: Tue Aug 31 09:28:34 2010 +0200
server/banning: also release all ips if we're banning ourself
metze
commit 80b8889267339b870868841ff077e850bc5b52e2
Author: Stefan Metzmacher <metze at samba.org>
Date: Mon Aug 30 18:25:28 2010 +0200
server/recoverd: if we can't get the recovery lock, ban ourself
metze
commit 93df096773c89f21f77b3bcf9aa90bf28881b852
Author: Stefan Metzmacher <metze at samba.org>
Date: Tue Aug 31 08:42:32 2010 +0200
server/recoverd: do takeover_run after verifying the reclock file
metze
commit 942f44123350d4d0c4ad7f3fcd5ff2d0d175739b
Author: Stefan Metzmacher <metze at samba.org>
Date: Tue Aug 24 09:22:49 2010 +0200
server/monitor: ask for a takeoverrun after propagating our new flags
metze
-----------------------------------------------------------------------
Summary of changes:
include/ctdb_private.h | 1 +
server/ctdb_banning.c | 30 +++++++++++++++++++++++++++++-
server/ctdb_monitor.c | 48 +++++++++++++++---------------------------------
server/ctdb_recoverd.c | 15 +++++++++------
4 files changed, 54 insertions(+), 40 deletions(-)
Changeset truncated at 500 lines:
diff --git a/include/ctdb_private.h b/include/ctdb_private.h
index b707afd..89b8f08 100644
--- a/include/ctdb_private.h
+++ b/include/ctdb_private.h
@@ -1291,6 +1291,7 @@ int ctdb_vacuum_init(struct ctdb_db_context *ctdb_db);
int32_t ctdb_control_enable_script(struct ctdb_context *ctdb, TDB_DATA indata);
int32_t ctdb_control_disable_script(struct ctdb_context *ctdb, TDB_DATA indata);
+int32_t ctdb_local_node_got_banned(struct ctdb_context *ctdb);
int32_t ctdb_control_set_ban_state(struct ctdb_context *ctdb, TDB_DATA indata);
int32_t ctdb_control_get_ban_state(struct ctdb_context *ctdb, TDB_DATA *outdata);
int32_t ctdb_control_set_db_priority(struct ctdb_context *ctdb, TDB_DATA indata);
diff --git a/server/ctdb_banning.c b/server/ctdb_banning.c
index 3d5f216..5684907 100644
--- a/server/ctdb_banning.c
+++ b/server/ctdb_banning.c
@@ -42,6 +42,31 @@ ctdb_ban_node_event(struct event_context *ev, struct timed_event *te,
}
}
+int32_t ctdb_local_node_got_banned(struct ctdb_context *ctdb)
+{
+ uint32_t i;
+
+ /* make sure we are frozen */
+ DEBUG(DEBUG_NOTICE,("This node has been banned - forcing freeze and recovery\n"));
+
+ /* Reset the generation id to 1 to make us ignore any
+ REQ/REPLY CALL/DMASTER someone sends to us.
+ We are now banned so we shouldnt service database calls
+ anymore.
+ */
+ ctdb->vnn_map->generation = INVALID_GENERATION;
+
+ for (i=1; i<=NUM_DB_PRIORITIES; i++) {
+ if (ctdb_start_freeze(ctdb, i) != 0) {
+ DEBUG(DEBUG_ERR,(__location__ " Failed to freeze db priority %u\n", i));
+ }
+ }
+ ctdb_release_all_ips(ctdb);
+ ctdb->recovery_mode = CTDB_RECOVERY_ACTIVE;
+
+ return 0;
+}
+
int32_t ctdb_control_set_ban_state(struct ctdb_context *ctdb, TDB_DATA indata)
{
struct ctdb_ban_time *bantime = (struct ctdb_ban_time *)indata.dptr;
@@ -96,7 +121,10 @@ int32_t ctdb_control_set_ban_state(struct ctdb_context *ctdb, TDB_DATA indata)
ctdb->nodes[bantime->pnn]->flags |= NODE_FLAGS_BANNED;
event_add_timed(ctdb->ev, ctdb->banning_ctx, timeval_current_ofs(bantime->time,0), ctdb_ban_node_event, ctdb);
-
+ if (bantime->pnn == ctdb->pnn) {
+ return ctdb_local_node_got_banned(ctdb);
+ }
+
return 0;
}
diff --git a/server/ctdb_monitor.c b/server/ctdb_monitor.c
index 7f5da5c..dff6f42 100644
--- a/server/ctdb_monitor.c
+++ b/server/ctdb_monitor.c
@@ -114,6 +114,7 @@ static void ctdb_health_callback(struct ctdb_context *ctdb, int status, void *p)
int ret;
TDB_DATA rddata;
struct takeover_run_reply rd;
+ const char *state_str = NULL;
c.pnn = ctdb->pnn;
c.old_flags = node->flags;
@@ -141,28 +142,12 @@ static void ctdb_health_callback(struct ctdb_context *ctdb, int status, void *p)
ctdb->monitor->next_interval = 5;
ctdb_run_notification_script(ctdb, "unhealthy");
-
- /* ask the recmaster to reallocate all addresses */
- DEBUG(DEBUG_ERR,("Node became UNHEALTHY. Ask recovery master %u to perform ip reallocation\n", ctdb->recovery_master));
- ret = ctdb_daemon_send_message(ctdb, ctdb->recovery_master, CTDB_SRVID_TAKEOVER_RUN, rddata);
- if (ret != 0) {
- DEBUG(DEBUG_ERR,(__location__ " Failed to send ip takeover run request message to %u\n", ctdb->recovery_master));
- }
-
} else if (status == 0 && (node->flags & NODE_FLAGS_UNHEALTHY)) {
DEBUG(DEBUG_NOTICE,("monitor event OK - node re-enabled\n"));
node->flags &= ~NODE_FLAGS_UNHEALTHY;
ctdb->monitor->next_interval = 5;
ctdb_run_notification_script(ctdb, "healthy");
-
- /* ask the recmaster to reallocate all addresses */
- DEBUG(DEBUG_ERR,("Node became HEALTHY. Ask recovery master %u to perform ip reallocation\n", ctdb->recovery_master));
- ret = ctdb_daemon_send_message(ctdb, ctdb->recovery_master, CTDB_SRVID_TAKEOVER_RUN, rddata);
- if (ret != 0) {
- DEBUG(DEBUG_ERR,(__location__ " Failed to send ip takeover run request message to %u\n", ctdb->recovery_master));
- }
-
}
after_change_status:
@@ -190,6 +175,19 @@ after_change_status:
ctdb_daemon_send_message(ctdb, ctdb->pnn,
CTDB_SRVID_PUSH_NODE_FLAGS, data);
+ if (c.new_flags & NODE_FLAGS_UNHEALTHY) {
+ state_str = "UNHEALTHY";
+ } else {
+ state_str = "HEALTHY";
+ }
+
+ /* ask the recmaster to reallocate all addresses */
+ DEBUG(DEBUG_ERR,("Node became %s. Ask recovery master %u to perform ip reallocation\n",
+ state_str, ctdb->recovery_master));
+ ret = ctdb_daemon_send_message(ctdb, ctdb->recovery_master, CTDB_SRVID_TAKEOVER_RUN, rddata);
+ if (ret != 0) {
+ DEBUG(DEBUG_ERR,(__location__ " Failed to send ip takeover run request message to %u\n", ctdb->recovery_master));
+ }
}
@@ -433,7 +431,6 @@ int32_t ctdb_control_modflags(struct ctdb_context *ctdb, TDB_DATA indata)
struct ctdb_node_flag_change *c = (struct ctdb_node_flag_change *)indata.dptr;
struct ctdb_node *node;
uint32_t old_flags;
- int i;
if (c->pnn >= ctdb->num_nodes) {
DEBUG(DEBUG_ERR,(__location__ " Node %d is invalid, num_nodes :%d\n", c->pnn, ctdb->num_nodes));
@@ -483,22 +480,7 @@ int32_t ctdb_control_modflags(struct ctdb_context *ctdb, TDB_DATA indata)
/* if we have become banned, we should go into recovery mode */
if ((node->flags & NODE_FLAGS_BANNED) && !(c->old_flags & NODE_FLAGS_BANNED) && (node->pnn == ctdb->pnn)) {
- /* make sure we are frozen */
- DEBUG(DEBUG_NOTICE,("This node has been banned - forcing freeze and recovery\n"));
- /* Reset the generation id to 1 to make us ignore any
- REQ/REPLY CALL/DMASTER someone sends to us.
- We are now banned so we shouldnt service database calls
- anymore.
- */
- ctdb->vnn_map->generation = INVALID_GENERATION;
-
- for (i=1; i<=NUM_DB_PRIORITIES; i++) {
- if (ctdb_start_freeze(ctdb, i) != 0) {
- DEBUG(DEBUG_ERR,(__location__ " Failed to freeze db priority %u\n", i));
- }
- }
- ctdb_release_all_ips(ctdb);
- ctdb->recovery_mode = CTDB_RECOVERY_ACTIVE;
+ return ctdb_local_node_got_banned(ctdb);
}
return 0;
diff --git a/server/ctdb_recoverd.c b/server/ctdb_recoverd.c
index 437e4cb..30c34b3 100644
--- a/server/ctdb_recoverd.c
+++ b/server/ctdb_recoverd.c
@@ -1382,8 +1382,10 @@ static int do_recovery(struct ctdb_recoverd *rec,
DEBUG(DEBUG_ERR,("Taking out recovery lock from recovery daemon\n"));
start_time = timeval_current();
if (!ctdb_recovery_lock(ctdb, true)) {
- ctdb_set_culprit(rec, pnn);
- DEBUG(DEBUG_ERR,("Unable to get recovery lock - aborting recovery\n"));
+ DEBUG(DEBUG_ERR,("Unable to get recovery lock - aborting recovery "
+ "and ban ourself for %u seconds\n",
+ ctdb->tunable.recovery_ban_period));
+ ctdb_ban_node(rec, pnn, ctdb->tunable.recovery_ban_period);
return -1;
}
ctdb_ctrl_report_recd_lock_latency(ctdb, CONTROL_TIMEOUT(), timeval_elapsed(&start_time));
@@ -3009,10 +3011,6 @@ static void main_loop(struct ctdb_context *ctdb, struct ctdb_recoverd *rec,
rec->reallocate_callers = NULL;
}
}
- /* if there are takeovers requested, perform it and notify the waiters */
- if (rec->reallocate_callers) {
- process_ipreallocate_requests(ctdb, rec);
- }
if (rec->recmaster == (uint32_t)-1) {
DEBUG(DEBUG_NOTICE,(__location__ " Initial recovery master set - forcing election\n"));
@@ -3199,6 +3197,11 @@ static void main_loop(struct ctdb_context *ctdb, struct ctdb_recoverd *rec,
}
}
+ /* if there are takeovers requested, perform it and notify the waiters */
+ if (rec->reallocate_callers) {
+ process_ipreallocate_requests(ctdb, rec);
+ }
+
/* get the nodemap for all active remote nodes
*/
remote_nodemaps = talloc_array(mem_ctx, struct ctdb_node_map *, nodemap->num);
--
CTDB repository
More information about the samba-cvs
mailing list