[SCM] CTDB repository - branch master updated -
ctdb-1.0.84-17-gfc8a364
Ronnie Sahlberg
sahlberg at samba.org
Fri Jun 19 05:12:03 GMT 2009
The branch, master has been updated
via fc8a364eb095ec11ca01246a583bf1dc53510141 (commit)
via 268c3e4b269a92741a02280c84384178e73de10e (commit)
via d177b08f1dc79534491f27726b05405d47e12e20 (commit)
via 6d1e4321b63973c2e53c63d386e8cc0bd9605cae (commit)
from facddcacb4a961cddb117818fa38a3e97770b2fa (commit)
http://gitweb.samba.org/?p=sahlberg/ctdb.git;a=shortlog;h=master
- Log -----------------------------------------------------------------
commit fc8a364eb095ec11ca01246a583bf1dc53510141
Author: Ronnie Sahlberg <ronniesahlberg at gmail.com>
Date: Fri Jun 19 14:58:06 2009 +1000
don't leak file descriptors when set recmode times out
commit 268c3e4b269a92741a02280c84384178e73de10e
Author: Ronnie Sahlberg <ronniesahlberg at gmail.com>
Date: Fri Jun 19 14:54:22 2009 +1000
don't leak file descriptors
commit d177b08f1dc79534491f27726b05405d47e12e20
Author: Ronnie Sahlberg <ronniesahlberg at gmail.com>
Date: Fri Jun 19 14:44:26 2009 +1000
In the recovery daemon, use a child process to check that the recovery master can access the recovery lock file and to verify that the lock is not stale.
This allows us to time out the operation if the underlying filesystem has become temporarily unresponsive, without causing a new recovery.
commit 6d1e4321b63973c2e53c63d386e8cc0bd9605cae
Author: Ronnie Sahlberg <ronniesahlberg at gmail.com>
Date: Fri Jun 19 13:09:11 2009 +1000
reduce the time we wait for the reclock child process to finish to 5 seconds
before we log an error and abort
-----------------------------------------------------------------------
Summary of changes:
server/ctdb_recover.c | 14 +++-
server/ctdb_recoverd.c | 185 ++++++++++++++++++++++++++++++++++++++++++++---
2 files changed, 185 insertions(+), 14 deletions(-)
Changeset truncated at 500 lines:
diff --git a/server/ctdb_recover.c b/server/ctdb_recover.c
index 7953c6b..526a310 100644
--- a/server/ctdb_recover.c
+++ b/server/ctdb_recover.c
@@ -531,7 +531,13 @@ static int set_recmode_destructor(struct ctdb_set_recmode_state *state)
double l = timeval_elapsed(&state->start_time);
ctdb_reclock_latency(state->ctdb, "daemon reclock", &state->ctdb->statistics.reclock.ctdbd, l);
-
+
+ if (state->fd[0] != -1) {
+ state->fd[0] = -1;
+ }
+ if (state->fd[1] != -1) {
+ state->fd[1] = -1;
+ }
kill(state->child, SIGKILL);
return 0;
}
@@ -645,6 +651,8 @@ int32_t ctdb_control_set_recmode(struct ctdb_context *ctdb,
CTDB_NO_MEMORY(ctdb, state);
state->start_time = timeval_current();
+ state->fd[0] = -1;
+ state->fd[1] = -1;
if (ctdb->tunable.verify_recovery_lock == 0) {
/* dont need to verify the reclock file */
@@ -693,16 +701,18 @@ int32_t ctdb_control_set_recmode(struct ctdb_context *ctdb,
_exit(0);
}
close(state->fd[1]);
+ state->fd[1] = -1;
talloc_set_destructor(state, set_recmode_destructor);
- state->te = event_add_timed(ctdb->ev, state, timeval_current_ofs(15, 0),
+ state->te = event_add_timed(ctdb->ev, state, timeval_current_ofs(5, 0),
ctdb_set_recmode_timeout, state);
state->fde = event_add_fd(ctdb->ev, state, state->fd[0],
EVENT_FD_READ|EVENT_FD_AUTOCLOSE,
set_recmode_handler,
(void *)state);
+
if (state->fde == NULL) {
talloc_free(state);
return -1;
diff --git a/server/ctdb_recoverd.c b/server/ctdb_recoverd.c
index 6b2fb5e..07f3f0d 100644
--- a/server/ctdb_recoverd.c
+++ b/server/ctdb_recoverd.c
@@ -2331,6 +2331,176 @@ static int get_remote_nodemaps(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx,
return 0;
}
+enum reclock_child_status { RECLOCK_CHECKING, RECLOCK_OK, RECLOCK_FAILED, RECLOCK_TIMEOUT};
+struct ctdb_check_reclock_state {
+ struct ctdb_context *ctdb;
+ struct timeval start_time;
+ int fd[2];
+ pid_t child;
+ struct timed_event *te;
+ struct fd_event *fde;
+ enum reclock_child_status status;
+};
+
+/* when we free the reclock state we must kill any child process.
+*/
+static int check_reclock_destructor(struct ctdb_check_reclock_state *state)
+{
+ struct ctdb_context *ctdb = state->ctdb;
+
+ ctdb_ctrl_report_recd_lock_latency(ctdb, CONTROL_TIMEOUT(), timeval_elapsed(&state->start_time));
+
+ if (state->fd[0] != -1) {
+ close(state->fd[0]);
+ state->fd[0] = -1;
+ }
+ if (state->fd[1] != -1) {
+ close(state->fd[1]);
+ state->fd[1] = -1;
+ }
+ kill(state->child, SIGKILL);
+ return 0;
+}
+
+/*
+ called if our check_reclock child times out. this would happen if
+ i/o to the reclock file blocks.
+ */
+static void ctdb_check_reclock_timeout(struct event_context *ev, struct timed_event *te,
+ struct timeval t, void *private_data)
+{
+ struct ctdb_check_reclock_state *state = talloc_get_type(private_data,
+ struct ctdb_check_reclock_state);
+
+ DEBUG(DEBUG_ERR,(__location__ " check_reclock child process hung/timedout CFS slow to grant locks?\n"));
+ state->status = RECLOCK_TIMEOUT;
+}
+
+/* this is called when the child process has completed checking the reclock
+ file and has written data back to us through the pipe.
+*/
+static void reclock_child_handler(struct event_context *ev, struct fd_event *fde,
+ uint16_t flags, void *private_data)
+{
+ struct ctdb_check_reclock_state *state= talloc_get_type(private_data,
+ struct ctdb_check_reclock_state);
+ char c = 0;
+ int ret;
+
+ /* we got a response from our child process so we can abort the
+ timeout.
+ */
+ talloc_free(state->te);
+ state->te = NULL;
+
+ ret = read(state->fd[0], &c, 1);
+ if (ret != 1 || c != RECLOCK_OK) {
+ DEBUG(DEBUG_ERR,(__location__ " reclock child process returned error %d\n", c));
+ state->status = RECLOCK_FAILED;
+
+ return;
+ }
+
+ state->status = RECLOCK_OK;
+ return;
+}
+
+static int check_recovery_lock(struct ctdb_context *ctdb)
+{
+ int ret;
+ struct ctdb_check_reclock_state *state;
+ pid_t parent = getpid();
+
+ if (ctdb->recovery_lock_fd == -1) {
+ DEBUG(DEBUG_CRIT,("recovery master doesn't have the recovery lock\n"));
+ return -1;
+ }
+
+ state = talloc(ctdb, struct ctdb_check_reclock_state);
+ CTDB_NO_MEMORY(ctdb, state);
+
+ state->ctdb = ctdb;
+ state->start_time = timeval_current();
+ state->status = RECLOCK_CHECKING;
+ state->fd[0] = -1;
+ state->fd[1] = -1;
+
+ ret = pipe(state->fd);
+ if (ret != 0) {
+ talloc_free(state);
+ DEBUG(DEBUG_CRIT,(__location__ " Failed to open pipe for check_reclock child\n"));
+ return -1;
+ }
+
+ state->child = fork();
+ if (state->child == (pid_t)-1) {
+ DEBUG(DEBUG_CRIT,(__location__ " fork() failed in check_reclock child\n"));
+ close(state->fd[0]);
+ state->fd[0] = -1;
+ close(state->fd[1]);
+ state->fd[1] = -1;
+ talloc_free(state);
+ return -1;
+ }
+
+ if (state->child == 0) {
+ char cc = RECLOCK_OK;
+ close(state->fd[0]);
+ state->fd[0] = -1;
+
+ if (pread(ctdb->recovery_lock_fd, &cc, 1, 0) == -1) {
+ DEBUG(DEBUG_CRIT,("failed read from recovery_lock_fd - %s\n", strerror(errno)));
+ cc = RECLOCK_FAILED;
+ }
+
+ write(state->fd[1], &cc, 1);
+ /* make sure we die when our parent dies */
+ while (kill(parent, 0) == 0 || errno != ESRCH) {
+ sleep(5);
+ write(state->fd[1], &cc, 1);
+ }
+ _exit(0);
+ }
+ close(state->fd[1]);
+ state->fd[1] = -1;
+
+ talloc_set_destructor(state, check_reclock_destructor);
+
+ state->te = event_add_timed(ctdb->ev, state, timeval_current_ofs(15, 0),
+ ctdb_check_reclock_timeout, state);
+ if (state->te == NULL) {
+ DEBUG(DEBUG_CRIT,(__location__ " Failed to create a timed event for reclock child\n"));
+ talloc_free(state);
+ return -1;
+ }
+
+ state->fde = event_add_fd(ctdb->ev, state, state->fd[0],
+ EVENT_FD_READ|EVENT_FD_AUTOCLOSE,
+ reclock_child_handler,
+ (void *)state);
+
+ if (state->fde == NULL) {
+ DEBUG(DEBUG_CRIT,(__location__ " Failed to create an fd event for reclock child\n"));
+ talloc_free(state);
+ return -1;
+ }
+
+ while (state->status == RECLOCK_CHECKING) {
+ event_loop_once(ctdb->ev);
+ }
+
+ if (state->status == RECLOCK_FAILED) {
+ DEBUG(DEBUG_ERR,(__location__ " reclock child failed when checking file\n"));
+ close(ctdb->recovery_lock_fd);
+ ctdb->recovery_lock_fd = -1;
+ talloc_free(state);
+ return -1;
+ }
+
+ talloc_free(state);
+ return 0;
+}
+
/*
the main monitoring loop
*/
@@ -2346,7 +2516,6 @@ static void monitor_cluster(struct ctdb_context *ctdb)
int32_t debug_level;
int i, j, ret;
struct ctdb_recoverd *rec;
- char c;
DEBUG(DEBUG_NOTICE,("monitor_cluster starting\n"));
@@ -2663,21 +2832,13 @@ again:
/* we should have the reclock - check its not stale */
- if (ctdb->recovery_lock_fd == -1) {
- DEBUG(DEBUG_CRIT,("recovery master doesn't have the recovery lock\n"));
- do_recovery(rec, mem_ctx, pnn, nodemap, vnnmap, ctdb->pnn);
- goto again;
- }
-
- if (pread(ctdb->recovery_lock_fd, &c, 1, 0) == -1) {
- DEBUG(DEBUG_CRIT,("failed read from recovery_lock_fd - %s\n", strerror(errno)));
- close(ctdb->recovery_lock_fd);
- ctdb->recovery_lock_fd = -1;
+ ret = check_recovery_lock(ctdb);
+ if (ret != 0) {
+ DEBUG(DEBUG_ERR,("Failed check_recovery_lock. Force a recovery\n"));
do_recovery(rec, mem_ctx, pnn, nodemap, vnnmap, ctdb->pnn);
goto again;
}
-
/* get the nodemap for all active remote nodes
*/
remote_nodemaps = talloc_array(mem_ctx, struct ctdb_node_map *, nodemap->num);
--
CTDB repository
More information about the samba-cvs
mailing list