[SCM] CTDB repository - branch 1.2.40 updated - ctdb-1.2.56-8-g055234c
Amitay Isaacs
amitay at samba.org
Tue Feb 5 20:23:32 MST 2013
The branch, 1.2.40 has been updated
via 055234ceb7d71cc8d2bba4f15749006ce1b67176 (commit)
via 35a003d6c3135e61f86d4d5acd46a46bf1bc1a57 (commit)
via b86270fae7fd9f8a7a718e15d8c7436a918f28c4 (commit)
from 2c203bbad887dff6f38e9e47e640c05fad95a7be (commit)
http://gitweb.samba.org/?p=ctdb.git;a=shortlog;h=1.2.40
- Log -----------------------------------------------------------------
commit 055234ceb7d71cc8d2bba4f15749006ce1b67176
Author: Martin Schwenke <martin at meltin.net>
Date: Tue Feb 5 16:40:39 2013 +1100
initscript: export CTDB_DEBUG_LOCKS
Signed-off-by: Martin Schwenke <martin at meltin.net>
commit 35a003d6c3135e61f86d4d5acd46a46bf1bc1a57
Author: Martin Schwenke <martin at meltin.net>
Date: Tue Feb 5 13:16:46 2013 +1100
initscript: export CTDB_EXTERNAL_TRACE
This means it can be set like any other configuration option in the
configuration file, without needing to export it there.
Cherry-pick-from: a0ef73e197dc9147f7718e0813fe803ff0b3d54d
Signed-off-by: Martin Schwenke <martin at meltin.net>
commit b86270fae7fd9f8a7a718e15d8c7436a918f28c4
Author: Martin Schwenke <martin at meltin.net>
Date: Thu May 17 10:17:51 2012 +1000
ctdbd: Backport use of external script to debug hung eventscript
This is a cherry-pick from 6e68797af67bee36f2bad045f94806e7e98f27e9,
combined with several recent fixes:
8507303b525d20c74e8ec4e7c4f5f275945cd3b6
scripts: debug-hung-script.sh doesn't need functions/loadconfig
501461cc3e132d4adee9e91b5d4513a26bae2846
ctdbd: Remove debug_hung_script_ctx
0581f9a84e58764d194f4e04064c2c5b393c348b
ctdbd: Remove command-line option --debug-hung-script
3400b2ed34b6eb9496eb55f1aab6f89d2952060d
ctdbd: Complain loudly if CTDB_DEBUG_HUNG_SCRIPT script isn't executable
9b0d56b16775aa16f33bdfdf831256e085fa3339
ctdbd: Don't use a fixed length buffer for the hung script command
Signed-off-by: Martin Schwenke <martin at meltin.net>
-----------------------------------------------------------------------
Summary of changes:
Makefile.in | 1 +
config/ctdb.init | 7 ++++++
config/ctdb.sysconfig | 3 ++
config/debug-hung-script.sh | 4 +++
packaging/RPM/ctdb.spec.in | 1 +
server/eventscript.c | 48 ++++++++++++++++++++++++++-----------------
6 files changed, 45 insertions(+), 19 deletions(-)
create mode 100644 config/debug-hung-script.sh
Changeset truncated at 500 lines:
diff --git a/Makefile.in b/Makefile.in
index 136ae30..849abd7 100755
--- a/Makefile.in
+++ b/Makefile.in
@@ -291,6 +291,7 @@ install: all
if [ -f doc/onnode.1 ];then ${INSTALLCMD} -m 644 doc/onnode.1 $(DESTDIR)$(mandir)/man1; fi
if [ -f doc/ltdbtool.1 ]; then ${INSTALLCMD} -m 644 doc/ltdbtool.1 $(DESTDIR)$(mandir)/man1; fi
if [ ! -f $(DESTDIR)$(etcdir)/ctdb/notify.sh ];then ${INSTALLCMD} -m 755 config/notify.sh $(DESTDIR)$(etcdir)/ctdb; fi
+ if [ ! -f $(DESTDIR)$(etcdir)/ctdb/debug-hung-script.sh ];then ${INSTALLCMD} -m 755 config/debug-hung-script.sh $(DESTDIR)$(etcdir)/ctdb; fi
if [ ! -f $(DESTDIR)$(etcdir)/ctdb/ctdb-crash-cleanup.sh ];then ${INSTALLCMD} -m 755 config/ctdb-crash-cleanup.sh $(DESTDIR)$(etcdir)/ctdb; fi
if [ ! -f $(DESTDIR)$(etcdir)/ctdb/gcore_trace.sh ];then ${INSTALLCMD} -m 755 config/gcore_trace.sh $(DESTDIR)$(etcdir)/ctdb; fi
diff --git a/config/ctdb.init b/config/ctdb.init
index 4fe01e3..3c2412d 100755
--- a/config/ctdb.init
+++ b/config/ctdb.init
@@ -111,6 +111,11 @@ build_ctdb_options () {
maybe_set "--max-persistent-check-errors" "$CTDB_MAX_PERSISTENT_CHECK_ERRORS"
}
+export_debug_variables ()
+{
+ export CTDB_DEBUG_HUNG_SCRIPT CTDB_EXTERNAL_TRACE CTDB_DEBUG_LOCKS
+}
+
# Log given message or stdin to either syslog or a CTDB log file
do_log ()
{
@@ -262,6 +267,8 @@ start() {
build_ctdb_options
+ export_debug_variables
+
# make sure we drop any ips that might still be held if previous
# instance of ctdb got killed with -9 or similar
drop_all_public_ips
diff --git a/config/ctdb.sysconfig b/config/ctdb.sysconfig
index 1f2edc4..08a550f 100644
--- a/config/ctdb.sysconfig
+++ b/config/ctdb.sysconfig
@@ -92,6 +92,9 @@ CTDB_RECOVERY_LOCK="/some/place/on/shared/storage"
# a script to run when node health changes
# CTDB_NOTIFY_SCRIPT=/etc/ctdb/notify.sh
+# a script to collect data when an eventscript has hung
+# CTDB_DEBUG_HUNG_SCRIPT=/etc/ctdb/debug-hung-script.sh
+
# the directory to put the local ctdb database files in
# defaults to /var/ctdb
# CTDB_DBDIR=/var/ctdb
diff --git a/config/debug-hung-script.sh b/config/debug-hung-script.sh
new file mode 100644
index 0000000..dcf68ba
--- /dev/null
+++ b/config/debug-hung-script.sh
@@ -0,0 +1,4 @@
+#!/bin/sh
+
+echo "Pstree output for the hung script:"
+pstree -p -a $1
diff --git a/packaging/RPM/ctdb.spec.in b/packaging/RPM/ctdb.spec.in
index 43c2c3b..e66ce37 100644
--- a/packaging/RPM/ctdb.spec.in
+++ b/packaging/RPM/ctdb.spec.in
@@ -88,6 +88,7 @@ rm -rf $RPM_BUILD_ROOT
%config(noreplace) %{_sysconfdir}/sysconfig/ctdb
%config(noreplace) %{_sysconfdir}/ctdb/notify.sh
+%config(noreplace) %{_sysconfdir}/ctdb/debug-hung-script.sh
%config(noreplace) %{_sysconfdir}/ctdb/ctdb-crash-cleanup.sh
%config(noreplace) %{_sysconfdir}/ctdb/gcore_trace.sh
%config(noreplace) %{_sysconfdir}/ctdb/functions
diff --git a/server/eventscript.c b/server/eventscript.c
index 9ef1f3d..0715b46 100644
--- a/server/eventscript.c
+++ b/server/eventscript.c
@@ -504,15 +504,14 @@ static void ctdb_event_script_handler(struct event_context *ev, struct fd_event
}
}
-static void debug_timeout(struct ctdb_event_script_state *state)
+static void ctdb_run_debug_hung_script(struct ctdb_context *ctdb, struct ctdb_event_script_state *state)
{
struct ctdb_script_wire *current = get_current_script(state);
char *cmd;
pid_t pid;
- time_t t;
- char tbuf[100], buf[200];
+ const char * debug_hung_script = ETCDIR "/ctdb/debug-hung-script.sh";
- cmd = child_command_string(state->ctdb, state,
+ cmd = child_command_string(ctdb, state,
state->from_user, current->name,
state->call, state->options);
CTDB_NO_MEMORY_VOID(state->ctdb, cmd);
@@ -521,26 +520,36 @@ static void debug_timeout(struct ctdb_event_script_state *state)
cmd, timeval_elapsed(&current->start), state->child));
talloc_free(cmd);
- t = time(NULL);
- strftime(tbuf, sizeof(tbuf)-1, "%Y%m%d%H%M%S", localtime(&t));
- sprintf(buf, "{ pstree -p; cat /proc/locks; ls -li /var/ctdb/ /var/ctdb/persistent; }"
- " >/tmp/ctdb.event.%s.%d", tbuf, getpid());
-
- pid = ctdb_fork(state->ctdb);
- if (pid == 0) {
- system(buf);
- /* Now we can kill the child */
+ if (!ctdb_fork_with_logging(ctdb, ctdb, NULL, NULL, &pid)) {
+ DEBUG(DEBUG_ERR,("Failed to fork a child process with logging to track hung event script\n"));
kill(state->child, SIGTERM);
- exit(0);
+ return;
}
if (pid == -1) {
DEBUG(DEBUG_ERR,("Fork for debug script failed : %s\n",
strerror(errno)));
- } else {
- DEBUG(DEBUG_ERR,("Logged timedout eventscript : %s\n", buf));
- /* Don't kill child until timeout done. */
- state->child = 0;
+ kill(state->child, SIGTERM);
+ return;
}
+ if (pid == 0) {
+ char *buf;
+
+ if (getenv("CTDB_DEBUG_HUNG_SCRIPT") != NULL) {
+ debug_hung_script = getenv("CTDB_DEBUG_HUNG_SCRIPT");
+ }
+
+ buf = talloc_asprintf(NULL, "%s %d",
+ debug_hung_script, state->child);
+ system(buf);
+ talloc_free(buf);
+
+ /* Now we can kill the child */
+ kill(state->child, SIGTERM);
+ _exit(0);
+ }
+
+ /* Don't kill child until timeout done. */
+ state->child = 0;
}
/* called when child times out */
@@ -564,10 +573,11 @@ static void ctdb_event_script_timeout(struct event_context *ev, struct timed_eve
case CTDB_EVENT_STATUS:
state->scripts->scripts[state->current].status = 0;
DEBUG(DEBUG_ERR,("Ignoring hung script for %s call %d\n", state->options, state->call));
+ ctdb_run_debug_hung_script(ctdb, state);
break;
default:
state->scripts->scripts[state->current].status = -ETIME;
- debug_timeout(state);
+ ctdb_run_debug_hung_script(ctdb, state);
}
talloc_free(state);
--
CTDB repository
More information about the samba-cvs
mailing list