[SCM] CTDB repository - branch master updated - ctdb-1.0.114-76-g221a9bb

Mon May 3 00:12:31 MDT 2010

The branch, master has been updated
       via  221a9bb41c3a7af0cc65cda78365010893ca1430 (commit)
       via  189f4a5af1053271b0834522e35c336df959aa03 (commit)
       via  e7069082e5f0380dcddee247db8754218ce18cab (commit)
       via  880896a27adfdd5173b2810b6b2f3889802046f0 (commit)
       via  d2e4a9912c4bd13eb4f12681adebe7e59a6d1fb2 (commit)
       via  439f049c7024d69aa4b87dc811e1772981ad29cb (commit)
       via  f8aa83788e3cc10ab7655a90d7b7b17ddbe48685 (commit)
       via  7ee5ecc8d53e78e2dec21197b74a74cc4ae1834c (commit)
       via  fcc63e04beb427c1f48deae6d3d98c78a2a67949 (commit)
      from  480af69b63b9162c85d8e04461ca9e4a083c04a4 (commit)

http://gitweb.samba.org/?p=sahlberg/ctdb.git;a=shortlog;h=master


- Log -----------------------------------------------------------------
commit 221a9bb41c3a7af0cc65cda78365010893ca1430
Merge: 189f4a5af1053271b0834522e35c336df959aa03 439f049c7024d69aa4b87dc811e1772981ad29cb
Author: Ronnie sahlberg <ronniesahlberg at gmail.com>
Date:   Mon May 3 15:57:41 2010 +1000

    Merge commit 'rusty/signal-fix'

commit 189f4a5af1053271b0834522e35c336df959aa03
Author: Ronnie Sahlberg <ronniesahlberg at gmail.com>
Date:   Mon May 3 15:52:02 2010 +1000

        Don't check ip assignment across the cluster while ip-verification
        checks are disabled

commit e7069082e5f0380dcddee247db8754218ce18cab
Author: Ronnie Sahlberg <ronniesahlberg at gmail.com>
Date:   Wed Apr 28 15:43:11 2010 +1000

    The recent change to the recovery daemon to keep track of and
    verify that all nodes agree on the most recent ip address assignments
    broke "ctdb moveip ..." since that call would never trigger
    a full takeover run and thus would immediately trigger an inconsistency.
    
    Add a new message that tells the recovery daemon to update its assignments.
    
    BZ62782

commit 880896a27adfdd5173b2810b6b2f3889802046f0
Author: Ronnie Sahlberg <ronniesahlberg at gmail.com>
Date:   Wed Apr 28 14:47:37 2010 +1000

    Make create_merged_ip_list() a static function since
    it is not called from outside of ctdb_takeover.c

commit d2e4a9912c4bd13eb4f12681adebe7e59a6d1fb2
Author: Ronnie Sahlberg <ronniesahlberg at gmail.com>
Date:   Wed Apr 28 14:44:53 2010 +1000

    In the log message when we have found an inconsistent ip address allocation,
    add extra log information about what the inconsistency is.

commit 439f049c7024d69aa4b87dc811e1772981ad29cb
Author: Rusty Russell <rusty at rustcorp.com.au>
Date:   Thu Apr 8 15:11:05 2010 +0930

    eventscript: simplify script timeout handling
    
    Now that the script child signal handler doesn't do anything, we can unify the
    "timeout" and "abort" cases introduced in 9dd25cb751919799.
    
    Signed-off-by: Rusty Russell <rusty at rustcorp.com.au>

commit f8aa83788e3cc10ab7655a90d7b7b17ddbe48685
Author: Rusty Russell <rusty at rustcorp.com.au>
Date:   Thu Apr 8 15:09:08 2010 +0930

    eventscript: wait for debugging dump before killing timedout script
    
    Fairly simple: prevent the destructor from killing the script, and do it
    explicitly from the debugging child.
    
    We can remove the extra "already dead" test, since this will be detected
    in the destructor anyway.
    
    Signed-off-by: Rusty Russell <rusty at rustcorp.com.au>

commit 7ee5ecc8d53e78e2dec21197b74a74cc4ae1834c
Author: Rusty Russell <rusty at rustcorp.com.au>
Date:   Thu Apr 8 15:13:29 2010 +0930

    eventscript: don't do debugging system() from inside signal handler
    
    In the case of a timeout, we dump a log of what's happening to a file
    in /tmp.  We do it from the signal handler, which is an unreliable hack
    (BZ58365).
    
    Instead, create another (lower-priority) child to do the dump, then
    kill the timedout script.
    
    Note that this doesn't quite work as intended (the dump is often run
    after the script has been killed), so the next patch resolves this.
    
    Signed-off-by: Rusty Russell <rusty at rustcorp.com.au>

commit fcc63e04beb427c1f48deae6d3d98c78a2a67949
Author: Rusty Russell <rusty at rustcorp.com.au>
Date:   Thu Apr 8 10:35:04 2010 +0930

    eventscript: fix case where we fail to create child for some reason
    
    Initialize the child pid to 0 so destructor doesn't try to kill it:
    
    	server/eventscript.c:565 Sending SIGTERM to child pid:139742328
    	Failed to kill child process for eventscript, errno No such process(3)
    
    Signed-off-by: Rusty Russell <rusty at rustcorp.com.au>

-----------------------------------------------------------------------

Summary of changes:
 common/ctdb_util.c     |   12 ++++++
 include/ctdb.h         |    6 +++
 include/ctdb_private.h |    3 +
 server/ctdb_recoverd.c |   34 +++++++++++++++-
 server/ctdb_takeover.c |   25 +++++++++++-
 server/eventscript.c   |  101 ++++++++++++++++++++++-------------------------
 tools/ctdb.c           |    9 ++++
 7 files changed, 131 insertions(+), 59 deletions(-)


Changeset truncated at 500 lines:

diff --git a/common/ctdb_util.c b/common/ctdb_util.c
index 63abc02..433a2ad 100644
--- a/common/ctdb_util.c
+++ b/common/ctdb_util.c
@@ -339,6 +339,18 @@ void ctdb_high_priority(struct ctdb_context *ctdb)
 	}
 }
 
+/*
+  make ourselves slightly nicer: eg. a ctdb child.
+ */
+void ctdb_reduce_priority(struct ctdb_context *ctdb)
+{
+	errno = 0;
+	if (nice(10) == -1 && errno != 0) {
+		DEBUG(DEBUG_WARNING,("Unable to lower priority: %s\n",
+				     strerror(errno)));
+	}
+}
+
 void set_nonblocking(int fd)
 {
 	unsigned v;
diff --git a/include/ctdb.h b/include/ctdb.h
index 94198aa..540ca98 100644
--- a/include/ctdb.h
+++ b/include/ctdb.h
@@ -75,6 +75,12 @@ struct ctdb_call_info {
  */
 #define CTDB_SRVID_SET_NODE_FLAGS 0xF400000000000000LL
 
+/* 
+   a message ID to ask the recovery daemon to update the expected node
+   assignment for a public ip
+ */
+#define CTDB_SRVID_RECD_UPDATE_IP 0xF500000000000000LL
+
 /*
   a message to tell the recovery daemon to fetch a set of records
  */
diff --git a/include/ctdb_private.h b/include/ctdb_private.h
index 888b626..cb7c165 100644
--- a/include/ctdb_private.h
+++ b/include/ctdb_private.h
@@ -1263,6 +1263,7 @@ void ctdb_node_dead(struct ctdb_node *node);
 void ctdb_node_connected(struct ctdb_node *node);
 bool ctdb_blocking_freeze(struct ctdb_context *ctdb);
 void ctdb_high_priority(struct ctdb_context *ctdb);
+void ctdb_reduce_priority(struct ctdb_context *ctdb);
 int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb, 
 				 struct ctdb_req_control *c,
 				 TDB_DATA indata, 
@@ -1639,5 +1640,7 @@ void ctdb_fault_setup(void);
 
 int verify_remote_ip_allocation(struct ctdb_context *ctdb, 
 				struct ctdb_all_public_ips *ips);
+int update_ip_assignment_tree(struct ctdb_context *ctdb,
+				struct ctdb_public_ip *ip);
 
 #endif
diff --git a/server/ctdb_recoverd.c b/server/ctdb_recoverd.c
index 5f34711..3ee607e 100644
--- a/server/ctdb_recoverd.c
+++ b/server/ctdb_recoverd.c
@@ -1275,9 +1275,11 @@ static int ctdb_reload_remote_public_ips(struct ctdb_context *ctdb,
 			return -1;
 		}
 
-		if (verify_remote_ip_allocation(ctdb, ctdb->nodes[j]->known_public_ips)) {
-			DEBUG(DEBUG_ERR,("Node %d has inconsistent public ip allocation and needs update.\n", ctdb->nodes[j]->pnn));
-			rec->need_takeover_run = true;
+		if (rec->ip_check_disable_ctx == NULL) {
+			if (verify_remote_ip_allocation(ctdb, ctdb->nodes[j]->known_public_ips)) {
+				DEBUG(DEBUG_ERR,("Node %d has inconsistent public ip allocation and needs update.\n", ctdb->nodes[j]->pnn));
+				rec->need_takeover_run = true;
+			}
 		}
 
 		/* grab a new shiny list of public ips from the node */
@@ -1891,6 +1893,29 @@ static void reenable_ip_check(struct event_context *ev, struct timed_event *te,
 	rec->ip_check_disable_ctx = NULL;
 }
 
+
+static void recd_update_ip_handler(struct ctdb_context *ctdb, uint64_t srvid, 
+			     TDB_DATA data, void *private_data)
+{
+	struct ctdb_recoverd *rec = talloc_get_type(private_data, struct ctdb_recoverd);
+	struct ctdb_public_ip *ip;
+
+	if (rec->recmaster != rec->ctdb->pnn) {
+		DEBUG(DEBUG_INFO,("Not recmaster, ignore update ip message\n"));
+		return;
+	}
+
+	if (data.dsize != sizeof(struct ctdb_public_ip)) {
+		DEBUG(DEBUG_ERR,(__location__ " Incorrect size of recd update ip message. Was %zd but expected %zd bytes\n", data.dsize, sizeof(struct ctdb_public_ip)));
+		return;
+	}
+
+	ip = (struct ctdb_public_ip *)data.dptr;
+
+	update_ip_assignment_tree(rec->ctdb, ip);
+}
+
+
 static void disable_ip_check_handler(struct ctdb_context *ctdb, uint64_t srvid, 
 			     TDB_DATA data, void *private_data)
 {
@@ -2862,6 +2887,9 @@ static void monitor_cluster(struct ctdb_context *ctdb)
 	/* register a message port for disabling the ip check for a short while */
 	ctdb_set_message_handler(ctdb, CTDB_SRVID_DISABLE_IP_CHECK, disable_ip_check_handler, rec);
 
+	/* register a message port for updating the recovery daemons node assignment for an ip */
+	ctdb_set_message_handler(ctdb, CTDB_SRVID_RECD_UPDATE_IP, recd_update_ip_handler, rec);
+
 again:
 	if (mem_ctx) {
 		talloc_free(mem_ctx);
diff --git a/server/ctdb_takeover.c b/server/ctdb_takeover.c
index 5433172..b18c030 100644
--- a/server/ctdb_takeover.c
+++ b/server/ctdb_takeover.c
@@ -1158,7 +1158,7 @@ void getips_count_callback(void *param, void *data)
 	*ip_list     = new_ip;
 }
 
-struct ctdb_public_ip_list *
+static struct ctdb_public_ip_list *
 create_merged_ip_list(struct ctdb_context *ctdb)
 {
 	int i, j;
@@ -2842,10 +2842,31 @@ int verify_remote_ip_allocation(struct ctdb_context *ctdb, struct ctdb_all_publi
 		}
 
 		if (tmp_ip->pnn != ips->ips[i].pnn) {
-			DEBUG(DEBUG_ERR,("Inconsistent ip allocation. Trigger reallocation.\n"));
+			DEBUG(DEBUG_ERR,("Inconsistent ip allocation. Trigger reallocation. Thinks %s is held by node %u while it is held by node %u\n", ctdb_addr_to_str(&ips->ips[i].addr), ips->ips[i].pnn, tmp_ip->pnn));
 			return -1;
 		}
 	}
 
 	return 0;
 }
+
+int update_ip_assignment_tree(struct ctdb_context *ctdb, struct ctdb_public_ip *ip)
+{
+	struct ctdb_public_ip_list *tmp_ip; 
+
+	if (ctdb->ip_tree == NULL) {
+		DEBUG(DEBUG_ERR,("No ctdb->ip_tree yet. Failed to update ip assignment\n"));
+		return -1;
+	}
+
+	tmp_ip = trbt_lookuparray32(ctdb->ip_tree, IP_KEYLEN, ip_key(&ip->addr));
+	if (tmp_ip == NULL) {
+		DEBUG(DEBUG_ERR,(__location__ " Could not find record for address %s, update ip\n", ctdb_addr_to_str(&ip->addr)));
+		return -1;
+	}
+
+	DEBUG(DEBUG_NOTICE,("Updated ip assignment tree for ip : %s from node %u to node %u\n", ctdb_addr_to_str(&ip->addr), tmp_ip->pnn, ip->pnn));
+	tmp_ip->pnn = ip->pnn;
+
+	return 0;
+}
diff --git a/server/eventscript.c b/server/eventscript.c
index 7409551..8011269 100644
--- a/server/eventscript.c
+++ b/server/eventscript.c
@@ -27,44 +27,13 @@
 #include "lib/events/events.h"
 #include "../common/rb_tree.h"
 
-static struct {
-	struct timeval start;
-	const char *script_running;
-} child_state;
-
 static void ctdb_event_script_timeout(struct event_context *ev, struct timed_event *te, struct timeval t, void *p);
 
 /*
-  ctdbd sends us a SIGTERM when we should time out the current script
+  ctdbd sends us a SIGTERM when we should die.
  */
 static void sigterm(int sig)
 {
-	char tbuf[100], buf[200];
-	time_t t;
-
-	DEBUG(DEBUG_ERR,("Timed out running script '%s' after %.1f seconds pid :%d\n", 
-		 child_state.script_running, timeval_elapsed(&child_state.start), getpid()));
-
-	t = time(NULL);
-
-	strftime(tbuf, sizeof(tbuf)-1, "%Y%m%d%H%M%S", 	localtime(&t));
-	sprintf(buf, "{ pstree -p; cat /proc/locks; ls -li /var/ctdb/ /var/ctdb/persistent; }"
-		" >/tmp/ctdb.event.%s.%d", tbuf, getpid());
-	system(buf);
-
-	DEBUG(DEBUG_ERR,("Logged timedout eventscript : %s\n", buf));
-
-	/* all the child processes will be running in the same process group */
-	kill(-getpgrp(), SIGKILL);
-	_exit(1);
-}
-
-/*
-  ctdbd sends us a SIGABRT when we should abort the current script.
-  we abort any active monitor script any time a different event is generated.
- */
-static void sigabrt(int sig)
-{
 	/* all the child processes will be running in the same process group */
 	kill(-getpgrp(), SIGKILL);
 	_exit(1);
@@ -78,7 +47,6 @@ struct ctdb_event_script_state {
 	int fd[2];
 	void *private_data;
 	bool from_user;
-	bool aborted;
 	enum ctdb_eventscript_call call;
 	const char *options;
 	struct timeval timeout;
@@ -291,7 +259,6 @@ static int child_setup(struct ctdb_context *ctdb)
 	}
 
 	signal(SIGTERM, sigterm);
-	signal(SIGABRT, sigabrt);
 	return 0;
 }
 
@@ -368,7 +335,6 @@ static int child_run_script(struct ctdb_context *ctdb,
 	int ret;
 	TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
 
-	child_state.start = timeval_current();
 	ret = child_setup(ctdb);
 	if (ret != 0)
 		goto out;
@@ -376,7 +342,6 @@ static int child_run_script(struct ctdb_context *ctdb,
 	cmdstr = child_command_string(ctdb, tmp_ctx, from_user,
 				      current->name, call, options);
 	CTDB_NO_MEMORY(ctdb, cmdstr);
-	child_state.script_running = cmdstr;
 
 	DEBUG(DEBUG_DEBUG,("Executing event script %s\n",cmdstr));
 
@@ -518,6 +483,46 @@ static void ctdb_event_script_handler(struct event_context *ev, struct fd_event
 	}
 }
 
+static void debug_timeout(struct ctdb_event_script_state *state)
+{
+	struct ctdb_script_wire *current = get_current_script(state);
+	char *cmd;
+	pid_t pid;
+	time_t t;
+	char tbuf[100], buf[200];
+
+	cmd = child_command_string(state->ctdb, state,
+				   state->from_user, current->name,
+				   state->call, state->options);
+	CTDB_NO_MEMORY_VOID(state->ctdb, cmd);
+
+	DEBUG(DEBUG_ERR,("Timed out running script '%s' after %.1f seconds pid :%d\n",
+			 cmd, timeval_elapsed(&current->start), state->child));
+	talloc_free(cmd);
+
+	t = time(NULL);
+	strftime(tbuf, sizeof(tbuf)-1, "%Y%m%d%H%M%S", 	localtime(&t));
+	sprintf(buf, "{ pstree -p; cat /proc/locks; ls -li /var/ctdb/ /var/ctdb/persistent; }"
+			" >/tmp/ctdb.event.%s.%d", tbuf, getpid());
+
+	pid = fork();
+	if (pid == 0) {
+		ctdb_reduce_priority(state->ctdb);
+		system(buf);
+		/* Now we can kill the child */
+		kill(state->child, SIGTERM);
+		exit(0);
+	}
+	if (pid == -1) {
+		DEBUG(DEBUG_ERR,("Fork for debug script failed : %s\n",
+				 strerror(errno)));
+	} else {
+		DEBUG(DEBUG_ERR,("Logged timedout eventscript : %s\n", buf));
+		/* Don't kill child until timeout done. */
+		state->child = 0;
+	}
+}
+
 /* called when child times out */
 static void ctdb_event_script_timeout(struct event_context *ev, struct timed_event *te, 
 				      struct timeval t, void *p)
@@ -543,11 +548,7 @@ static void ctdb_event_script_timeout(struct event_context *ev, struct timed_eve
 		break;
         default:
 		state->scripts->scripts[state->current].status = -ETIME;
-	}
-
-	if (kill(state->child, 0) != 0) {
-		DEBUG(DEBUG_ERR,("Event script child process already dead, errno %s(%d)\n", strerror(errno), errno));
-		state->child = 0;
+		debug_timeout(state);
 	}
 
 	talloc_free(state);
@@ -561,17 +562,10 @@ static int event_script_destructor(struct ctdb_event_script_state *state)
 	int status;
 
 	if (state->child) {
-		if (state->aborted != True) {
-			DEBUG(DEBUG_ERR,(__location__ " Sending SIGTERM to child pid:%d\n", state->child));
+		DEBUG(DEBUG_ERR,(__location__ " Sending SIGTERM to child pid:%d\n", state->child));
 
-			if (kill(state->child, SIGTERM) != 0) {
-				DEBUG(DEBUG_ERR,("Failed to kill child process for eventscript, errno %s(%d)\n", strerror(errno), errno));
-			}
-		} else {
-			DEBUG(DEBUG_INFO,(__location__ " Sending SIGABRT to script child pid:%d\n", state->child));
-			if (kill(state->child, SIGABRT) != 0) {
-				DEBUG(DEBUG_ERR,("Failed to kill child process for eventscript, errno %s(%d)\n", strerror(errno), errno));
-			}
+		if (kill(state->child, SIGTERM) != 0) {
+			DEBUG(DEBUG_ERR,("Failed to kill child process for eventscript, errno %s(%d)\n", strerror(errno), errno));
 		}
 	}
 
@@ -668,7 +662,6 @@ static int ctdb_event_script_callback_v(struct ctdb_context *ctdb,
 	state->callback = callback;
 	state->private_data = private_data;
 	state->from_user = from_user;
-	state->aborted = False;
 	state->call = call;
 	state->options = talloc_vasprintf(state, fmt, ap);
 	state->timeout = timeval_set(ctdb->tunable.script_timeout, 0);
@@ -711,7 +704,6 @@ static int ctdb_event_script_callback_v(struct ctdb_context *ctdb,
 	/* Kill off any running monitor events to run this event. */
 	if (ctdb->current_monitor) {
 		/* Discard script status so we don't save to last_status */
-		ctdb->current_monitor->aborted = True;
 		talloc_free(ctdb->current_monitor->scripts);
 		ctdb->current_monitor->scripts = NULL;
 		talloc_free(ctdb->current_monitor);
@@ -729,6 +721,7 @@ static int ctdb_event_script_callback_v(struct ctdb_context *ctdb,
 		return -1;
 	}
 	state->current = 0;
+	state->child = 0;
 
 	if (!from_user && (call == CTDB_EVENT_MONITOR || call == CTDB_EVENT_STATUS)) {
 		ctdb->current_monitor = state;
diff --git a/tools/ctdb.c b/tools/ctdb.c
index 09c849c..5137846 100644
--- a/tools/ctdb.c
+++ b/tools/ctdb.c
@@ -1072,6 +1072,15 @@ static int move_ip(struct ctdb_context *ctdb, ctdb_sock_addr *addr, uint32_t pnn
 		return -1;
 	}
 
+	/* update the recovery daemon so it now knows to expect the new
+	   node assignment for this ip.
+	*/
+	ret = ctdb_send_message(ctdb, CTDB_BROADCAST_CONNECTED, CTDB_SRVID_RECD_UPDATE_IP, data);
+	if (ret != 0) {
+		DEBUG(DEBUG_ERR,("Failed to send message to update the ip on the recovery master.\n"));
+		return -1;
+	}
+
 	talloc_free(tmp_ctx);
 	return 0;
 }


-- 
CTDB repository