[SCM] CTDB repository - branch master updated - ctdb-1.0.96-10-g27d152a

Ronnie Sahlberg sahlberg at samba.org
Sun Oct 18 23:19:54 MDT 2009


The branch, master has been updated
       via  27d152a918680a59c7412aec7e1772f25b72d469 (commit)
       via  97ded8a629ec762f71bad28515e4fbc810790b1d (commit)
       via  02f68dc60e0b7bf26d631850b12834d5c71a88f2 (commit)
       via  ff824676fab94168707aada7423ae766bc0f711c (commit)
       via  f5e9f3007c10a937158bc8cdfabf33c984cf9c50 (commit)
       via  ddd089810a14efe4be6e1ff3eccaa604e4913c9e (commit)
       via  ef992a64d2376b621d4d2973ae22e567158aee12 (commit)
      from  82fad66123c1b8c5d4ed3b19c39acf6f367b3f37 (commit)

http://gitweb.samba.org/?p=sahlberg/ctdb.git;a=shortlog;h=master


- Log -----------------------------------------------------------------
commit 27d152a918680a59c7412aec7e1772f25b72d469
Author: Ronnie Sahlberg <ronniesahlberg at gmail.com>
Date:   Mon Oct 19 16:22:15 2009 +1100

    add a directory where multiple local scripts can be added to run when executing eventscripts

commit 97ded8a629ec762f71bad28515e4fbc810790b1d
Author: Ronnie Sahlberg <ronniesahlberg at gmail.com>
Date:   Mon Oct 19 15:33:20 2009 +1100

    wait a bit longer before shutting down when the reclock file is missing
    
    print the filename of the missing file when we turn unhealthy and also
    the output of 'df'

commit 02f68dc60e0b7bf26d631850b12834d5c71a88f2
Author: Ronnie Sahlberg <ronniesahlberg at gmail.com>
Date:   Mon Oct 19 15:30:44 2009 +1100

    Revert "dont shutdown a node when the reclock file is temporarily unavailable."
    
    This reverts commit f5e9f3007c10a937158bc8cdfabf33c984cf9c50.

commit ff824676fab94168707aada7423ae766bc0f711c
Author: Ronnie Sahlberg <ronniesahlberg at gmail.com>
Date:   Thu Oct 15 16:03:43 2009 +1100

    Don't run the eventscript monitor when the databases are frozen.
    The databases can become frozen a while before we do the actual recovery
    since we have the re-recovery timeout.
    
    There is no point in doing much monitoring if we are waiting for a recovery,
    or if we are banned.
    This will eliminate some annoying log entries where certain tests will fail if the databases are locked.

commit f5e9f3007c10a937158bc8cdfabf33c984cf9c50
Author: Ronnie Sahlberg <ronniesahlberg at gmail.com>
Date:   Thu Oct 15 13:19:10 2009 +1100

    dont shutdown a node when the reclock file is temporarily unavailable.
    Leave the node as UNHEALTHY this stops clients from accessing the node until
    the reclock file can be accessed again

commit ddd089810a14efe4be6e1ff3eccaa604e4913c9e
Author: Ronnie Sahlberg <ronniesahlberg at gmail.com>
Date:   Thu Oct 15 11:24:54 2009 +1100

    add logging every time we create a file descriptor in the main ctdb daemon
    so we can spot if there are leaks.
    
    plug two file descriptor leaks that occur when sending an ARP fails
    and one leak when we cannot parse the local address during tcp connection establishment

commit ef992a64d2376b621d4d2973ae22e567158aee12
Author: Ronnie Sahlberg <ronniesahlberg at gmail.com>
Date:   Thu Oct 15 07:41:56 2009 +1100

    new version 1.0.97

-----------------------------------------------------------------------

Summary of changes:
 common/system_linux.c      |    7 +++++++
 config/events.d/01.reclock |    8 +++++---
 config/functions           |    6 ++++++
 packaging/RPM/ctdb.spec    |    8 +++++++-
 server/ctdb_daemon.c       |    2 ++
 server/ctdb_lockwait.c     |    4 ++++
 server/ctdb_logging.c      |    2 ++
 server/ctdb_monitor.c      |   23 ++++++++++++++++++++++-
 server/ctdb_persistent.c   |    4 ++++
 server/ctdb_recover.c      |    4 ++++
 server/ctdb_recoverd.c     |    5 +++++
 server/ctdb_traverse.c     |    4 ++++
 server/ctdb_vacuum.c       |    2 ++
 server/eventscript.c       |    2 ++
 tcp/tcp_connect.c          |    7 +++++++
 15 files changed, 83 insertions(+), 5 deletions(-)


Changeset truncated at 500 lines:

diff --git a/common/system_linux.c b/common/system_linux.c
index 7a580cc..999208d 100644
--- a/common/system_linux.c
+++ b/common/system_linux.c
@@ -92,9 +92,11 @@ int ctdb_sys_send_arp(const ctdb_sock_addr *addr, const char *iface)
 			return -1;
 		}
 
+		DEBUG(DEBUG_NOTICE, (__location__ " Created SOCKET FD:%d for sending arp\n", s));
 		strncpy(ifr.ifr_name, iface, sizeof(ifr.ifr_name));
 		if (ioctl(s, SIOCGIFINDEX, &ifr) < 0) {
 			DEBUG(DEBUG_CRIT,(__location__ " interface '%s' not found\n", iface));
+			close(s);
 			return -1;
 		}
 
@@ -171,6 +173,7 @@ int ctdb_sys_send_arp(const ctdb_sock_addr *addr, const char *iface)
 		ret = sendto(s, buffer, 64, 0, (struct sockaddr *)&sall, sizeof(sall));
 		if (ret < 0 ){
 			DEBUG(DEBUG_CRIT,(__location__ " failed sendto\n"));
+			close(s);
 			return -1;
 		}
 
@@ -183,9 +186,11 @@ int ctdb_sys_send_arp(const ctdb_sock_addr *addr, const char *iface)
 			return -1;
 		}
 
+		DEBUG(DEBUG_NOTICE, (__location__ " Created SOCKET FD:%d for sending arp\n", s));
 		strncpy(ifr.ifr_name, iface, sizeof(ifr.ifr_name));
 		if (ioctl(s, SIOCGIFINDEX, &ifr) < 0) {
 			DEBUG(DEBUG_CRIT,(__location__ " interface '%s' not found\n", iface));
+			close(s);
 			return -1;
 		}
 
@@ -422,6 +427,8 @@ int ctdb_sys_open_capture_socket(const char *iface, void **private_data)
 		return -1;
 	}
 
+	DEBUG(DEBUG_NOTICE, (__location__ " Created RAW SOCKET FD:%d for tcp tickle\n", s));
+
 	set_nonblocking(s);
 	set_close_on_exec(s);
 
diff --git a/config/events.d/01.reclock b/config/events.d/01.reclock
index 74b9cea..281c089 100755
--- a/config/events.d/01.reclock
+++ b/config/events.d/01.reclock
@@ -22,8 +22,9 @@ case $cmd in
 
       monitor)
 	ctdb_counter_incr "$RECLOCKCOUNT"
-	ctdb_counter_limit "$RECLOCKCOUNT" 20 && {
-		echo "Reclock file can not be accessed. Shutting down."
+	ctdb_counter_limit "$RECLOCKCOUNT" 200 && {
+		echo "Reclock file \"$RECLOCKFILE\" can not be accessed. Shutting down."
+		df
 		sleep 1
 		ctdb shutdown
 	}
@@ -46,7 +47,8 @@ case $cmd in
 
 
 	ctdb_counter_limit "$RECLOCKCOUNT" 3 && {
-		echo "Reclock file can not be accessed. Mark node UNHEALTHY."
+		echo "Reclock file \"$RECLOCKFILE\" can not be accessed. Mark node UNHEALTHY."
+		df
 		exit 1;
 	}
 	;;
diff --git a/config/functions b/config/functions
index 1117189..1d1d054 100644
--- a/config/functions
+++ b/config/functions
@@ -540,4 +540,10 @@ ctdb_counter_limit () {
 	. $CTDB_BASE/rc.local
 }
 
+[ -d $CTDB_BASE/rc.local.d ] && {
+	for i in $CTDB_BASE/rc.local.d/* ; do
+		[ -x "$i" ] && . "$i"
+	done
+}
+
 
diff --git a/packaging/RPM/ctdb.spec b/packaging/RPM/ctdb.spec
index 25cbc0e..01a5751 100644
--- a/packaging/RPM/ctdb.spec
+++ b/packaging/RPM/ctdb.spec
@@ -4,7 +4,7 @@ Summary: Clustered TDB
 Vendor: Samba Team
 Packager: Samba Team <samba at samba.org>
 Name: ctdb
-Version: 1.0.96
+Version: 1.0.97
 Release: 1
 Epoch: 0
 License: GNU GPL version 3
@@ -132,6 +132,12 @@ fi
 %{_libdir}/pkgconfig/ctdb.pc
 
 %changelog
+* Thu Oct 14 2009 : Version 1.0.97
+ - From martins : update onnode.
+   Update onnode to allow specifying an alternative nodes file from
+   the command line and also to be able to specify hostnames on the
+   list of targets :
+   onnode host1,host2,...   
 * Tue Oct 13 2009 : Version 1.0.96
  - Add more debugging output when eventscripts have trouble. Print a 
    "pstree -p" to the log when scripts have hung.
diff --git a/server/ctdb_daemon.c b/server/ctdb_daemon.c
index 0af3bc1..cc496f5 100644
--- a/server/ctdb_daemon.c
+++ b/server/ctdb_daemon.c
@@ -559,6 +559,8 @@ static void ctdb_accept_client(struct event_context *ev, struct fd_event *fde,
 	set_nonblocking(fd);
 	set_close_on_exec(fd);
 
+	DEBUG(DEBUG_NOTICE,(__location__ " Created SOCKET FD:%d to connected child\n", fd));
+
 	client = talloc_zero(ctdb, struct ctdb_client);
 #ifdef _AIX
 	if (getsockopt(fd, SOL_SOCKET, SO_PEERID, &cr, &crl) == 0) {
diff --git a/server/ctdb_lockwait.c b/server/ctdb_lockwait.c
index be3a097..e02cd21 100644
--- a/server/ctdb_lockwait.c
+++ b/server/ctdb_lockwait.c
@@ -146,6 +146,10 @@ struct lockwait_handle *ctdb_lockwait(struct ctdb_db_context *ctdb_db,
 	}
 
 	close(result->fd[1]);
+	set_close_on_exec(result->fd[0]);
+
+	DEBUG(DEBUG_NOTICE, (__location__ " Created PIPE FD:%d to child lockwait process\n", result->fd[0]));
+
 	talloc_set_destructor(result, lockwait_destructor);
 
 	result->fde = event_add_fd(ctdb_db->ctdb->ev, result, result->fd[0],
diff --git a/server/ctdb_logging.c b/server/ctdb_logging.c
index ebf4eef..7cd58ad 100644
--- a/server/ctdb_logging.c
+++ b/server/ctdb_logging.c
@@ -250,6 +250,8 @@ int ctdb_set_child_logging(struct ctdb_context *ctdb)
 	set_close_on_exec(p[0]);
 	ctdb->log->pfd = p[0];
 
+	DEBUG(DEBUG_NOTICE, (__location__ " Created PIPE FD:%d for logging\n", p[0]));
+
 	close(1);
 	close(2);
 	if (p[1] != 1) {
diff --git a/server/ctdb_monitor.c b/server/ctdb_monitor.c
index fc96fd7..5e1f7ad 100644
--- a/server/ctdb_monitor.c
+++ b/server/ctdb_monitor.c
@@ -227,10 +227,31 @@ static void ctdb_check_health(struct event_context *ev, struct timed_event *te,
 						 ctdb->monitor->monitor_context, ctdb_startup_callback, 
 						 ctdb, "startup");
 	} else {
-		ret = ctdb_event_script_callback(ctdb, 
+		int i;
+		int skip_monitoring = 0;
+		
+		if (ctdb->recovery_mode != CTDB_RECOVERY_NORMAL) {
+			skip_monitoring = 1;
+			DEBUG(DEBUG_ERR,("Skip monitoring during recovery\n"));
+		}
+		for (i=1; i<=NUM_DB_PRIORITIES; i++) {
+			if (ctdb->freeze_handles[i] != 0) {
+				DEBUG(DEBUG_ERR,("Skip monitoring since databases are frozen\n"));
+				skip_monitoring = 1;
+				break;
+			}
+		}
+		if (skip_monitoring) {
+			event_add_timed(ctdb->ev, ctdb->monitor->monitor_context,
+					timeval_current_ofs(ctdb->monitor->next_interval, 0), 
+					ctdb_check_health, ctdb);
+			return;
+		} else {
+			ret = ctdb_event_script_callback(ctdb, 
 						 timeval_current_ofs(ctdb->tunable.script_timeout, 0),
 						 ctdb->monitor->monitor_context, ctdb_health_callback, 
 						 ctdb, "monitor");
+		}
 	}
 
 	if (ret != 0) {
diff --git a/server/ctdb_persistent.c b/server/ctdb_persistent.c
index f7578d1..77538d9 100644
--- a/server/ctdb_persistent.c
+++ b/server/ctdb_persistent.c
@@ -454,8 +454,12 @@ struct childwrite_handle *ctdb_childwrite(struct ctdb_db_context *ctdb_db,
 	}
 
 	close(result->fd[1]);
+	set_close_on_exec(result->fd[0]);
+
 	talloc_set_destructor(result, childwrite_destructor);
 
+	DEBUG(DEBUG_NOTICE, (__location__ " Created PIPE FD:%d for ctdb_childwrite\n", result->fd[0]));
+
 	result->fde = event_add_fd(ctdb_db->ctdb->ev, result, result->fd[0],
 				   EVENT_FD_READ|EVENT_FD_AUTOCLOSE, childwrite_handler,
 				   (void *)result);
diff --git a/server/ctdb_recover.c b/server/ctdb_recover.c
index 21dc28d..862168e 100644
--- a/server/ctdb_recover.c
+++ b/server/ctdb_recover.c
@@ -736,10 +736,14 @@ int32_t ctdb_control_set_recmode(struct ctdb_context *ctdb,
 		_exit(0);
 	}
 	close(state->fd[1]);
+	set_close_on_exec(state->fd[0]);
+
 	state->fd[1] = -1;
 
 	talloc_set_destructor(state, set_recmode_destructor);
 
+	DEBUG(DEBUG_NOTICE, (__location__ " Created PIPE FD:%d for setrecmode\n", state->fd[0]));
+
 	state->te = event_add_timed(ctdb->ev, state, timeval_current_ofs(5, 0),
 				    ctdb_set_recmode_timeout, state);
 
diff --git a/server/ctdb_recoverd.c b/server/ctdb_recoverd.c
index d759856..83fc808 100644
--- a/server/ctdb_recoverd.c
+++ b/server/ctdb_recoverd.c
@@ -2520,6 +2520,9 @@ static int check_recovery_lock(struct ctdb_context *ctdb)
 	}
 	close(state->fd[1]);
 	state->fd[1] = -1;
+	set_close_on_exec(state->fd[0]);
+
+	DEBUG(DEBUG_NOTICE, (__location__ " Created PIPE FD:%d for check_recovery_lock\n", state->fd[0]));
 
 	talloc_set_destructor(state, check_reclock_destructor);
 
@@ -3311,6 +3314,8 @@ int ctdb_start_recoverd(struct ctdb_context *ctdb)
 		exit(1);
 	}
 
+	DEBUG(DEBUG_NOTICE, (__location__ " Created PIPE FD:%d to recovery daemon\n", fd[0]));
+
 	event_add_fd(ctdb->ev, ctdb, fd[0], EVENT_FD_READ|EVENT_FD_AUTOCLOSE, 
 		     ctdb_recoverd_parent, &fd[0]);	
 
diff --git a/server/ctdb_traverse.c b/server/ctdb_traverse.c
index dbb4b4f..d66036f 100644
--- a/server/ctdb_traverse.c
+++ b/server/ctdb_traverse.c
@@ -176,6 +176,8 @@ static struct ctdb_traverse_local_handle *ctdb_traverse_local(struct ctdb_db_con
 	}
 
 	close(h->fd[1]);
+	set_close_on_exec(h->fd[0]);
+
 	talloc_set_destructor(h, traverse_local_destructor);
 
 	DLIST_ADD(ctdb_db->traverse, h);
@@ -184,6 +186,8 @@ static struct ctdb_traverse_local_handle *ctdb_traverse_local(struct ctdb_db_con
 	  setup a packet queue between the child and the parent. This
 	  copes with all the async and packet boundary issues
 	 */
+	DEBUG(DEBUG_NOTICE, (__location__ " Created PIPE FD:%d to child traverse\n", h->fd[0]));
+
 	h->queue = ctdb_queue_setup(ctdb_db->ctdb, h, h->fd[0], 0, ctdb_traverse_local_handler, h);
 	if (h->queue == NULL) {
 		talloc_free(h);
diff --git a/server/ctdb_vacuum.c b/server/ctdb_vacuum.c
index 69991b5..b1927ba 100644
--- a/server/ctdb_vacuum.c
+++ b/server/ctdb_vacuum.c
@@ -847,6 +847,8 @@ ctdb_vacuum_event(struct event_context *ev, struct timed_event *te,
 		timeval_current_ofs(ctdb->tunable.vacuum_max_run_time, 0),
 		vacuum_child_timeout, child_ctx);
 
+	DEBUG(DEBUG_NOTICE, (__location__ " Created PIPE FD:%d to child vacuum process\n", child_ctx->fd[0]));
+
 	event_add_fd(ctdb->ev, child_ctx, child_ctx->fd[0],
 		EVENT_FD_READ|EVENT_FD_AUTOCLOSE,
 		vacuum_child_handler,
diff --git a/server/eventscript.c b/server/eventscript.c
index 6d2b370..3d139c0 100644
--- a/server/eventscript.c
+++ b/server/eventscript.c
@@ -812,6 +812,8 @@ static int ctdb_event_script_callback_v(struct ctdb_context *ctdb,
 	close(state->fd[1]);
 	set_close_on_exec(state->fd[0]);
 
+	DEBUG(DEBUG_NOTICE, (__location__ " Created PIPE FD:%d to child eventscript process\n", state->fd[0]));
+
 	event_add_fd(ctdb->ev, state, state->fd[0], EVENT_FD_READ|EVENT_FD_AUTOCLOSE,
 		     ctdb_event_script_handler, state);
 
diff --git a/tcp/tcp_connect.c b/tcp/tcp_connect.c
index e3d7772..2074860 100644
--- a/tcp/tcp_connect.c
+++ b/tcp/tcp_connect.c
@@ -158,6 +158,8 @@ void ctdb_tcp_node_connect(struct event_context *ev, struct timed_event *te,
 	set_nonblocking(tnode->fd);
 	set_close_on_exec(tnode->fd);
 
+	DEBUG(DEBUG_NOTICE, (__location__ " Created TCP SOCKET FD:%d\n", tnode->fd));
+
 	/* Bind our side of the socketpair to the same address we use to listen
 	 * on incoming CTDB traffic.
 	 * We must specify this address to make sure that the address we expose to
@@ -166,6 +168,8 @@ void ctdb_tcp_node_connect(struct event_context *ev, struct timed_event *te,
 	 */
 	ZERO_STRUCT(sock_in);
 	if (ctdb_tcp_get_address(ctdb, ctdb->address.address, &sock_in) != 0) {
+		DEBUG(DEBUG_ERR, (__location__ " Failed to find our address. Failing bind.\n"));
+		close(tnode->fd);
 		return;
 	}
 
@@ -186,6 +190,7 @@ void ctdb_tcp_node_connect(struct event_context *ev, struct timed_event *te,
 	default:
 		DEBUG(DEBUG_ERR, (__location__ " unknown family %u\n",
 			sock_in.sa.sa_family));
+		close(tnode->fd);
 		return;
 	}
 #ifdef HAVE_SOCK_SIN_LEN
@@ -253,6 +258,8 @@ static void ctdb_listen_event(struct event_context *ev, struct fd_event *fde,
 	set_nonblocking(in->fd);
 	set_close_on_exec(in->fd);
 
+	DEBUG(DEBUG_NOTICE, (__location__ " Created SOCKET FD:%d to incoming ctdb connection\n", fd));
+
         setsockopt(in->fd,SOL_SOCKET,SO_KEEPALIVE,(char *)&one,sizeof(one));
 
 	in->queue = ctdb_queue_setup(ctdb, in, in->fd, CTDB_TCP_ALIGNMENT, 


-- 
CTDB repository


More information about the samba-cvs mailing list