[SCM] CTDB repository - branch 1.2-nodeflags updated - ctdb-1.9.1-245-g42f6125

Sun Dec 5 18:17:55 MST 2010

The branch, 1.2-nodeflags has been updated
       via  42f6125b66cb18bce362693358f4085c58dd0695 (commit)
       via  b72cb646592252dcbcd142e9527267d0a44e174d (commit)
       via  3dea4622bcfbe0bc1b64382db553237ceb548d4e (commit)
       via  04098c1123893efc7fec02cd957345c35b92d845 (commit)
       via  73f1794bbb91ab5341bbb07388349edbd8cee1ad (commit)
      from  934b30a8593ebe09b1882713339d59c09db0f4ff (commit)

http://gitweb.samba.org/?p=sahlberg/ctdb.git;a=shortlog;h=1.2-nodeflags


- Log -----------------------------------------------------------------
commit 42f6125b66cb18bce362693358f4085c58dd0695
Author: Ronnie Sahlberg <ronniesahlberg at gmail.com>
Date:   Mon Dec 6 11:30:24 2010 +1100

    Add 60.ganesha to what gets installed by make install as well as by the RPM

commit b72cb646592252dcbcd142e9527267d0a44e174d
Author: Ronnie Sahlberg <ronniesahlberg at gmail.com>
Date:   Mon Dec 6 11:26:43 2010 +1100

    add a missing part of the import of the previous ganesha patch

commit 3dea4622bcfbe0bc1b64382db553237ceb548d4e
Author: Chandra Seetharaman <sekharan at us.ibm.com>
Date:   Fri Dec 3 15:26:22 2010 -0800

    make changes to ctdb event scripts to support NFS-Ganesha.
    
    make changes to ctdb event scripts to support NFS-Ganesha.
    
    Signed-off-by: Chandra Seetharaman <sekharan at us.ibm.com>
    Signed-off-by: Ronnie Sahlberg <ronniesahlberg at gmail.com>

commit 04098c1123893efc7fec02cd957345c35b92d845
Author: Ronnie Sahlberg <ronniesahlberg at gmail.com>
Date:   Fri Dec 3 13:28:35 2010 +1100

    during ip allocation, there are failure modes where a node might hold an ip address
    but think it is still unassigned (-1).
    
    add code to the recovery daemon to detect this case and trigger a reallocation
    so that the ip gets covered
    
    and change the takeip code to allow for this condition, taking on an ip address that is
    already hosted.
    
    cq s1021073

commit 73f1794bbb91ab5341bbb07388349edbd8cee1ad
Author: Ronnie Sahlberg <ronniesahlberg at gmail.com>
Date:   Fri Dec 3 06:07:03 2010 +1100

    don't try starting samba through the "init" event

-----------------------------------------------------------------------

Summary of changes:
 Makefile.in                |    1 +
 config/events.d/50.samba   |    2 +
 config/events.d/60.ganesha |  158 ++++++++++++++++++++++++++++++++++++++++++++
 config/events.d/60.nfs     |    2 +
 config/functions           |    1 +
 packaging/RPM/ctdb.spec.in |    1 +
 server/ctdb_recoverd.c     |   14 +++-
 server/ctdb_takeover.c     |    9 +++
 8 files changed, 184 insertions(+), 4 deletions(-)
 create mode 100644 config/events.d/60.ganesha


Changeset truncated at 500 lines:

diff --git a/Makefile.in b/Makefile.in
index 849d626..e5ef81f 100755
--- a/Makefile.in
+++ b/Makefile.in
@@ -243,6 +243,7 @@ install: all
 	${INSTALLCMD} -m 755 config/events.d/41.httpd $(DESTDIR)$(etcdir)/ctdb/events.d
 	${INSTALLCMD} -m 755 config/events.d/50.samba $(DESTDIR)$(etcdir)/ctdb/events.d
 	${INSTALLCMD} -m 755 config/events.d/60.nfs $(DESTDIR)$(etcdir)/ctdb/events.d
+	${INSTALLCMD} -m 644 config/events.d/60.ganesha $(DESTDIR)$(etcdir)/ctdb/events.d
 	${INSTALLCMD} -m 755 config/events.d/62.cnfs $(DESTDIR)$(etcdir)/ctdb/events.d
 	${INSTALLCMD} -m 755 config/events.d/70.iscsi $(DESTDIR)$(etcdir)/ctdb/events.d
 	${INSTALLCMD} -m 755 config/events.d/91.lvs $(DESTDIR)$(etcdir)/ctdb/events.d
diff --git a/config/events.d/50.samba b/config/events.d/50.samba
index 54a29cf..6e84f5f 100755
--- a/config/events.d/50.samba
+++ b/config/events.d/50.samba
@@ -206,8 +206,10 @@ periodic_cleanup() {
 
 ###########################
 
+[ "$1" = "init" ] || {
 ctdb_start_stop_service
 ctdb_start_stop_service "winbind"
+}
 
 is_ctdb_managed_service || is_ctdb_managed_service "winbind" || exit 0
 
diff --git a/config/events.d/60.ganesha b/config/events.d/60.ganesha
new file mode 100644
index 0000000..b7bc923
--- /dev/null
+++ b/config/events.d/60.ganesha
@@ -0,0 +1,158 @@
+#!/bin/sh
+# script to manage nfs in a clustered environment
+
+start_nfs() {
+       /bin/mkdir -p $CTDB_VARDIR/state/nfs
+       /bin/mkdir -p $CTDB_VARDIR/state/statd/ip
+       ctdb_service_stop
+       ctdb_service_start
+       echo 1 > /proc/sys/net/ipv4/tcp_tw_recycle
+}
+
+. $CTDB_BASE/functions
+
+loadconfig nfs
+
+[ "$NFS_SERVER_MODE" == "GANESHA" ] || exit 0
+
+service_name="nfs-ganesha-gpfs"
+
+ctdb_start_stop_service
+
+is_ctdb_managed_service || exit 0
+
+case "$1" in
+     init)
+       # read statd from persistent database
+       ;;
+     startup)
+       ctdb_service_start
+       mkdir -p $CTDB_VARDIR/state/statd
+       touch $CTDB_VARDIR/state/statd/update-trigger
+       ;;
+
+     shutdown)
+       ctdb_service_stop
+       ;;
+
+     takeip)
+       ctdb_service_set_reconfigure
+       ;;
+
+     releaseip)
+       ctdb_service_set_reconfigure
+       ;;
+
+      monitor)
+       if ctdb_service_needs_reconfigure ; then
+           ctdb_service_reconfigure
+           exit 0
+       fi
+
+       update_tickles 2049
+
+       # check that statd responds to rpc requests
+       # if statd is not running we try to restart it
+       if ctdb_check_rpc "STATD" status 1 >/dev/null ; then
+               (service_name="nfs_statd"; ctdb_counter_init)
+       else
+               p="rpc.statd" ; cmd="$p"
+               cmd="${cmd}${STATD_HOSTNAME:+ -n }${STATD_HOSTNAME}"
+               cmd="${cmd}${STATD_PORT:+ -p }${STATD_PORT}"
+               cmd="${cmd}${STATD_OUTGOING_PORT:+ -o }${STATD_OUTGOING_PORT}"
+               (
+                       service_name="nfs_statd"
+                       ctdb_counter_incr
+                       ctdb_check_counter_limit 10 quiet >/dev/null
+               ) || {
+                       echo "$ctdb_check_rpc_out"
+                       echo "Trying to restart STATD [$cmd]"
+               }
+               $cmd
+       fi
+
+       # check that NFS responds to rpc requests
+       [ "$CTDB_NFS_SKIP_KNFSD_ALIVE_CHECK" = "yes" ] || {
+           if ctdb_check_rpc "NFS" nfs 3 >/dev/null ; then
+               (service_name="nfs_knfsd"; ctdb_counter_init)
+           else
+               (
+                       service_name="nfs_knfsd"
+                       ctdb_counter_incr
+
+                       ctdb_check_counter_equal 10 || {
+                               echo "Trying to restart NFS service"
+                               ctdb_service_stop
+                               ctdb_service_start
+                               exit 0
+                       }
+
+                       ctdb_check_counter_limit 15 quiet >/dev/null
+               ) || {
+                       echo "$ctdb_check_rpc_out"
+                       echo "Trying to restart NFS service"
+                       ctdb_service_stop
+                       ctdb_service_start
+                       exit 1
+               }
+           fi
+       }
+
+       # and that its directories are available
+       [ "$CTDB_NFS_SKIP_SHARE_CHECK" = "yes" ] || {
+           grep Path /etc/ganesha/gpfs.ganesha.exports.conf |
+           cut -f2 -d\" | ctdb_check_directories
+       } || exit $?
+
+       # check that lockd responds to rpc requests
+       ctdb_check_rpc "LOCKD" nlockmgr 4 || {
+               echo "Trying to restart lock manager service"
+               ctdb_service_stop
+               ctdb_service_start
+               exit 1
+       }
+
+       # check mounts responds to rpc requests
+       ctdb_check_rpc "MOUNTD" mountd 1 >/dev/null || {
+               echo "Trying to restart mountd service"
+               ctdb_service_stop
+               ctdb_service_start
+               exit 1
+       }
+
+       # rquotad needs special handling since it is sometimes not started
+       # correctly on RHEL5
+       # this is not a critical service so we dont flag the node as unhealthy
+       ctdb_check_rpc "RQUOTAD" rquotad 1 || {
+               p="rpc.rquotad"
+               cmd="${p}${RQUOTAD_PORT:+ -p }${RQUOTAD_PORT}"
+               echo "Trying to restart RQUOTAD [${cmd}]"
+               killall -q -9 $p
+               $cmd &
+       }
+
+       # once every 60 seconds, update the statd state database for which
+       # clients need notifications
+       LAST_UPDATE=`stat --printf="%Y" $CTDB_VARDIR/state/statd/update-trigger 2>/dev/null`
+       CURRENT_TIME=`date +"%s"`
+       [ $CURRENT_TIME -ge $(($LAST_UPDATE + 60)) ] && {
+           mkdir -p $CTDB_VARDIR/state/statd
+           touch $CTDB_VARDIR/state/statd/update-trigger
+           $CTDB_BASE/statd-callout updatelocal &
+           $CTDB_BASE/statd-callout updateremote &
+       }
+               ;;
+
+    ipreallocated)
+       # if the ips have been reallocated, we must restart the lockmanager
+       # across all nodes and ping all statd listeners
+       [ -x $CTDB_BASE/statd-callout ] && {
+               $CTDB_BASE/statd-callout notify &
+       } >/dev/null 2>&1
+       ;;
+    *)
+       ctdb_standard_event_handler "$@"
+       ;;
+esac
+
+exit 0
diff --git a/config/events.d/60.nfs b/config/events.d/60.nfs
index 23f22b7..a8fe243 100755
--- a/config/events.d/60.nfs
+++ b/config/events.d/60.nfs
@@ -17,6 +17,8 @@ service_stop="startstop_nfs stop"
 
 loadconfig
 
+[ "$NFS_SERVER_MODE" != "GANESHA" ] || exit 0
+
 ctdb_start_stop_service
 
 is_ctdb_managed_service || exit 0
diff --git a/config/functions b/config/functions
index ee38225..4dc645b 100755
--- a/config/functions
+++ b/config/functions
@@ -674,6 +674,7 @@ is_ctdb_managed_service ()
     ctdb_compat_managed_service "$CTDB_MANAGES_ISCSI"    "iscsi"
     ctdb_compat_managed_service "$CTDB_MANAGES_CLAMD"    "clamd"
     ctdb_compat_managed_service "$CTDB_MANAGES_NFS"      "nfs"
+    ctdb_compat_managed_service "$CTDB_MANAGES_NFS"      "nfs-ganesha-gpfs"
 
     # Returns 0 if "<space>$_service_name<space>" appears in $t
     [ "${t#* ${_service_name} }" != "${t}" ]
diff --git a/packaging/RPM/ctdb.spec.in b/packaging/RPM/ctdb.spec.in
index 09c4a9e..09474d3 100644
--- a/packaging/RPM/ctdb.spec.in
+++ b/packaging/RPM/ctdb.spec.in
@@ -107,6 +107,7 @@ rm -rf $RPM_BUILD_ROOT
 %{_sysconfdir}/ctdb/events.d/41.httpd
 %{_sysconfdir}/ctdb/events.d/50.samba
 %{_sysconfdir}/ctdb/events.d/60.nfs
+%{_sysconfdir}/ctdb/events.d/60.ganesha
 %{_sysconfdir}/ctdb/events.d/62.cnfs
 %{_sysconfdir}/ctdb/events.d/70.iscsi
 %{_sysconfdir}/ctdb/events.d/91.lvs
diff --git a/server/ctdb_recoverd.c b/server/ctdb_recoverd.c
index fc56088..053e8e0 100644
--- a/server/ctdb_recoverd.c
+++ b/server/ctdb_recoverd.c
@@ -2444,7 +2444,7 @@ static enum monitor_result verify_recmaster(struct ctdb_recoverd *rec, struct ct
 
 /* called to check that the local allocation of public ip addresses is ok.
 */
-static int verify_local_ip_allocation(struct ctdb_context *ctdb, struct ctdb_recoverd *rec, uint32_t pnn)
+static int verify_local_ip_allocation(struct ctdb_context *ctdb, struct ctdb_recoverd *rec, uint32_t pnn, struct ctdb_node_map *nodemap)
 {
 	TALLOC_CTX *mem_ctx = talloc_new(NULL);
 	struct ctdb_control_get_ifaces *ifaces = NULL;
@@ -2535,11 +2535,17 @@ static int verify_local_ip_allocation(struct ctdb_context *ctdb, struct ctdb_rec
 	   and we dont have ones we shouldnt have.
 	   if we find an inconsistency we set recmode to
 	   active on the local node and wait for the recmaster
-	   to do a full blown recovery
+	   to do a full blown recovery.
+	   also if the pnn is -1 and we are healthy and can host the ip
+	   we also request a ip reallocation.
 	*/
 	if (ctdb->tunable.disable_ip_failover == 0) {
 		for (j=0; j<ips->num; j++) {
-			if (ips->ips[j].pnn == pnn) {
+			if (ips->ips[j].pnn == -1 && nodemap->nodes[pnn].flags == 0) {
+				DEBUG(DEBUG_CRIT,("Public address '%s' is not assigned and we could serve this ip\n",
+						ctdb_addr_to_str(&ips->ips[j].addr)));
+				need_takeover_run = true;
+			} else if (ips->ips[j].pnn == pnn) {
 				if (!ctdb_sys_have_ip(&ips->ips[j].addr)) {
 					DEBUG(DEBUG_CRIT,("Public address '%s' is missing and we should serve this ip\n",
 						ctdb_addr_to_str(&ips->ips[j].addr)));
@@ -3071,7 +3077,7 @@ static void main_loop(struct ctdb_context *ctdb, struct ctdb_recoverd *rec,
 	 */ 
 	if (ctdb->tunable.disable_ip_failover == 0) {
 		if (rec->ip_check_disable_ctx == NULL) {
-			if (verify_local_ip_allocation(ctdb, rec, pnn) != 0) {
+			if (verify_local_ip_allocation(ctdb, rec, pnn, nodemap) != 0) {
 				DEBUG(DEBUG_ERR, (__location__ " Public IPs were inconsistent.\n"));
 			}
 		}
diff --git a/server/ctdb_takeover.c b/server/ctdb_takeover.c
index d495807..682d17b 100644
--- a/server/ctdb_takeover.c
+++ b/server/ctdb_takeover.c
@@ -611,6 +611,15 @@ int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb,
 		return -1;
 	}
 
+	if (vnn->pnn == -1 && have_ip) {
+		vnn->pnn = ctdb->pnn;
+		DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
+				  "and we already have it on iface[%s], update local daemon\n",
+				 ctdb_addr_to_str(&vnn->public_address),
+				  ctdb_vnn_iface_string(vnn)));
+		return 0;
+	}
+
 	if (vnn->iface) {
 		if (vnn->iface->link_up) {
 			/* only move when the rebalance gains something */


-- 
CTDB repository