Rev 625: make sure all public IPs are removed at startup in http://samba.org/~tridge/ctdb

tridge at samba.org tridge at samba.org
Fri Sep 14 01:56:41 GMT 2007


------------------------------------------------------------
revno: 625
revision-id: tridge at samba.org-20070914015640-8wfbokl3itj39eth
parent: tridge at samba.org-20070913234912-u44tuok7s24eklh9
parent: sahlberg at ronnie-20070914003710-0cin31uddj49nitz
committer: Andrew Tridgell <tridge at samba.org>
branch nick: tridge.test2
timestamp: Fri 2007-09-14 11:56:40 +1000
message:
  make sure all public IPs are removed at startup
modified:
  config/events.d/10.interface   10.interface-20070604050809-s21zslfirn07zjt8-1
  server/ctdb_recoverd.c         recoverd.c-20070503213540-bvxuyd9jm1f7ig90-1
    ------------------------------------------------------------
    revno: 432.1.283
    revision-id: sahlberg at ronnie-20070914003710-0cin31uddj49nitz
    parent: sahlberg at ronnie-20070914001636-32qjmvbp13zd6ccj
    committer: Ronnie Sahlberg <sahlberg at ronnie>
    branch nick: ctdb
    timestamp: Fri 2007-09-14 10:37:10 +1000
    message:
      during startup make sure to delete any public addresses from any 
      interface
    modified:
      config/events.d/10.interface   10.interface-20070604050809-s21zslfirn07zjt8-1
    ------------------------------------------------------------
    revno: 432.1.282
    revision-id: sahlberg at ronnie-20070914001636-32qjmvbp13zd6ccj
    parent: sahlberg at ronnie-20070913225627-sdkux18lovqbuuv5
    committer: Ronnie Sahlberg <sahlberg at ronnie>
    branch nick: ctdb
    timestamp: Fri 2007-09-14 10:16:36 +1000
    message:
      let each node verify that it has a correct assignment of public ip
      addresses (i.e. it holds those it should hold and it doesn't hold
      any of those it shouldn't hold)
      
      if an inconsistency is found, set the recovery mode of the local
      node to active and wait for the recovery master to trigger a
      full-blown recovery
    modified:
      server/ctdb_recoverd.c         recoverd.c-20070503213540-bvxuyd9jm1f7ig90-1
=== modified file 'config/events.d/10.interface'
--- a/config/events.d/10.interface	2007-09-13 04:28:18 +0000
+++ b/config/events.d/10.interface	2007-09-14 01:56:40 +0000
@@ -35,6 +35,7 @@
 	destip=`echo $dest | cut -d: -f1`
 	destport=`echo $dest | cut -d: -f2`
 	ctdb killtcp $srcip:$srcport $destip:$destport >/dev/null 2>&1 || _failed=1
+	echo "`date` Killing TCP connection $srcip:$srcport $destip:$destport"
 	_killcount=`expr $_killcount + 1`
     done < $connfile
     /bin/rm -f $connfile
@@ -66,6 +67,15 @@
 	[ -f /proc/sys/net/ipv4/conf/all/arp_filter ] && {
 	    echo 1 > /proc/sys/net/ipv4/conf/all/arp_filter
 	}
+	cat "$CTDB_PUBLIC_ADDRESSES" | cut -d/ -f1 | while read _IP; do
+		_IP_HELD=`/sbin/ip addr show | grep "inet $_IP/"`
+		[ -z "$_IP_HELD" ] || {
+			_IFACE=`echo $_IP_HELD | sed -e "s/.*\s//"`
+			_NM=`echo $_IP_HELD | sed -e "s/.*$_IP\///" -e "s/\s.*//"`
+			echo "`date` Removing public address $_IP/$_NM from device $_IFACE"
+			/sbin/ip addr del $_IP/$_NM dev $_IFACE
+		}
+	done
 	;;
 
 
@@ -124,7 +134,25 @@
 	/sbin/iptables -D INPUT -i $iface -d $ip -j DROP 2> /dev/null
 	/sbin/iptables -I INPUT -i $iface -d $ip -j DROP
 	kill_tcp_connections $ip
+
+	# the ip tool will delete all secondary IPs if this is the primary. To work around
+	# this _very_ annoying behaviour we have to keep a record of the secondaries and re-add
+	# them afterwards. yuck
+	secondaries=""
+	if /sbin/ip addr list dev $iface primary | grep "inet $ip/$maskbits " > /dev/null; then
+	    secondaries=`/sbin/ip addr list dev $iface secondary | grep " inet " | awk '{print $2}'`
+	fi
 	/sbin/ip addr del $ip/$maskbits dev $iface || failed=1
+	[ -z "$secondaries" ] || {
+	    for i in $secondaries; do
+		if /sbin/ip addr list dev $iface | grep "inet $i" > /dev/null; then
+		    echo "`date` kept secondary $i on dev $iface"
+		else 
+		    echo "`date` re-adding secondary address $i to dev $iface"
+		    /sbin/ip addr add $i dev $iface || failed=1		
+		fi
+	    done
+	}
 	/sbin/iptables -D INPUT -i $iface -d $ip -j DROP
 	[ $failed = 0 ] || {
 		 echo "`/bin/date` Failed to del $ip on dev $iface"

=== modified file 'server/ctdb_recoverd.c'
--- a/server/ctdb_recoverd.c	2007-09-13 23:49:12 +0000
+++ b/server/ctdb_recoverd.c	2007-09-14 01:56:40 +0000
@@ -21,6 +21,7 @@
 #include "lib/events/events.h"
 #include "system/filesys.h"
 #include "system/time.h"
+#include "system/network.h"
 #include "popt.h"
 #include "cmdline.h"
 #include "../include/ctdb.h"
@@ -1423,6 +1424,7 @@
 	struct ctdb_vnn_map *remote_vnnmap=NULL;
 	int i, j, ret;
 	struct ctdb_recoverd *rec;
+	struct ctdb_all_public_ips *ips;
 
 	rec = talloc_zero(ctdb, struct ctdb_recoverd);
 	CTDB_NO_MEMORY_FATAL(ctdb, rec);
@@ -1533,7 +1535,34 @@
 		force_election(rec, mem_ctx, pnn, nodemap);
 		goto again;
 	}
-	
+
+	/* verify that the public ip address allocation is consistent */
+	if (ctdb->vnn != NULL) {
+		ret = ctdb_ctrl_get_public_ips(ctdb, CONTROL_TIMEOUT(), CTDB_CURRENT_NODE, mem_ctx, &ips);
+		if (ret != 0) {
+			DEBUG(0, ("Unable to get public ips from node %u\n", i));
+			goto again;
+		}
+		for (j=0; j<ips->num; j++) {
+			/* verify that we have the ip addresses we should have
+			   and we dont have ones we shouldnt have.
+			   if we find an inconsistency we set recmode to
+			   active on the local node and wait for the recmaster
+			   to do a full blown recovery
+			*/
+			if (ips->ips[j].pnn == pnn) {
+				if (!ctdb_sys_have_ip(ips->ips[j].sin)) {
+					DEBUG(0,("Public address '%s' is missing and we should serve this ip\n", inet_ntoa(ips->ips[j].sin.sin_addr)));
+					ctdb_ctrl_setrecmode(ctdb, CONTROL_TIMEOUT(), CTDB_CURRENT_NODE, CTDB_RECOVERY_ACTIVE);
+				}
+			} else {
+				if (ctdb_sys_have_ip(ips->ips[j].sin)) {
+					DEBUG(0,("We are still serving a public address '%s' that we should not be serving.\n", inet_ntoa(ips->ips[j].sin.sin_addr)));
+					ctdb_ctrl_setrecmode(ctdb, CONTROL_TIMEOUT(), CTDB_CURRENT_NODE, CTDB_RECOVERY_ACTIVE);
+				}
+			}
+		}
+	}
 
 	/* if we are not the recmaster then we do not need to check
 	   if recovery is needed



More information about the samba-cvs mailing list