Rev 625: make sure all public IPs are removed at startup in
http://samba.org/~tridge/ctdb
tridge at samba.org
tridge at samba.org
Fri Sep 14 01:56:41 GMT 2007
------------------------------------------------------------
revno: 625
revision-id: tridge at samba.org-20070914015640-8wfbokl3itj39eth
parent: tridge at samba.org-20070913234912-u44tuok7s24eklh9
parent: sahlberg at ronnie-20070914003710-0cin31uddj49nitz
committer: Andrew Tridgell <tridge at samba.org>
branch nick: tridge.test2
timestamp: Fri 2007-09-14 11:56:40 +1000
message:
make sure all public IPs are removed at startup
modified:
config/events.d/10.interface 10.interface-20070604050809-s21zslfirn07zjt8-1
server/ctdb_recoverd.c recoverd.c-20070503213540-bvxuyd9jm1f7ig90-1
------------------------------------------------------------
revno: 432.1.283
revision-id: sahlberg at ronnie-20070914003710-0cin31uddj49nitz
parent: sahlberg at ronnie-20070914001636-32qjmvbp13zd6ccj
committer: Ronnie Sahlberg <sahlberg at ronnie>
branch nick: ctdb
timestamp: Fri 2007-09-14 10:37:10 +1000
message:
during startup make sure to delete any public addresses from any
interface
modified:
config/events.d/10.interface 10.interface-20070604050809-s21zslfirn07zjt8-1
------------------------------------------------------------
revno: 432.1.282
revision-id: sahlberg at ronnie-20070914001636-32qjmvbp13zd6ccj
parent: sahlberg at ronnie-20070913225627-sdkux18lovqbuuv5
committer: Ronnie Sahlberg <sahlberg at ronnie>
branch nick: ctdb
timestamp: Fri 2007-09-14 10:16:36 +1000
message:
let each node verify that they have a correct assignment of public ip
addresses (i.e. they hold those they should hold and they don't hold
any of those they shouldn't hold)
if an inconsistency is found, mark the local node as recovery mode
active
and wait for the recovery master to trigger a full blown recovery
modified:
server/ctdb_recoverd.c recoverd.c-20070503213540-bvxuyd9jm1f7ig90-1
=== modified file 'config/events.d/10.interface'
--- a/config/events.d/10.interface 2007-09-13 04:28:18 +0000
+++ b/config/events.d/10.interface 2007-09-14 01:56:40 +0000
@@ -35,6 +35,7 @@
destip=`echo $dest | cut -d: -f1`
destport=`echo $dest | cut -d: -f2`
ctdb killtcp $srcip:$srcport $destip:$destport >/dev/null 2>&1 || _failed=1
+ echo "`date` Killing TCP connection $srcip:$srcport $destip:$destport"
_killcount=`expr $_killcount + 1`
done < $connfile
/bin/rm -f $connfile
@@ -66,6 +67,15 @@
[ -f /proc/sys/net/ipv4/conf/all/arp_filter ] && {
echo 1 > /proc/sys/net/ipv4/conf/all/arp_filter
}
+ cat "$CTDB_PUBLIC_ADDRESSES" | cut -d/ -f1 | while read _IP; do
+ _IP_HELD=`/sbin/ip addr show | grep "inet $_IP/"`
+ [ -z "$_IP_HELD" ] || {
+ _IFACE=`echo $_IP_HELD | sed -e "s/.*\s//"`
+ _NM=`echo $_IP_HELD | sed -e "s/.*$_IP\///" -e "s/\s.*//"`
+ echo "`date` Removing public address $_IP/$_NM from device $_IFACE"
+ /sbin/ip addr del $_IP/$_NM dev $_IFACE
+ }
+ done
;;
@@ -124,7 +134,25 @@
/sbin/iptables -D INPUT -i $iface -d $ip -j DROP 2> /dev/null
/sbin/iptables -I INPUT -i $iface -d $ip -j DROP
kill_tcp_connections $ip
+
+ # the ip tool will delete all secondary IPs if this is the primary. To work around
+ # this _very_ annoying behaviour we have to keep a record of the secondaries and re-add
+ # them afterwards. yuck
+ secondaries=""
+ if /sbin/ip addr list dev $iface primary | grep "inet $ip/$maskbits " > /dev/null; then
+ secondaries=`/sbin/ip addr list dev $iface secondary | grep " inet " | awk '{print $2}'`
+ fi
/sbin/ip addr del $ip/$maskbits dev $iface || failed=1
+ [ -z "$secondaries" ] || {
+ for i in $secondaries; do
+ if /sbin/ip addr list dev $iface | grep "inet $i" > /dev/null; then
+ echo "`date` kept secondary $i on dev $iface"
+ else
+ echo "`date` re-adding secondary address $i to dev $iface"
+ /sbin/ip addr add $i dev $iface || failed=1
+ fi
+ done
+ }
/sbin/iptables -D INPUT -i $iface -d $ip -j DROP
[ $failed = 0 ] || {
echo "`/bin/date` Failed to del $ip on dev $iface"
=== modified file 'server/ctdb_recoverd.c'
--- a/server/ctdb_recoverd.c 2007-09-13 23:49:12 +0000
+++ b/server/ctdb_recoverd.c 2007-09-14 01:56:40 +0000
@@ -21,6 +21,7 @@
#include "lib/events/events.h"
#include "system/filesys.h"
#include "system/time.h"
+#include "system/network.h"
#include "popt.h"
#include "cmdline.h"
#include "../include/ctdb.h"
@@ -1423,6 +1424,7 @@
struct ctdb_vnn_map *remote_vnnmap=NULL;
int i, j, ret;
struct ctdb_recoverd *rec;
+ struct ctdb_all_public_ips *ips;
rec = talloc_zero(ctdb, struct ctdb_recoverd);
CTDB_NO_MEMORY_FATAL(ctdb, rec);
@@ -1533,7 +1535,34 @@
force_election(rec, mem_ctx, pnn, nodemap);
goto again;
}
-
+
+ /* verify that the public ip address allocation is consistent */
+ if (ctdb->vnn != NULL) {
+ ret = ctdb_ctrl_get_public_ips(ctdb, CONTROL_TIMEOUT(), CTDB_CURRENT_NODE, mem_ctx, &ips);
+ if (ret != 0) {
+ DEBUG(0, ("Unable to get public ips from node %u\n", i));
+ goto again;
+ }
+ for (j=0; j<ips->num; j++) {
+ /* verify that we have the ip addresses we should have
+ and we dont have ones we shouldnt have.
+ if we find an inconsistency we set recmode to
+ active on the local node and wait for the recmaster
+ to do a full blown recovery
+ */
+ if (ips->ips[j].pnn == pnn) {
+ if (!ctdb_sys_have_ip(ips->ips[j].sin)) {
+ DEBUG(0,("Public address '%s' is missing and we should serve this ip\n", inet_ntoa(ips->ips[j].sin.sin_addr)));
+ ctdb_ctrl_setrecmode(ctdb, CONTROL_TIMEOUT(), CTDB_CURRENT_NODE, CTDB_RECOVERY_ACTIVE);
+ }
+ } else {
+ if (ctdb_sys_have_ip(ips->ips[j].sin)) {
+ DEBUG(0,("We are still serving a public address '%s' that we should not be serving.\n", inet_ntoa(ips->ips[j].sin.sin_addr)));
+ ctdb_ctrl_setrecmode(ctdb, CONTROL_TIMEOUT(), CTDB_CURRENT_NODE, CTDB_RECOVERY_ACTIVE);
+ }
+ }
+ }
+ }
/* if we are not the recmaster then we do not need to check
if recovery is needed
More information about the samba-cvs
mailing list