[SCM] Samba Shared Repository - branch master updated

Amitay Isaacs amitay at samba.org
Wed Jan 13 22:28:02 UTC 2016


The branch, master, has been updated
       via  39bc356 ctdb-ipalloc: Document the steps involved in a takeover run
       via  e320725 ctdb-ipalloc: Split IP allocation into its own build subsystem
      from  19d3fd1 s4-rpc_server: Add missing include for ROLE_ACTIVE_DIRECTORY_DC

https://git.samba.org/?p=samba.git;a=shortlog;h=master


- Log -----------------------------------------------------------------
commit 39bc356ccb3fdfb9bd69c33ccf0fb1cb76f3c090
Author: Martin Schwenke <martin at meltin.net>
Date:   Fri Dec 11 16:07:51 2015 +1100

    ctdb-ipalloc: Document the steps involved in a takeover run
    
    Signed-off-by: Martin Schwenke <martin at meltin.net>
    Reviewed-by: Amitay Isaacs <amitay at gmail.com>
    
    Autobuild-User(master): Amitay Isaacs <amitay at samba.org>
    Autobuild-Date(master): Wed Jan 13 23:27:01 CET 2016 on sn-devel-144

commit e320725f0206c56f5fe8b8b580d677c2aa56ca47
Author: Martin Schwenke <martin at meltin.net>
Date:   Mon Nov 23 16:18:16 2015 +1100

    ctdb-ipalloc: Split IP allocation into its own build subsystem
    
    Signed-off-by: Martin Schwenke <martin at meltin.net>
    Reviewed-by: Amitay Isaacs <amitay at gmail.com>

-----------------------------------------------------------------------

Summary of changes:
 ctdb/server/ctdb_takeover.c            | 879 +--------------------------------
 ctdb/server/ipalloc.c                  |  53 ++
 ctdb/server/ipalloc.h                  |  63 +++
 ctdb/server/ipalloc_common.c           | 206 ++++++++
 ctdb/server/ipalloc_deterministic.c    |  62 +++
 ctdb/server/ipalloc_lcp2.c             | 515 +++++++++++++++++++
 ctdb/server/ipalloc_nondeterministic.c | 147 ++++++
 ctdb/server/ipalloc_private.h          |  43 ++
 ctdb/tests/src/ctdbd_test.c            |   5 +
 ctdb/wscript                           |  16 +-
 10 files changed, 1135 insertions(+), 854 deletions(-)
 create mode 100644 ctdb/server/ipalloc.c
 create mode 100644 ctdb/server/ipalloc.h
 create mode 100644 ctdb/server/ipalloc_common.c
 create mode 100644 ctdb/server/ipalloc_deterministic.c
 create mode 100644 ctdb/server/ipalloc_lcp2.c
 create mode 100644 ctdb/server/ipalloc_nondeterministic.c
 create mode 100644 ctdb/server/ipalloc_private.h


Changeset truncated at 500 lines:

diff --git a/ctdb/server/ctdb_takeover.c b/ctdb/server/ctdb_takeover.c
index 227bd16..a613aa0 100644
--- a/ctdb/server/ctdb_takeover.c
+++ b/ctdb/server/ctdb_takeover.c
@@ -41,34 +41,13 @@
 #include "common/common.h"
 #include "common/logging.h"
 
+#include "server/ipalloc.h"
 
 #define TAKEOVER_TIMEOUT() timeval_current_ofs(ctdb->tunable.takeover_timeout,0)
 
 #define CTDB_ARP_INTERVAL 1
 #define CTDB_ARP_REPEAT   3
 
-/* Flags used in IP allocation algorithms. */
-enum ipalloc_algorithm {
-	IPALLOC_DETERMINISTIC,
-	IPALLOC_NONDETERMINISTIC,
-	IPALLOC_LCP2,
-};
-
-struct ipalloc_state {
-	uint32_t num;
-
-	/* Arrays with data for each node */
-	struct ctdb_public_ip_list_old **known_public_ips;
-	struct ctdb_public_ip_list_old **available_public_ips;
-	bool *noiptakeover;
-	bool *noiphost;
-
-	struct public_ip_list *all_ips;
-	enum ipalloc_algorithm algorithm;
-	uint32_t no_ip_failback;
-	uint32_t *force_rebalance_nodes;
-};
-
 struct ctdb_interface {
 	struct ctdb_interface *prev, *next;
 	const char *name;
@@ -1249,138 +1228,6 @@ int ctdb_set_single_public_ip(struct ctdb_context *ctdb,
 	return 0;
 }
 
-struct public_ip_list {
-	struct public_ip_list *next;
-	uint32_t pnn;
-	ctdb_sock_addr addr;
-};
-
-/* Given a physical node, return the number of
-   public addresses that is currently assigned to this node.
-*/
-static int node_ip_coverage(int32_t pnn, struct public_ip_list *ips)
-{
-	int num=0;
-
-	for (;ips;ips=ips->next) {
-		if (ips->pnn == pnn) {
-			num++;
-		}
-	}
-	return num;
-}
-
-
-/* Can the given node host the given IP: is the public IP known to the
- * node and is NOIPHOST unset?
-*/
-static bool can_node_host_ip(struct ipalloc_state *ipalloc_state,
-			     int32_t pnn,
-			     struct public_ip_list *ip)
-{
-	struct ctdb_public_ip_list_old *public_ips;
-	int i;
-
-	if (ipalloc_state->noiphost[pnn]) {
-		return false;
-	}
-
-	public_ips = ipalloc_state->available_public_ips[pnn];
-
-	if (public_ips == NULL) {
-		return false;
-	}
-
-	for (i=0; i<public_ips->num; i++) {
-		if (ctdb_same_ip(&ip->addr, &public_ips->ips[i].addr)) {
-			/* yes, this node can serve this public ip */
-			return true;
-		}
-	}
-
-	return false;
-}
-
-static bool can_node_takeover_ip(struct ipalloc_state *ipalloc_state,
-				 int32_t pnn,
-				 struct public_ip_list *ip)
-{
-	if (ipalloc_state->noiptakeover[pnn]) {
-		return false;
-	}
-
-	return can_node_host_ip(ipalloc_state, pnn, ip);
-}
-
-/* search the node lists list for a node to takeover this ip.
-   pick the node that currently are serving the least number of ips
-   so that the ips get spread out evenly.
-*/
-static int find_takeover_node(struct ipalloc_state *ipalloc_state,
-			      struct public_ip_list *ip)
-{
-	int pnn, min=0, num;
-	int i, numnodes;
-
-	numnodes = ipalloc_state->num;
-	pnn    = -1;
-	for (i=0; i<numnodes; i++) {
-		/* verify that this node can serve this ip */
-		if (!can_node_takeover_ip(ipalloc_state, i, ip)) {
-			/* no it couldnt   so skip to the next node */
-			continue;
-		}
-
-		num = node_ip_coverage(i, ipalloc_state->all_ips);
-		/* was this the first node we checked ? */
-		if (pnn == -1) {
-			pnn = i;
-			min  = num;
-		} else {
-			if (num < min) {
-				pnn = i;
-				min  = num;
-			}
-		}
-	}
-	if (pnn == -1) {
-		DEBUG(DEBUG_WARNING,(__location__ " Could not find node to take over public address '%s'\n",
-			ctdb_addr_to_str(&ip->addr)));
-
-		return -1;
-	}
-
-	ip->pnn = pnn;
-	return 0;
-}
-
-#define IP_KEYLEN	4
-static uint32_t *ip_key(ctdb_sock_addr *ip)
-{
-	static uint32_t key[IP_KEYLEN];
-
-	bzero(key, sizeof(key));
-
-	switch (ip->sa.sa_family) {
-	case AF_INET:
-		key[3]	= htonl(ip->ip.sin_addr.s_addr);
-		break;
-	case AF_INET6: {
-		uint32_t *s6_a32 = (uint32_t *)&(ip->ip6.sin6_addr.s6_addr);
-		key[0]	= htonl(s6_a32[0]);
-		key[1]	= htonl(s6_a32[1]);
-		key[2]	= htonl(s6_a32[2]);
-		key[3]	= htonl(s6_a32[3]);
-		break;
-	}
-	default:
-		DEBUG(DEBUG_ERR, (__location__ " ERROR, unknown family passed :%u\n", ip->sa.sa_family));
-		return key;
-	}
-
-	return key;
-}
-
 static void *add_ip_callback(void *parm, void *data)
 {
 	struct public_ip_list *this_ip = parm;
@@ -1518,679 +1365,6 @@ create_merged_ip_list(struct ctdb_context *ctdb, struct ipalloc_state *ipalloc_s
 	return ip_list;
 }
 
-/* 
- * This is the length of the longtest common prefix between the IPs.
- * It is calculated by XOR-ing the 2 IPs together and counting the
- * number of leading zeroes.  The implementation means that all
- * addresses end up being 128 bits long.
- *
- * FIXME? Should we consider IPv4 and IPv6 separately given that the
- * 12 bytes of 0 prefix padding will hurt the algorithm if there are
- * lots of nodes and IP addresses?
- */
-static uint32_t ip_distance(ctdb_sock_addr *ip1, ctdb_sock_addr *ip2)
-{
-	uint32_t ip1_k[IP_KEYLEN];
-	uint32_t *t;
-	int i;
-	uint32_t x;
-
-	uint32_t distance = 0;
-
-	memcpy(ip1_k, ip_key(ip1), sizeof(ip1_k));
-	t = ip_key(ip2);
-	for (i=0; i<IP_KEYLEN; i++) {
-		x = ip1_k[i] ^ t[i];
-		if (x == 0) {
-			distance += 32;
-		} else {
-			/* Count number of leading zeroes. 
-			 * FIXME? This could be optimised...
-			 */
-			while ((x & (1 << 31)) == 0) {
-				x <<= 1;
-				distance += 1;
-			}
-		}
-	}
-
-	return distance;
-}
-
-/* Calculate the IP distance for the given IP relative to IPs on the
-   given node.  The ips argument is generally the all_ips variable
-   used in the main part of the algorithm.
- */
-static uint32_t ip_distance_2_sum(ctdb_sock_addr *ip,
-				  struct public_ip_list *ips,
-				  int pnn)
-{
-	struct public_ip_list *t;
-	uint32_t d;
-
-	uint32_t sum = 0;
-
-	for (t = ips; t != NULL; t = t->next) {
-		if (t->pnn != pnn) {
-			continue;
-		}
-
-		/* Optimisation: We never calculate the distance
-		 * between an address and itself.  This allows us to
-		 * calculate the effect of removing an address from a
-		 * node by simply calculating the distance between
-		 * that address and all of the exitsing addresses.
-		 * Moreover, we assume that we're only ever dealing
-		 * with addresses from all_ips so we can identify an
-		 * address via a pointer rather than doing a more
-		 * expensive address comparison. */
-		if (&(t->addr) == ip) {
-			continue;
-		}
-
-		d = ip_distance(ip, &(t->addr));
-		sum += d * d;  /* Cheaper than pulling in math.h :-) */
-	}
-
-	return sum;
-}
-
-/* Return the LCP2 imbalance metric for addresses currently assigned
-   to the given node.
- */
-static uint32_t lcp2_imbalance(struct public_ip_list * all_ips, int pnn)
-{
-	struct public_ip_list *t;
-
-	uint32_t imbalance = 0;
-
-	for (t = all_ips; t != NULL; t = t->next) {
-		if (t->pnn != pnn) {
-			continue;
-		}
-		/* Pass the rest of the IPs rather than the whole
-		   all_ips input list.
-		*/
-		imbalance += ip_distance_2_sum(&(t->addr), t->next, pnn);
-	}
-
-	return imbalance;
-}
-
-/* Allocate any unassigned IPs just by looping through the IPs and
- * finding the best node for each.
- */
-static void basic_allocate_unassigned(struct ipalloc_state *ipalloc_state)
-{
-	struct public_ip_list *t;
-
-	/* loop over all ip's and find a physical node to cover for
-	   each unassigned ip.
-	*/
-	for (t = ipalloc_state->all_ips; t != NULL; t = t->next) {
-		if (t->pnn == -1) {
-			if (find_takeover_node(ipalloc_state, t)) {
-				DEBUG(DEBUG_WARNING,
-				      ("Failed to find node to cover ip %s\n",
-				       ctdb_addr_to_str(&t->addr)));
-			}
-		}
-	}
-}
-
-/* Basic non-deterministic rebalancing algorithm.
- */
-static void basic_failback(struct ipalloc_state *ipalloc_state,
-			   int num_ips)
-{
-	int i, numnodes;
-	int maxnode, maxnum, minnode, minnum, num, retries;
-	struct public_ip_list *t;
-
-	numnodes = ipalloc_state->num;
-	retries = 0;
-
-try_again:
-	maxnum=0;
-	minnum=0;
-
-	/* for each ip address, loop over all nodes that can serve
-	   this ip and make sure that the difference between the node
-	   serving the most and the node serving the least ip's are
-	   not greater than 1.
-	*/
-	for (t = ipalloc_state->all_ips; t != NULL; t = t->next) {
-		if (t->pnn == -1) {
-			continue;
-		}
-
-		/* Get the highest and lowest number of ips's served by any 
-		   valid node which can serve this ip.
-		*/
-		maxnode = -1;
-		minnode = -1;
-		for (i=0; i<numnodes; i++) {
-			/* only check nodes that can actually serve this ip */
-			if (!can_node_takeover_ip(ipalloc_state, i,
-						  t)) {
-				/* no it couldnt   so skip to the next node */
-				continue;
-			}
-
-			num = node_ip_coverage(i, ipalloc_state->all_ips);
-			if (maxnode == -1) {
-				maxnode = i;
-				maxnum  = num;
-			} else {
-				if (num > maxnum) {
-					maxnode = i;
-					maxnum  = num;
-				}
-			}
-			if (minnode == -1) {
-				minnode = i;
-				minnum  = num;
-			} else {
-				if (num < minnum) {
-					minnode = i;
-					minnum  = num;
-				}
-			}
-		}
-		if (maxnode == -1) {
-			DEBUG(DEBUG_WARNING,
-			      (__location__ " Could not find maxnode. May not be able to serve ip '%s'\n",
-			       ctdb_addr_to_str(&t->addr)));
-
-			continue;
-		}
-
-		/* if the spread between the smallest and largest coverage by
-		   a node is >=2 we steal one of the ips from the node with
-		   most coverage to even things out a bit.
-		   try to do this a limited number of times since we dont
-		   want to spend too much time balancing the ip coverage.
-		*/
-		if ((maxnum > minnum+1) &&
-		    (retries < (num_ips + 5))){
-			struct public_ip_list *tt;
-
-			/* Reassign one of maxnode's VNNs */
-			for (tt = ipalloc_state->all_ips; tt != NULL; tt = tt->next) {
-				if (tt->pnn == maxnode) {
-					(void)find_takeover_node(ipalloc_state,
-								 tt);
-					retries++;
-					goto try_again;;
-				}
-			}
-		}
-	}
-}
-
-static bool lcp2_init(struct ipalloc_state *ipalloc_state,
-		      uint32_t **lcp2_imbalances,
-		      bool **rebalance_candidates)
-{
-	int i, numnodes;
-	struct public_ip_list *t;
-
-	numnodes = ipalloc_state->num;
-
-	*rebalance_candidates = talloc_array(ipalloc_state, bool, numnodes);
-	if (*rebalance_candidates == NULL) {
-		DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
-		return false;
-	}
-	*lcp2_imbalances = talloc_array(ipalloc_state, uint32_t, numnodes);
-	if (*lcp2_imbalances == NULL) {
-		DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
-		return false;
-	}
-
-	for (i=0; i<numnodes; i++) {
-		(*lcp2_imbalances)[i] =
-			lcp2_imbalance(ipalloc_state->all_ips, i);
-		/* First step: assume all nodes are candidates */
-		(*rebalance_candidates)[i] = true;
-	}
-
-	/* 2nd step: if a node has IPs assigned then it must have been
-	 * healthy before, so we remove it from consideration.  This
-	 * is overkill but is all we have because we don't maintain
-	 * state between takeover runs.  An alternative would be to
-	 * keep state and invalidate it every time the recovery master
-	 * changes.
-	 */
-	for (t = ipalloc_state->all_ips; t != NULL; t = t->next) {
-		if (t->pnn != -1) {
-			(*rebalance_candidates)[t->pnn] = false;
-		}
-	}
-
-	/* 3rd step: if a node is forced to re-balance then
-	   we allow failback onto the node */
-	if (ipalloc_state->force_rebalance_nodes == NULL) {
-		return true;
-	}
-	for (i = 0;
-	     i < talloc_array_length(ipalloc_state->force_rebalance_nodes);
-	     i++) {
-		uint32_t pnn = ipalloc_state->force_rebalance_nodes[i];
-		if (pnn >= numnodes) {
-			DEBUG(DEBUG_ERR,
-			      (__location__ "unknown node %u\n", pnn));
-			continue;
-		}
-
-		DEBUG(DEBUG_NOTICE,
-		      ("Forcing rebalancing of IPs to node %u\n", pnn));
-		(*rebalance_candidates)[pnn] = true;
-	}
-
-	return true;
-}
-
-/* Allocate any unassigned addresses using the LCP2 algorithm to find
- * the IP/node combination that will cost the least.
- */
-static void lcp2_allocate_unassigned(struct ipalloc_state *ipalloc_state,
-				     uint32_t *lcp2_imbalances)
-{
-	struct public_ip_list *t;
-	int dstnode, numnodes;
-
-	int minnode;
-	uint32_t mindsum, dstdsum, dstimbl, minimbl;
-	struct public_ip_list *minip;
-
-	bool should_loop = true;
-	bool have_unassigned = true;
-
-	numnodes = ipalloc_state->num;
-
-	while (have_unassigned && should_loop) {
-		should_loop = false;
-
-		DEBUG(DEBUG_DEBUG,(" ----------------------------------------\n"));
-		DEBUG(DEBUG_DEBUG,(" CONSIDERING MOVES (UNASSIGNED)\n"));
-
-		minnode = -1;
-		mindsum = 0;
-		minip = NULL;
-
-		/* loop over each unassigned ip. */
-		for (t = ipalloc_state->all_ips; t != NULL ; t = t->next) {
-			if (t->pnn != -1) {
-				continue;
-			}
-
-			for (dstnode = 0; dstnode < numnodes; dstnode++) {
-				/* only check nodes that can actually takeover this ip */
-				if (!can_node_takeover_ip(ipalloc_state,
-							  dstnode,
-							  t)) {
-					/* no it couldnt   so skip to the next node */
-					continue;
-				}
-
-				dstdsum = ip_distance_2_sum(&(t->addr),


-- 
Samba Shared Repository



More information about the samba-cvs mailing list