Rev 319: - up rx_cnt on all packet types in http://samba.org/~tridge/ctdb

tridge at samba.org tridge at samba.org
Fri May 18 13:23:36 GMT 2007


------------------------------------------------------------
revno: 319
revision-id: tridge at samba.org-20070518132336-gjpdua5y9j3ydutx
parent: tridge at samba.org-20070518121158-hay706mpx4tvajh2
committer: Andrew Tridgell <tridge at samba.org>
branch nick: tridge
timestamp: Fri 2007-05-18 23:23:36 +1000
message:
  - up rx_cnt on all packet types
  - notice when a node becomes available again
modified:
  common/ctdb.c                  ctdb.c-20061127094323-t50f58d65iaao5of-2
  common/ctdb_call.c             ctdb_call.c-20061128065342-to93h6eejj5kon81-1
  common/ctdb_monitor.c          ctdb_monitor.c-20070518100625-8jf4ft1mjzmb22ck-1
  include/ctdb_private.h         ctdb_private.h-20061117234101-o3qt14umlg9en8z0-13
=== modified file 'common/ctdb.c'
--- a/common/ctdb.c	2007-05-18 09:19:35 +0000
+++ b/common/ctdb.c	2007-05-18 13:23:36 +0000
@@ -116,8 +116,7 @@
 	node->name = talloc_asprintf(node, "%s:%u", 
 				     node->address.address, 
 				     node->address.port);
-	/* for now we just set the vnn to the line in the file - this
-	   will change! */
+	/* this assumes that the nodes are kept in sorted order, and no gaps */
 	node->vnn = ctdb->num_nodes;
 
 	if (ctdb->address.address &&
@@ -275,6 +274,11 @@
 		 "node %d to %d\n", hdr->reqid, hdr->operation, hdr->length,
 		 hdr->srcnode, hdr->destnode));
 
+	/* up the counter for this source node, so we know it's alive */
+	if (ctdb_validate_vnn(ctdb, hdr->srcnode)) {
+		ctdb->nodes[hdr->srcnode]->rx_cnt++;
+	}
+
 	switch (hdr->operation) {
 	case CTDB_REQ_CALL:
 	case CTDB_REPLY_CALL:
@@ -345,7 +349,6 @@
 
 	case CTDB_REQ_KEEPALIVE:
 		ctdb->status.keepalive_packets_recv++;
-		ctdb_request_keepalive(ctdb, hdr);
 		break;
 
 	default:

=== modified file 'common/ctdb_call.c'
--- a/common/ctdb_call.c	2007-05-18 09:19:35 +0000
+++ b/common/ctdb_call.c	2007-05-18 13:23:36 +0000
@@ -785,13 +785,11 @@
 /* 
    send a keepalive packet to the other node
 */
-void ctdb_send_keepalive(struct ctdb_context *ctdb,
-				TALLOC_CTX *mem_ctx,
-				uint32_t destnode)
+void ctdb_send_keepalive(struct ctdb_context *ctdb, uint32_t destnode)
 {
 	struct ctdb_req_keepalive *r;
 	
-	r = ctdb_transport_allocate(ctdb, mem_ctx, CTDB_REQ_KEEPALIVE,
+	r = ctdb_transport_allocate(ctdb, ctdb, CTDB_REQ_KEEPALIVE,
 				    sizeof(struct ctdb_req_keepalive), 
 				    struct ctdb_req_keepalive);
 	CTDB_NO_MEMORY_FATAL(ctdb, r);

=== modified file 'common/ctdb_monitor.c'
--- a/common/ctdb_monitor.c	2007-05-18 10:06:29 +0000
+++ b/common/ctdb_monitor.c	2007-05-18 13:23:36 +0000
@@ -26,73 +26,55 @@
 #include "../include/ctdb_private.h"
 
 /*
-  called when a CTDB_REQ_KEEPALIVE packet comes in
-*/
-void ctdb_request_keepalive(struct ctdb_context *ctdb, struct ctdb_req_header *hdr)
-{
-	struct ctdb_req_keepalive *r = (struct ctdb_req_keepalive *)hdr;
-	struct ctdb_node *node = NULL;
-	int i;
-
-	for (i=0;i<ctdb->num_nodes;i++) {
-		if (ctdb->nodes[i]->vnn == r->hdr.srcnode) {
-			node = ctdb->nodes[i];
-			break;
-		}
-	}
-	if (!node) {
-		DEBUG(0,(__location__ " Keepalive received from node not in ctdb->nodes : %u\n", r->hdr.srcnode));
-		return;
-	}
-
-	node->rx_cnt++;
-}
-
-
+  see if any nodes are dead
+ */
 static void ctdb_check_for_dead_nodes(struct event_context *ev, struct timed_event *te, 
 			   struct timeval t, void *private_data)
 {
 	struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
 	int i;
-	TALLOC_CTX *mem_ctx = talloc_new(ctdb);
 
 	/* send a keepalive to all other nodes, unless */
 	for (i=0;i<ctdb->num_nodes;i++) {
-		if (!(ctdb->nodes[i]->flags & NODE_FLAGS_CONNECTED)) {
+		struct ctdb_node *node = ctdb->nodes[i];
+		if (node->vnn == ctdb->vnn) {
 			continue;
 		}
-		if (ctdb->nodes[i]->vnn == ctdb_get_vnn(ctdb)) {
-			continue;
+		
+		/* it might have come alive again */
+		if (!(node->flags & NODE_FLAGS_CONNECTED) && node->rx_cnt != 0) {
+			DEBUG(0,("Node %u is alive again - marking as connected\n", node->vnn));
+			node->flags |= NODE_FLAGS_CONNECTED;
 		}
 
-		if (ctdb->nodes[i]->rx_cnt == 0) {
-			ctdb->nodes[i]->dead_count++;
+		if (node->rx_cnt == 0) {
+			node->dead_count++;
 		} else {
-			ctdb->nodes[i]->dead_count = 0;
+			node->dead_count = 0;
 		}
 
-		if (ctdb->nodes[i]->dead_count>=3) {
-			ctdb->nodes[i]->flags &= ~NODE_FLAGS_CONNECTED;
-			/* should probably tell the transport layer
-			   to kill the sockets as well 
+		node->rx_cnt = 0;
+
+		if (node->dead_count >= CTDB_MONITORING_DEAD_COUNT) {
+			DEBUG(0,("Node %u is dead - marking as not connected\n", node->vnn));
+			node->flags &= ~NODE_FLAGS_CONNECTED;
+			/* maybe tell the transport layer to kill the
+			   sockets as well?
 			*/
 			continue;
 		}
 
-		ctdb_send_keepalive(ctdb, mem_ctx, i);
-		ctdb->nodes[i]->rx_cnt = 0;
+		ctdb_send_keepalive(ctdb, node->vnn);
 	}
-
-
-
 	
-	talloc_free(mem_ctx);
-
 	event_add_timed(ctdb->ev, ctdb, 
 			timeval_current_ofs(CTDB_MONITORING_TIMEOUT, 0), 
 			ctdb_check_for_dead_nodes, ctdb);
 }
 
+/*
+  start watching for nodes that might be dead
+ */
 int ctdb_start_monitoring(struct ctdb_context *ctdb)
 {
 	event_add_timed(ctdb->ev, ctdb, 

=== modified file 'include/ctdb_private.h'
--- a/include/ctdb_private.h	2007-05-18 09:19:35 +0000
+++ b/include/ctdb_private.h	2007-05-18 13:23:36 +0000
@@ -311,6 +311,9 @@
 /* timeout between dead-node monitoring events */
 #define CTDB_MONITORING_TIMEOUT 5
 
+/* number of monitoring timeouts before a node is considered dead */
+#define CTDB_MONITORING_DEAD_COUNT 3
+
 
 /* number of consecutive calls from the same node before we give them
    the record */
@@ -710,7 +713,6 @@
 void ctdb_reqid_remove(struct ctdb_context *ctdb, uint32_t reqid);
 
 void ctdb_request_control(struct ctdb_context *ctdb, struct ctdb_req_header *hdr);
-void ctdb_request_keepalive(struct ctdb_context *ctdb, struct ctdb_req_header *hdr);
 void ctdb_reply_control(struct ctdb_context *ctdb, struct ctdb_req_header *hdr);
 
 int ctdb_daemon_send_control(struct ctdb_context *ctdb, uint32_t destnode,
@@ -819,6 +821,6 @@
 uint32_t ctdb_get_num_connected_nodes(struct ctdb_context *ctdb);
 
 int ctdb_start_monitoring(struct ctdb_context *ctdb);
-void ctdb_send_keepalive(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, uint32_t destnode);
+void ctdb_send_keepalive(struct ctdb_context *ctdb, uint32_t destnode);
 
 #endif



More information about the samba-cvs mailing list