[PATCH] client: handle transient connection errors

David Disseldorp ddiss at suse.de
Tue Apr 5 07:40:36 MDT 2011


Client connections to the ctdbd unix domain socket may fail
intermittently while the server is under heavy load. This change
introduces a client connect retry loop.

During failure the client will retry, with exponential backoff, for a
maximum of 64 seconds; the ctdb --timelimit option can be used to cap
client runtime.
---
 client/ctdb_client.c |   34 +++++++++++++++++++++++++++++-----
 1 files changed, 29 insertions(+), 5 deletions(-)

diff --git a/client/ctdb_client.c b/client/ctdb_client.c
index 7caa5cb..ede4542 100644
--- a/client/ctdb_client.c
+++ b/client/ctdb_client.c
@@ -253,16 +253,40 @@ done:
 }
 
 /*
-  connect to a unix domain socket
+  connect with exponential backoff, thanks Stevens
 */
-int ctdb_socket_connect(struct ctdb_context *ctdb)
+#define CONNECT_MAXSLEEP 64
+static int ctdb_connect_retry(struct ctdb_context *ctdb)
 {
 	struct sockaddr_un addr;
+	int nsec;
+	int ret = 0;
 
 	memset(&addr, 0, sizeof(addr));
 	addr.sun_family = AF_UNIX;
 	strncpy(addr.sun_path, ctdb->daemon.name, sizeof(addr.sun_path));
 
+	for (nsec = 1; nsec <= CONNECT_MAXSLEEP; nsec <<= 1) {
+		ret = connect(ctdb->daemon.sd, (struct sockaddr *)&addr,
+			      sizeof(addr));
+		if ((ret == 0) || (errno != EAGAIN))
+			break;
+
+		if (nsec <= (CONNECT_MAXSLEEP / 2)) {
+			DEBUG(DEBUG_ERR,("connect failed: %s, retry in %d second(s)\n",
+					 strerror(errno), nsec));
+			sleep(nsec);
+		}
+	}
+
+	return ret;
+}
+
+/*
+  connect to a unix domain socket
+*/
+int ctdb_socket_connect(struct ctdb_context *ctdb)
+{
 	ctdb->daemon.sd = socket(AF_UNIX, SOCK_STREAM, 0);
 	if (ctdb->daemon.sd == -1) {
 		DEBUG(DEBUG_ERR,(__location__ " Failed to open client socket. Errno:%s(%d)\n", strerror(errno), errno));
@@ -271,11 +295,11 @@ int ctdb_socket_connect(struct ctdb_context *ctdb)
 
 	set_nonblocking(ctdb->daemon.sd);
 	set_close_on_exec(ctdb->daemon.sd);
-	
-	if (connect(ctdb->daemon.sd, (struct sockaddr *)&addr, sizeof(addr)) == -1) {
+
+	if (ctdb_connect_retry(ctdb) == -1) {
 		close(ctdb->daemon.sd);
 		ctdb->daemon.sd = -1;
-		DEBUG(DEBUG_ERR,(__location__ " Failed to connect client socket to daemon. Errno:%s(%d)\n", strerror(errno), errno));
+		DEBUG(DEBUG_ERR,(__location__ " Failed to connect client socket to daemon\n"));
 		return -1;
 	}
 
-- 
1.7.1



More information about the samba-technical mailing list