[PATCH] client: handle transient connection errors
David Disseldorp
ddiss at suse.de
Tue Apr 5 07:40:36 MDT 2011
Client connections to the ctdbd unix domain socket may fail
intermittently while the server is under heavy load. This change
introduces a client connect retry loop.
While connect() keeps failing with EAGAIN, the client retries with
exponential backoff (sleeping 1, 2, 4, ... 32 seconds), i.e. for roughly
64 seconds in total; the ctdb --timelimit option can be used to cap
client runtime.
---
client/ctdb_client.c | 34 +++++++++++++++++++++++++++++-----
1 files changed, 29 insertions(+), 5 deletions(-)
diff --git a/client/ctdb_client.c b/client/ctdb_client.c
index 7caa5cb..ede4542 100644
--- a/client/ctdb_client.c
+++ b/client/ctdb_client.c
@@ -253,16 +253,40 @@ done:
}
/*
- connect to a unix domain socket
+ connect with exponential backoff, thanks Stevens
*/
-int ctdb_socket_connect(struct ctdb_context *ctdb)
+#define CONNECT_MAXSLEEP 64
+static int ctdb_connect_retry(struct ctdb_context *ctdb)
{
struct sockaddr_un addr;
+ int nsec;
+ int ret = 0;
memset(&addr, 0, sizeof(addr));
addr.sun_family = AF_UNIX;
strncpy(addr.sun_path, ctdb->daemon.name, sizeof(addr.sun_path));
+ for (nsec = 1; nsec <= CONNECT_MAXSLEEP; nsec <<= 1) {
+ ret = connect(ctdb->daemon.sd, (struct sockaddr *)&addr,
+ sizeof(addr));
+ if ((ret == 0) || (errno != EAGAIN))
+ break;
+
+ if (nsec <= (CONNECT_MAXSLEEP / 2)) {
+ DEBUG(DEBUG_ERR,("connect failed: %s, retry in %d second(s)\n",
+ strerror(errno), nsec));
+ sleep(nsec);
+ }
+ }
+
+ return ret;
+}
+
+/*
+ connect to a unix domain socket
+*/
+int ctdb_socket_connect(struct ctdb_context *ctdb)
+{
ctdb->daemon.sd = socket(AF_UNIX, SOCK_STREAM, 0);
if (ctdb->daemon.sd == -1) {
DEBUG(DEBUG_ERR,(__location__ " Failed to open client socket. Errno:%s(%d)\n", strerror(errno), errno));
@@ -271,11 +295,11 @@ int ctdb_socket_connect(struct ctdb_context *ctdb)
set_nonblocking(ctdb->daemon.sd);
set_close_on_exec(ctdb->daemon.sd);
-
- if (connect(ctdb->daemon.sd, (struct sockaddr *)&addr, sizeof(addr)) == -1) {
+
+ if (ctdb_connect_retry(ctdb) == -1) {
close(ctdb->daemon.sd);
ctdb->daemon.sd = -1;
- DEBUG(DEBUG_ERR,(__location__ " Failed to connect client socket to daemon. Errno:%s(%d)\n", strerror(errno), errno));
+ DEBUG(DEBUG_ERR,(__location__ " Failed to connect client socket to daemon\n"));
return -1;
}
--
1.7.1
More information about the samba-technical
mailing list