[SCM] CTDB repository - branch master updated - 1.0.99-19-gf1b6ee4

Ronnie Sahlberg sahlberg at samba.org
Thu Oct 22 22:30:25 MDT 2009


The branch, master has been updated
       via  f1b6ee4a55cdca60f93d992f0431d91bf301af2c (commit)
       via  0785afba8e5cd501b9e0ecb4a6a44edf43b57ab0 (commit)
       via  38d7487bc68c8cf85980004aceeef24ae32d6f36 (commit)
      from  514a60c57557042e463efeff53dd11b9fec40561 (commit)

http://gitweb.samba.org/?p=sahlberg/ctdb.git;a=shortlog;h=master


- Log -----------------------------------------------------------------
commit f1b6ee4a55cdca60f93d992f0431d91bf301af2c
Author: Ronnie Sahlberg <ronniesahlberg at gmail.com>
Date:   Fri Oct 23 15:24:51 2009 +1100

    Add a mechanism where we can register notifications to be sent out to a SRVID when the client disconnects.
    
    The way to use this is from a client to :
    1, first create a message handle and bind it to a SRVID
       A special prefix for the srvid space has been set aside for samba (CTDB_SRVID_SAMBA_NOTIFY, 0xFE00000000000000LL) :
       Only samba is allowed to use srvid's with the top 32 bits set like this.
       The lower 32 bits are for samba to use internally.
    
    2, register a "notification" using the new control :
                        CTDB_CONTROL_REGISTER_NOTIFY         = 114,
       This control takes as indata a structure like this :
    struct ctdb_client_notify_register {
            uint64_t srvid;
            uint32_t len;
            uint8_t notify_data[1];
    };
    
    srvid is the srvid used in the space set aside above.
    len and notify_data describe an arbitrary blob.
    When notifications are later sent out to all clients, this is the payload of that notification message.
    
    If a client has registered with control 114 and then disconnects from ctdbd, ctdbd will broadcast a message to that srvid to all nodes/listeners in the cluster.
    
    A client can register itself with as many different srvid's as it wants, but this is handled through a linked list from the client structure, so it is mainly designed for "few notifications per client".
    
    3, a client that no longer wants to have a notification set up can deregister using control
                        CTDB_CONTROL_DEREGISTER_NOTIFY       = 115,
    which takes this as arguments :
    struct ctdb_client_notify_deregister {
            uint64_t srvid;
    };
    
    When a client deregisters, a message will no longer be sent to all other clients when this client disconnects from ctdbd.

commit 0785afba8e5cd501b9e0ecb4a6a44edf43b57ab0
Author: Ronnie Sahlberg <ronniesahlberg at gmail.com>
Date:   Fri Oct 23 13:55:21 2009 +1100

    when scripts timeout, log pstree to a file in /tmp and just log the filename in the messages file

commit 38d7487bc68c8cf85980004aceeef24ae32d6f36
Author: Ronnie Sahlberg <ronniesahlberg at gmail.com>
Date:   Fri Oct 23 13:54:45 2009 +1100

    set the eventscripts to timeout after 20 seconds
    change the ban count to 10 failures before we ban by default

-----------------------------------------------------------------------

Summary of changes:
 include/ctdb.h         |   13 ++++++
 include/ctdb_private.h |    8 +++-
 server/ctdb_control.c  |    7 +++
 server/ctdb_daemon.c   |  103 +++++++++++++++++++++++++++++++++++++++++++++++-
 server/ctdb_tunables.c |    4 +-
 server/eventscript.c   |   34 ++++------------
 6 files changed, 140 insertions(+), 29 deletions(-)


Changeset truncated at 500 lines:

diff --git a/include/ctdb.h b/include/ctdb.h
index abe09a3..1ede662 100644
--- a/include/ctdb.h
+++ b/include/ctdb.h
@@ -111,6 +111,10 @@ struct ctdb_call_info {
 */
 #define CTDB_SRVID_TAKEOVER_RUN_RESPONSE  0xFD00000000000000LL
 
+/* A port reserved for samba (top 32 bits)
+ */
+#define CTDB_SRVID_SAMBA_NOTIFY  0xFE00000000000000LL
+
 /* used on the domain socket, send a pdu to the local daemon */
 #define CTDB_CURRENT_NODE     0xF0000001
 /* send a broadcast to all nodes in the cluster, active or not */
@@ -144,6 +148,15 @@ struct ctdb_client_control_state {
 	} async;	
 };
 
+struct ctdb_client_notify_register {
+	uint64_t srvid;
+	uint32_t len;
+	uint8_t notify_data[1];
+};
+
+struct ctdb_client_notify_deregister {
+	uint64_t srvid;
+};
 
 struct event_context;
 
diff --git a/include/ctdb_private.h b/include/ctdb_private.h
index 2f4937e..ad84628 100644
--- a/include/ctdb_private.h
+++ b/include/ctdb_private.h
@@ -160,7 +160,6 @@ typedef void (*ctdb_control_callback_fn_t)(struct ctdb_context *,
 					   int32_t status, TDB_DATA data, 
 					   const char *errormsg,
 					   void *private_data);
-
 /*
   structure describing a connected client in the daemon
  */
@@ -173,6 +172,7 @@ struct ctdb_client {
 	struct ctdb_tcp_list *tcp_list;
 	uint32_t db_id;
 	uint32_t num_persistent_updates;
+	struct ctdb_client_notify_list *notify;
 };
 
 
@@ -612,6 +612,8 @@ enum ctdb_controls {CTDB_CONTROL_PROCESS_EXISTS          = 0,
 		    CTDB_CONTROL_SET_DB_PRIORITY         = 111,
 		    CTDB_CONTROL_GET_DB_PRIORITY         = 112,
 		    CTDB_CONTROL_TRANSACTION_CANCEL      = 113,
+		    CTDB_CONTROL_REGISTER_NOTIFY         = 114,
+		    CTDB_CONTROL_DEREGISTER_NOTIFY       = 115,
 };	
 
 /*
@@ -1501,4 +1503,8 @@ int32_t ctdb_control_set_ban_state(struct ctdb_context *ctdb, TDB_DATA indata);
 int32_t ctdb_control_get_ban_state(struct ctdb_context *ctdb, TDB_DATA *outdata);
 int32_t ctdb_control_set_db_priority(struct ctdb_context *ctdb, TDB_DATA indata);
 
+int32_t ctdb_control_register_notify(struct ctdb_context *ctdb, uint32_t client_id, TDB_DATA indata);
+
+int32_t ctdb_control_deregister_notify(struct ctdb_context *ctdb, uint32_t client_id, TDB_DATA indata);
+
 #endif
diff --git a/server/ctdb_control.c b/server/ctdb_control.c
index 24d22d0..904bebe 100644
--- a/server/ctdb_control.c
+++ b/server/ctdb_control.c
@@ -552,6 +552,13 @@ static int32_t ctdb_control_dispatch(struct ctdb_context *ctdb,
 		CHECK_CONTROL_DATA_SIZE(0);
 		return ctdb_control_transaction_cancel(ctdb);
 
+	case CTDB_CONTROL_REGISTER_NOTIFY:
+		return ctdb_control_register_notify(ctdb, client_id, indata);
+
+	case CTDB_CONTROL_DEREGISTER_NOTIFY:
+		CHECK_CONTROL_DATA_SIZE(sizeof(struct ctdb_client_notify_deregister));
+		return ctdb_control_deregister_notify(ctdb, client_id, indata);
+
 	default:
 		DEBUG(DEBUG_CRIT,(__location__ " Unknown CTDB control opcode %u\n", opcode));
 		return -1;
diff --git a/server/ctdb_daemon.c b/server/ctdb_daemon.c
index 532887c..a8dc651 100644
--- a/server/ctdb_daemon.c
+++ b/server/ctdb_daemon.c
@@ -31,7 +31,6 @@
 
 static void daemon_incoming_packet(void *, struct ctdb_req_header *);
 
-
 static void print_exit_message(void)
 {
 	DEBUG(DEBUG_NOTICE,("CTDB daemon shutting down\n"));
@@ -1043,3 +1042,105 @@ int ctdb_daemon_send_message(struct ctdb_context *ctdb, uint32_t pnn,
 	return 0;
 }
 
+
+
+struct ctdb_client_notify_list {
+	struct ctdb_client_notify_list *next, *prev;
+	struct ctdb_context *ctdb;
+	uint64_t srvid;
+	TDB_DATA data;
+};
+
+
+static int ctdb_client_notify_destructor(struct ctdb_client_notify_list *nl)
+{
+	int ret;
+
+	DEBUG(DEBUG_ERR,("Sending client notify message for srvid:%llu\n", (unsigned long long)nl->srvid));
+
+	ret = ctdb_daemon_send_message(nl->ctdb, CTDB_BROADCAST_CONNECTED, (unsigned long long)nl->srvid, nl->data);
+	if (ret != 0) {
+		DEBUG(DEBUG_ERR,("Failed to send client notify message\n"));
+	}
+
+	return 0;
+}
+
+int32_t ctdb_control_register_notify(struct ctdb_context *ctdb, uint32_t client_id, TDB_DATA indata)
+{
+	struct ctdb_client_notify_register *notify = (struct ctdb_client_notify_register *)indata.dptr;
+        struct ctdb_client *client = ctdb_reqid_find(ctdb, client_id, struct ctdb_client); 
+	struct ctdb_client_notify_list *nl;
+
+	DEBUG(DEBUG_ERR,("Register srvid %llu for client %d\n", (unsigned long long)notify->srvid, client_id));
+
+	if (indata.dsize < offsetof(struct ctdb_client_notify_register, notify_data)) {
+		DEBUG(DEBUG_ERR,(__location__ " Too little data in control : %d\n", (int)indata.dsize));
+		return -1;
+	}
+
+	if (indata.dsize != (notify->len + offsetof(struct ctdb_client_notify_register, notify_data))) {
+		DEBUG(DEBUG_ERR,(__location__ " Wrong amount of data in control. Got %d, expected %d\n", (int)indata.dsize, (int)(notify->len + offsetof(struct ctdb_client_notify_register, notify_data))));
+		return -1;
+	}
+
+
+        if (client == NULL) {
+                DEBUG(DEBUG_ERR,(__location__ " Could not find client parent structure. You can not send this control to a remote node\n"));
+                return -1;
+        }
+
+	for(nl=client->notify; nl; nl=nl->next) {
+		if (nl->srvid == notify->srvid) {
+			break;
+		}
+	}
+	if (nl != NULL) {
+                DEBUG(DEBUG_ERR,(__location__ " Notification for srvid:%llu already exists for this client\n", (unsigned long long)notify->srvid));
+                return -1;
+        }
+
+	nl = talloc(client, struct ctdb_client_notify_list);
+	CTDB_NO_MEMORY(ctdb, nl);
+	nl->ctdb       = ctdb;
+	nl->srvid      = notify->srvid;
+	nl->data.dsize = notify->len;
+	nl->data.dptr  = talloc_size(nl, nl->data.dsize);
+	CTDB_NO_MEMORY(ctdb, nl->data.dptr);
+	memcpy(nl->data.dptr, notify->notify_data, nl->data.dsize);
+	
+	DLIST_ADD(client->notify, nl);
+	talloc_set_destructor(nl, ctdb_client_notify_destructor);
+
+	return 0;
+}
+
+int32_t ctdb_control_deregister_notify(struct ctdb_context *ctdb, uint32_t client_id, TDB_DATA indata)
+{
+	struct ctdb_client_notify_deregister *notify = (struct ctdb_client_notify_deregister *)indata.dptr;
+        struct ctdb_client *client = ctdb_reqid_find(ctdb, client_id, struct ctdb_client); 
+	struct ctdb_client_notify_list *nl;
+
+	DEBUG(DEBUG_ERR,("Deregister srvid %llu for client %d\n", (unsigned long long)notify->srvid, client_id));
+
+        if (client == NULL) {
+                DEBUG(DEBUG_ERR,(__location__ " Could not find client parent structure. You can not send this control to a remote node\n"));
+                return -1;
+        }
+
+	for(nl=client->notify; nl; nl=nl->next) {
+		if (nl->srvid == notify->srvid) {
+			break;
+		}
+	}
+	if (nl == NULL) {
+                DEBUG(DEBUG_ERR,(__location__ " No notification for srvid:%llu found for this client\n", (unsigned long long)notify->srvid));
+                return -1;
+        }
+
+	DLIST_REMOVE(client->notify, nl);
+	talloc_set_destructor(nl, NULL);
+	talloc_free(nl);
+
+	return 0;
+}
diff --git a/server/ctdb_tunables.c b/server/ctdb_tunables.c
index c461bca..8ad4cde 100644
--- a/server/ctdb_tunables.c
+++ b/server/ctdb_tunables.c
@@ -37,8 +37,8 @@ static const struct {
 	{ "TakeoverTimeout",      5,  offsetof(struct ctdb_tunable, takeover_timeout) },
 	{ "MonitorInterval",     15,  offsetof(struct ctdb_tunable, monitor_interval) },
 	{ "TickleUpdateInterval",20,  offsetof(struct ctdb_tunable, tickle_update_interval) },
-	{ "EventScriptTimeout",  60,  offsetof(struct ctdb_tunable, script_timeout) },
-	{ "EventScriptBanCount",  5,  offsetof(struct ctdb_tunable, script_ban_count) },
+	{ "EventScriptTimeout",  20,  offsetof(struct ctdb_tunable, script_timeout) },
+	{ "EventScriptBanCount", 10,  offsetof(struct ctdb_tunable, script_ban_count) },
 	{ "EventScriptUnhealthyOnTimeout", 0, offsetof(struct ctdb_tunable, script_unhealthy_on_timeout) },
 	{ "RecoveryGracePeriod", 120,  offsetof(struct ctdb_tunable, recovery_grace_period) },
 	{ "RecoveryBanPeriod",  300,  offsetof(struct ctdb_tunable, recovery_ban_period) },
diff --git a/server/eventscript.c b/server/eventscript.c
index aae5eef..3b86615 100644
--- a/server/eventscript.c
+++ b/server/eventscript.c
@@ -18,6 +18,7 @@
 */
 
 #include "includes.h"
+#include <time.h>
 #include "system/filesys.h"
 #include "system/wait.h"
 #include "system/dir.h"
@@ -36,36 +37,19 @@ static struct {
  */
 static void sigterm(int sig)
 {
-	FILE *p;
+	char tbuf[100], buf[200];
+	time_t t;
 
 	DEBUG(DEBUG_ERR,("Timed out running script '%s' after %.1f seconds pid :%d\n", 
 		 child_state.script_running, timeval_elapsed(&child_state.start), getpid()));
 
-	p = popen("pstree -p", "r");
-	if (p == NULL) {
-		DEBUG(DEBUG_ERR,("Failed popen to collect pstree for hung script\n"));
-	} else {
-		char buf[256];
-		int count;
-
-		DEBUG(DEBUG_ERR,("PSTREE:\n"));
-		while(!feof(p)){
-			count=fread(buf, 1, 255, p);
-			if (count == EOF) {
-				break;
-			}
-			if (count < 0) {
-				break;
-			}
-			if (count == 0) {
-				break;
-			}
-			buf[count] = 0;
-			DEBUG(DEBUG_ERR,("%s", buf)); 
-		}
-		DEBUG(DEBUG_ERR,("END OF PSTREE OUTPUT\n"));
-		pclose(p);
-	}
+	t = time(NULL);
+
+	strftime(tbuf, sizeof(tbuf)-1, "%Y%m%d%H%M%S", 	localtime(&t));
+	sprintf(buf, "pstree -p >/tmp/ctdb.event.%s.%d", tbuf, getpid());
+	system(buf);
+
+	DEBUG(DEBUG_ERR,("Logged timedout eventscript : %s\n", buf));
 
 	/* all the child processes will be running in the same process group */
 	kill(-getpgrp(), SIGKILL);


-- 
CTDB repository


More information about the samba-cvs mailing list