[SCM] CTDB repository - branch master updated - 1.0.99-19-gf1b6ee4
Ronnie Sahlberg
sahlberg at samba.org
Thu Oct 22 22:30:25 MDT 2009
The branch, master has been updated
via f1b6ee4a55cdca60f93d992f0431d91bf301af2c (commit)
via 0785afba8e5cd501b9e0ecb4a6a44edf43b57ab0 (commit)
via 38d7487bc68c8cf85980004aceeef24ae32d6f36 (commit)
from 514a60c57557042e463efeff53dd11b9fec40561 (commit)
http://gitweb.samba.org/?p=sahlberg/ctdb.git;a=shortlog;h=master
- Log -----------------------------------------------------------------
commit f1b6ee4a55cdca60f93d992f0431d91bf301af2c
Author: Ronnie Sahlberg <ronniesahlberg at gmail.com>
Date: Fri Oct 23 15:24:51 2009 +1100
Add a mechanism where we can register notifications to be sent out to a SRVID when the client disconnects.
To use this from a client :
1, first create a message handle and bind it to a SRVID
A special prefix for the srvid space has been set aside for samba :
#define CTDB_SRVID_SAMBA_NOTIFY 0xFE00000000000000LL
Only samba is allowed to use srvids with the top 32 bits set like this.
The lower 32 bits are for samba to use internally.
2, register a "notification" using the new control :
CTDB_CONTROL_REGISTER_NOTIFY = 114,
This control takes as indata a structure like this :
struct ctdb_client_notify_register {
uint64_t srvid;
uint32_t len;
uint8_t notify_data[1];
};
srvid is the srvid used in the space set aside above.
len and notify_data describe an arbitrary blob : notify_data holds len bytes of data.
When notifications are later sent out to all clients, this is the payload of that notification message.
If a client has registered with control 114 and then disconnects from ctdbd, ctdbd will broadcast a message to that srvid to all nodes/listeners in the cluster.
A client can register itself with as many different srvids as it wants, but this is handled through a linked list from the client structure, so it is mainly designed for "few notifications per client".
3, a client that no longer wants to have a notification set up can deregister using control
CTDB_CONTROL_DEREGISTER_NOTIFY = 115,
which takes this as arguments :
struct ctdb_client_notify_deregister {
uint64_t srvid;
};
When a client deregisters, a message will no longer be sent to all other clients when this client disconnects from ctdbd.
commit 0785afba8e5cd501b9e0ecb4a6a44edf43b57ab0
Author: Ronnie Sahlberg <ronniesahlberg at gmail.com>
Date: Fri Oct 23 13:55:21 2009 +1100
when scripts time out, log pstree to a file in /tmp and just log the filename in the messages file
commit 38d7487bc68c8cf85980004aceeef24ae32d6f36
Author: Ronnie Sahlberg <ronniesahlberg at gmail.com>
Date: Fri Oct 23 13:54:45 2009 +1100
set the eventscripts to timeout after 20 seconds
change the ban count to 10 failures before we ban by default
-----------------------------------------------------------------------
Summary of changes:
include/ctdb.h | 13 ++++++
include/ctdb_private.h | 8 +++-
server/ctdb_control.c | 7 +++
server/ctdb_daemon.c | 103 +++++++++++++++++++++++++++++++++++++++++++++++-
server/ctdb_tunables.c | 4 +-
server/eventscript.c | 34 ++++------------
6 files changed, 140 insertions(+), 29 deletions(-)
Changeset truncated at 500 lines:
diff --git a/include/ctdb.h b/include/ctdb.h
index abe09a3..1ede662 100644
--- a/include/ctdb.h
+++ b/include/ctdb.h
@@ -111,6 +111,10 @@ struct ctdb_call_info {
*/
#define CTDB_SRVID_TAKEOVER_RUN_RESPONSE 0xFD00000000000000LL
+/* A port reserved for samba (top 32 bits)
+ */
+#define CTDB_SRVID_SAMBA_NOTIFY 0xFE00000000000000LL
+
/* used on the domain socket, send a pdu to the local daemon */
#define CTDB_CURRENT_NODE 0xF0000001
/* send a broadcast to all nodes in the cluster, active or not */
@@ -144,6 +148,15 @@ struct ctdb_client_control_state {
} async;
};
+struct ctdb_client_notify_register {
+ uint64_t srvid;
+ uint32_t len;
+ uint8_t notify_data[1];
+};
+
+struct ctdb_client_notify_deregister {
+ uint64_t srvid;
+};
struct event_context;
diff --git a/include/ctdb_private.h b/include/ctdb_private.h
index 2f4937e..ad84628 100644
--- a/include/ctdb_private.h
+++ b/include/ctdb_private.h
@@ -160,7 +160,6 @@ typedef void (*ctdb_control_callback_fn_t)(struct ctdb_context *,
int32_t status, TDB_DATA data,
const char *errormsg,
void *private_data);
-
/*
structure describing a connected client in the daemon
*/
@@ -173,6 +172,7 @@ struct ctdb_client {
struct ctdb_tcp_list *tcp_list;
uint32_t db_id;
uint32_t num_persistent_updates;
+ struct ctdb_client_notify_list *notify;
};
@@ -612,6 +612,8 @@ enum ctdb_controls {CTDB_CONTROL_PROCESS_EXISTS = 0,
CTDB_CONTROL_SET_DB_PRIORITY = 111,
CTDB_CONTROL_GET_DB_PRIORITY = 112,
CTDB_CONTROL_TRANSACTION_CANCEL = 113,
+ CTDB_CONTROL_REGISTER_NOTIFY = 114,
+ CTDB_CONTROL_DEREGISTER_NOTIFY = 115,
};
/*
@@ -1501,4 +1503,8 @@ int32_t ctdb_control_set_ban_state(struct ctdb_context *ctdb, TDB_DATA indata);
int32_t ctdb_control_get_ban_state(struct ctdb_context *ctdb, TDB_DATA *outdata);
int32_t ctdb_control_set_db_priority(struct ctdb_context *ctdb, TDB_DATA indata);
+int32_t ctdb_control_register_notify(struct ctdb_context *ctdb, uint32_t client_id, TDB_DATA indata);
+
+int32_t ctdb_control_deregister_notify(struct ctdb_context *ctdb, uint32_t client_id, TDB_DATA indata);
+
#endif
diff --git a/server/ctdb_control.c b/server/ctdb_control.c
index 24d22d0..904bebe 100644
--- a/server/ctdb_control.c
+++ b/server/ctdb_control.c
@@ -552,6 +552,13 @@ static int32_t ctdb_control_dispatch(struct ctdb_context *ctdb,
CHECK_CONTROL_DATA_SIZE(0);
return ctdb_control_transaction_cancel(ctdb);
+ case CTDB_CONTROL_REGISTER_NOTIFY:
+ return ctdb_control_register_notify(ctdb, client_id, indata);
+
+ case CTDB_CONTROL_DEREGISTER_NOTIFY:
+ CHECK_CONTROL_DATA_SIZE(sizeof(struct ctdb_client_notify_deregister));
+ return ctdb_control_deregister_notify(ctdb, client_id, indata);
+
default:
DEBUG(DEBUG_CRIT,(__location__ " Unknown CTDB control opcode %u\n", opcode));
return -1;
diff --git a/server/ctdb_daemon.c b/server/ctdb_daemon.c
index 532887c..a8dc651 100644
--- a/server/ctdb_daemon.c
+++ b/server/ctdb_daemon.c
@@ -31,7 +31,6 @@
static void daemon_incoming_packet(void *, struct ctdb_req_header *);
-
static void print_exit_message(void)
{
DEBUG(DEBUG_NOTICE,("CTDB daemon shutting down\n"));
@@ -1043,3 +1042,105 @@ int ctdb_daemon_send_message(struct ctdb_context *ctdb, uint32_t pnn,
return 0;
}
+
+
+struct ctdb_client_notify_list {
+ struct ctdb_client_notify_list *next, *prev;
+ struct ctdb_context *ctdb;
+ uint64_t srvid;
+ TDB_DATA data;
+};
+
+
+static int ctdb_client_notify_destructor(struct ctdb_client_notify_list *nl)
+{
+ int ret;
+
+ DEBUG(DEBUG_ERR,("Sending client notify message for srvid:%llu\n", (unsigned long long)nl->srvid));
+
+ ret = ctdb_daemon_send_message(nl->ctdb, CTDB_BROADCAST_CONNECTED, (unsigned long long)nl->srvid, nl->data);
+ if (ret != 0) {
+ DEBUG(DEBUG_ERR,("Failed to send client notify message\n"));
+ }
+
+ return 0;
+}
+
+int32_t ctdb_control_register_notify(struct ctdb_context *ctdb, uint32_t client_id, TDB_DATA indata)
+{
+ struct ctdb_client_notify_register *notify = (struct ctdb_client_notify_register *)indata.dptr;
+ struct ctdb_client *client = ctdb_reqid_find(ctdb, client_id, struct ctdb_client);
+ struct ctdb_client_notify_list *nl;
+
+ DEBUG(DEBUG_ERR,("Register srvid %llu for client %d\n", (unsigned long long)notify->srvid, client_id));
+
+ if (indata.dsize < offsetof(struct ctdb_client_notify_register, notify_data)) {
+ DEBUG(DEBUG_ERR,(__location__ " Too little data in control : %d\n", (int)indata.dsize));
+ return -1;
+ }
+
+ if (indata.dsize != (notify->len + offsetof(struct ctdb_client_notify_register, notify_data))) {
+ DEBUG(DEBUG_ERR,(__location__ " Wrong amount of data in control. Got %d, expected %d\n", (int)indata.dsize, (int)(notify->len + offsetof(struct ctdb_client_notify_register, notify_data))));
+ return -1;
+ }
+
+
+ if (client == NULL) {
+ DEBUG(DEBUG_ERR,(__location__ " Could not find client parent structure. You can not send this control to a remote node\n"));
+ return -1;
+ }
+
+ for(nl=client->notify; nl; nl=nl->next) {
+ if (nl->srvid == notify->srvid) {
+ break;
+ }
+ }
+ if (nl != NULL) {
+ DEBUG(DEBUG_ERR,(__location__ " Notification for srvid:%llu already exists for this client\n", (unsigned long long)notify->srvid));
+ return -1;
+ }
+
+ nl = talloc(client, struct ctdb_client_notify_list);
+ CTDB_NO_MEMORY(ctdb, nl);
+ nl->ctdb = ctdb;
+ nl->srvid = notify->srvid;
+ nl->data.dsize = notify->len;
+ nl->data.dptr = talloc_size(nl, nl->data.dsize);
+ CTDB_NO_MEMORY(ctdb, nl->data.dptr);
+ memcpy(nl->data.dptr, notify->notify_data, nl->data.dsize);
+
+ DLIST_ADD(client->notify, nl);
+ talloc_set_destructor(nl, ctdb_client_notify_destructor);
+
+ return 0;
+}
+
+int32_t ctdb_control_deregister_notify(struct ctdb_context *ctdb, uint32_t client_id, TDB_DATA indata)
+{
+ struct ctdb_client_notify_deregister *notify = (struct ctdb_client_notify_deregister *)indata.dptr;
+ struct ctdb_client *client = ctdb_reqid_find(ctdb, client_id, struct ctdb_client);
+ struct ctdb_client_notify_list *nl;
+
+ DEBUG(DEBUG_ERR,("Deregister srvid %llu for client %d\n", (unsigned long long)notify->srvid, client_id));
+
+ if (client == NULL) {
+ DEBUG(DEBUG_ERR,(__location__ " Could not find client parent structure. You can not send this control to a remote node\n"));
+ return -1;
+ }
+
+ for(nl=client->notify; nl; nl=nl->next) {
+ if (nl->srvid == notify->srvid) {
+ break;
+ }
+ }
+ if (nl == NULL) {
+ DEBUG(DEBUG_ERR,(__location__ " No notification for srvid:%llu found for this client\n", (unsigned long long)notify->srvid));
+ return -1;
+ }
+
+ DLIST_REMOVE(client->notify, nl);
+ talloc_set_destructor(nl, NULL);
+ talloc_free(nl);
+
+ return 0;
+}
diff --git a/server/ctdb_tunables.c b/server/ctdb_tunables.c
index c461bca..8ad4cde 100644
--- a/server/ctdb_tunables.c
+++ b/server/ctdb_tunables.c
@@ -37,8 +37,8 @@ static const struct {
{ "TakeoverTimeout", 5, offsetof(struct ctdb_tunable, takeover_timeout) },
{ "MonitorInterval", 15, offsetof(struct ctdb_tunable, monitor_interval) },
{ "TickleUpdateInterval",20, offsetof(struct ctdb_tunable, tickle_update_interval) },
- { "EventScriptTimeout", 60, offsetof(struct ctdb_tunable, script_timeout) },
- { "EventScriptBanCount", 5, offsetof(struct ctdb_tunable, script_ban_count) },
+ { "EventScriptTimeout", 20, offsetof(struct ctdb_tunable, script_timeout) },
+ { "EventScriptBanCount", 10, offsetof(struct ctdb_tunable, script_ban_count) },
{ "EventScriptUnhealthyOnTimeout", 0, offsetof(struct ctdb_tunable, script_unhealthy_on_timeout) },
{ "RecoveryGracePeriod", 120, offsetof(struct ctdb_tunable, recovery_grace_period) },
{ "RecoveryBanPeriod", 300, offsetof(struct ctdb_tunable, recovery_ban_period) },
diff --git a/server/eventscript.c b/server/eventscript.c
index aae5eef..3b86615 100644
--- a/server/eventscript.c
+++ b/server/eventscript.c
@@ -18,6 +18,7 @@
*/
#include "includes.h"
+#include <time.h>
#include "system/filesys.h"
#include "system/wait.h"
#include "system/dir.h"
@@ -36,36 +37,19 @@ static struct {
*/
static void sigterm(int sig)
{
- FILE *p;
+ char tbuf[100], buf[200];
+ time_t t;
DEBUG(DEBUG_ERR,("Timed out running script '%s' after %.1f seconds pid :%d\n",
child_state.script_running, timeval_elapsed(&child_state.start), getpid()));
- p = popen("pstree -p", "r");
- if (p == NULL) {
- DEBUG(DEBUG_ERR,("Failed popen to collect pstree for hung script\n"));
- } else {
- char buf[256];
- int count;
-
- DEBUG(DEBUG_ERR,("PSTREE:\n"));
- while(!feof(p)){
- count=fread(buf, 1, 255, p);
- if (count == EOF) {
- break;
- }
- if (count < 0) {
- break;
- }
- if (count == 0) {
- break;
- }
- buf[count] = 0;
- DEBUG(DEBUG_ERR,("%s", buf));
- }
- DEBUG(DEBUG_ERR,("END OF PSTREE OUTPUT\n"));
- pclose(p);
- }
+ t = time(NULL);
+
+ strftime(tbuf, sizeof(tbuf)-1, "%Y%m%d%H%M%S", localtime(&t));
+ sprintf(buf, "pstree -p >/tmp/ctdb.event.%s.%d", tbuf, getpid());
+ system(buf);
+
+ DEBUG(DEBUG_ERR,("Logged timedout eventscript : %s\n", buf));
/* all the child processes will be running in the same process group */
kill(-getpgrp(), SIGKILL);
--
CTDB repository
More information about the samba-cvs
mailing list