[SCM] CTDB repository - branch master updated - ctdb-1.0.114-322-ge6e16fc
Ronnie Sahlberg
sahlberg at samba.org
Tue Sep 28 20:15:14 MDT 2010
The branch, master has been updated
via e6e16fcd5a45ebd3739a8160c8fb5f44494edb9e (commit)
via 09e5a2fb47c312f71f455cdbf8d9cabcca1041a4 (commit)
via 2e648df890e5713bc575965d87937827b068d0d7 (commit)
from b4c00b4ac30ec215629f44f802ce9660abcd7a48 (commit)
http://gitweb.samba.org/?p=sahlberg/ctdb.git;a=shortlog;h=master
- Log -----------------------------------------------------------------
commit e6e16fcd5a45ebd3739a8160c8fb5f44494edb9e
Author: Ronnie Sahlberg <ronniesahlberg at gmail.com>
Date: Wed Sep 29 12:13:05 2010 +1000
Add rolling statistics that are collected across 10 second intervals.
Add a new command "ctdb stats [num]" that prints the [num] most recent statistics intervals collected.
commit 09e5a2fb47c312f71f455cdbf8d9cabcca1041a4
Author: Ronnie Sahlberg <ronniesahlberg at gmail.com>
Date: Wed Sep 29 10:58:18 2010 +1000
Add a new statistics structure to keep the current running statistics
commit 2e648df890e5713bc575965d87937827b068d0d7
Author: Ronnie Sahlberg <ronniesahlberg at gmail.com>
Date: Wed Sep 29 10:38:41 2010 +1000
Create macros to update the statistics counters and use these macros
everywhere instead of manipulating the counters directly.
-----------------------------------------------------------------------
Summary of changes:
Makefile.in | 2 +-
client/ctdb_client.c | 20 ++++++++
common/ctdb_util.c | 34 -------------
include/ctdb_client.h | 3 +
include/ctdb_private.h | 122 +++++++++++++++++++++++++---------------------
include/ctdb_protocol.h | 65 ++++++++++++++++++++++++
server/ctdb_call.c | 6 +--
server/ctdb_control.c | 8 ++-
server/ctdb_daemon.c | 72 ++++++++++-----------------
server/ctdb_lockwait.c | 18 +++---
server/ctdb_persistent.c | 18 +++---
server/ctdb_recover.c | 4 +-
server/ctdb_server.c | 20 ++++----
server/ctdb_statistics.c | 77 +++++++++++++++++++++++++++++
server/ctdb_traverse.c | 2 +-
server/ctdbd.c | 2 +-
tools/ctdb.c | 33 ++++++++++++
17 files changed, 332 insertions(+), 174 deletions(-)
create mode 100644 server/ctdb_statistics.c
Changeset truncated at 500 lines:
diff --git a/Makefile.in b/Makefile.in
index 369cec0..849d626 100755
--- a/Makefile.in
+++ b/Makefile.in
@@ -63,7 +63,7 @@ CTDB_SERVER_OBJ = server/ctdbd.o server/ctdb_daemon.o server/ctdb_lockwait.o \
server/ctdb_traverse.o server/eventscript.o server/ctdb_takeover.o \
server/ctdb_serverids.o server/ctdb_persistent.o \
server/ctdb_keepalive.o server/ctdb_logging.o server/ctdb_uptime.o \
- server/ctdb_vacuum.o server/ctdb_banning.o \
+ server/ctdb_vacuum.o server/ctdb_banning.o server/ctdb_statistics.o \
$(CTDB_CLIENT_OBJ) $(CTDB_TCP_OBJ) @INFINIBAND_WRAPPER_OBJ@
TEST_BINS=tests/bin/ctdb_bench tests/bin/ctdb_fetch tests/bin/ctdb_fetch_one \
diff --git a/client/ctdb_client.c b/client/ctdb_client.c
index 6215dc0..4c770fd 100644
--- a/client/ctdb_client.c
+++ b/client/ctdb_client.c
@@ -4216,3 +4216,23 @@ int ctdb_ctrl_get_db_priority(struct ctdb_context *ctdb, struct timeval timeout,
return 0;
}
+/* Fetch the statistics history from destnode. On success *stats is a talloc copy of the reply allocated on mem_ctx. Returns 0 on success, -1 on failure. */
+int ctdb_ctrl_getstathistory(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, TALLOC_CTX *mem_ctx, struct ctdb_statistics_wire **stats)
+{
+ int ret;
+ TDB_DATA outdata;
+ int32_t res;
+
+ ret = ctdb_control(ctdb, destnode, 0,
+ CTDB_CONTROL_GET_STAT_HISTORY, 0, tdb_null,
+ mem_ctx, &outdata, &res, &timeout, NULL);
+ if (ret != 0 || res != 0 || outdata.dsize == 0) { /* call failed, control returned a non-zero status, or the reply was empty */
+ DEBUG(DEBUG_ERR,(__location__ " ctdb_control for getstathistory failed ret:%d res:%d\n", ret, res));
+ return -1;
+ }
+
+ *stats = (struct ctdb_statistics_wire *)talloc_memdup(mem_ctx, outdata.dptr, outdata.dsize);
+ talloc_free(outdata.dptr);
+ /* talloc_memdup() returns NULL when out of memory; report that as a failure instead of returning 0 with *stats == NULL */
+ return (*stats == NULL) ? -1 : 0;
+}
diff --git a/common/ctdb_util.c b/common/ctdb_util.c
index 46c737a..4acfa3f 100644
--- a/common/ctdb_util.c
+++ b/common/ctdb_util.c
@@ -123,40 +123,6 @@ static void *_idr_find_type(struct idr_context *idp, int id, const char *type, c
return p;
}
-
-/*
- update a max latency number
- */
-void ctdb_latency(struct ctdb_db_context *ctdb_db, const char *name, double *latency, struct timeval t)
-{
- double l = timeval_elapsed(&t);
- if (l > *latency) {
- *latency = l;
- }
-
- if (ctdb_db->ctdb->tunable.log_latency_ms !=0) {
- if (l*1000 > ctdb_db->ctdb->tunable.log_latency_ms) {
- DEBUG(DEBUG_WARNING, ("High latency %.6fs for operation %s on database %s\n", l, name, ctdb_db->db_name));
- }
- }
-}
-
-/*
- update a reclock latency number
- */
-void ctdb_reclock_latency(struct ctdb_context *ctdb, const char *name, double *latency, double l)
-{
- if (l > *latency) {
- *latency = l;
- }
-
- if (ctdb->tunable.reclock_latency_ms !=0) {
- if (l*1000 > ctdb->tunable.reclock_latency_ms) {
- DEBUG(DEBUG_ERR, ("High RECLOCK latency %fs for operation %s\n", l, name));
- }
- }
-}
-
uint32_t ctdb_reqid_new(struct ctdb_context *ctdb, void *state)
{
int id = idr_get_new_above(ctdb->idr, state, ctdb->lastid+1, INT_MAX);
diff --git a/include/ctdb_client.h b/include/ctdb_client.h
index 2b709a0..b2f65ee 100644
--- a/include/ctdb_client.h
+++ b/include/ctdb_client.h
@@ -588,4 +588,7 @@ struct ctdb_db_priority {
int ctdb_ctrl_set_db_priority(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, struct ctdb_db_priority *db_prio);
int ctdb_ctrl_get_db_priority(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t db_id, uint32_t *priority);
+int ctdb_ctrl_getstathistory(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, TALLOC_CTX *mem_ctx, struct ctdb_statistics_wire **stats);
+
+
#endif /* _CTDB_CLIENT_H */
diff --git a/include/ctdb_private.h b/include/ctdb_private.h
index cd6aeec..2394b44 100644
--- a/include/ctdb_private.h
+++ b/include/ctdb_private.h
@@ -276,59 +276,63 @@ struct ctdb_daemon_data {
struct ctdb_queue *queue;
};
-/*
- ctdb status information
- */
-struct ctdb_statistics {
- uint32_t num_clients;
- uint32_t frozen;
- uint32_t recovering;
- uint32_t client_packets_sent;
- uint32_t client_packets_recv;
- uint32_t node_packets_sent;
- uint32_t node_packets_recv;
- uint32_t keepalive_packets_sent;
- uint32_t keepalive_packets_recv;
- struct {
- uint32_t req_call;
- uint32_t reply_call;
- uint32_t req_dmaster;
- uint32_t reply_dmaster;
- uint32_t reply_error;
- uint32_t req_message;
- uint32_t req_control;
- uint32_t reply_control;
- } node;
- struct {
- uint32_t req_call;
- uint32_t req_message;
- uint32_t req_control;
- } client;
- struct {
- uint32_t call;
- uint32_t control;
- uint32_t traverse;
- } timeouts;
- struct {
- double ctdbd;
- double recd;
- } reclock;
- uint32_t total_calls;
- uint32_t pending_calls;
- uint32_t lockwait_calls;
- uint32_t pending_lockwait_calls;
- uint32_t childwrite_calls;
- uint32_t pending_childwrite_calls;
- uint32_t memory_used;
- uint32_t __last_counter; /* hack for control_statistics_all */
- uint32_t max_hop_count;
- double max_call_latency;
- double max_lockwait_latency;
- double max_childwrite_latency;
- uint32_t num_recoveries;
- struct timeval statistics_start_time;
- struct timeval statistics_current_time;
-};
+/* Raise `counter` to `value` in both ctdb->statistics and ctdb->statistics_current when `value` is larger. `value` is expanded more than once, so pass an expression without side effects. */
+#define CTDB_UPDATE_STAT(ctdb, counter, value) \
+ { \
+ if ((value) > (ctdb)->statistics.counter) { \
+ (ctdb)->statistics.counter = (value); \
+ } \
+ if ((value) > (ctdb)->statistics_current.counter) { \
+ (ctdb)->statistics_current.counter = (value); \
+ } \
+ }
+/* Add one to `counter` in both ctdb->statistics and ctdb->statistics_current. `counter` may be a nested member such as timeouts.control. */
+#define CTDB_INCREMENT_STAT(ctdb, counter) \
+ { \
+ ctdb->statistics.counter++; \
+ ctdb->statistics_current.counter++; \
+ }
+/* Subtract one from `counter` in ctdb->statistics and ctdb->statistics_current; each copy is only decremented while it is above 0, so neither wraps around. */
+#define CTDB_DECREMENT_STAT(ctdb, counter) \
+ { \
+ if (ctdb->statistics.counter > 0) \
+ ctdb->statistics.counter--; \
+ if (ctdb->statistics_current.counter > 0) \
+ ctdb->statistics_current.counter--; \
+ }
+/* Keep `value` (seconds) as the new maximum of `counter` in both statistics structures; log at DEBUG_ERR when value*1000 exceeds tunable.reclock_latency_ms (0 disables the log). `value` is expanded several times. */
+#define CTDB_UPDATE_RECLOCK_LATENCY(ctdb, name, counter, value) \
+ { \
+ if (value > ctdb->statistics.counter) \
+ ctdb->statistics.counter = value; \
+ if (value > ctdb->statistics_current.counter) \
+ ctdb->statistics_current.counter = value; \
+ \
+ if (ctdb->tunable.reclock_latency_ms != 0) { \
+ if (value*1000 > ctdb->tunable.reclock_latency_ms) { \
+ DEBUG(DEBUG_ERR, ("High RECLOCK latency %fs for operation %s\n", value, name)); \
+ } \
+ } \
+ }
+/* Measure the time elapsed since `t` with timeval_elapsed() and keep it as the new maximum of `counter` in both statistics structures. */
+/* Logs at DEBUG_WARNING, naming `operation` and db->db_name, when the elapsed time in ms exceeds tunable.log_latency_ms (0 disables the log). */
+#define CTDB_UPDATE_LATENCY(ctdb, db, operation, counter, t) \
+ { \
+ double l = timeval_elapsed(&t); \
+ if (l > ctdb->statistics.counter) \
+ ctdb->statistics.counter = l; \
+ if (l > ctdb->statistics_current.counter) \
+ ctdb->statistics_current.counter = l; \
+ \
+ if (ctdb->tunable.log_latency_ms !=0) { \
+ if (l*1000 > ctdb->tunable.log_latency_ms) { \
+ DEBUG(DEBUG_WARNING, ("High latency %.6fs for operation %s on database %s\n", l, operation, db->db_name));\
+ } \
+ } \
+ }
+
+
+
#define INVALID_GENERATION 1
@@ -418,6 +422,9 @@ struct ctdb_context {
struct ctdb_message_list *message_list;
struct ctdb_daemon_data daemon;
struct ctdb_statistics statistics;
+ struct ctdb_statistics statistics_current;
+#define MAX_STAT_HISTORY 100
+ struct ctdb_statistics statistics_history[MAX_STAT_HISTORY];
struct ctdb_vnn_map *vnn_map;
uint32_t num_clients;
uint32_t recovery_master;
@@ -748,9 +755,6 @@ void ctdb_recv_raw_pkt(void *p, uint8_t *data, uint32_t length);
int ctdb_socket_connect(struct ctdb_context *ctdb);
-void ctdb_latency(struct ctdb_db_context *ctdb_db, const char *name, double *latency, struct timeval t);
-void ctdb_reclock_latency(struct ctdb_context *ctdb, const char *name, double *latency, double l);
-
#define CTDB_BAD_REQID ((uint32_t)-1)
uint32_t ctdb_reqid_new(struct ctdb_context *ctdb, void *state);
void *_ctdb_reqid_find(struct ctdb_context *ctdb, uint32_t reqid, const char *type, const char *location);
@@ -1339,4 +1343,10 @@ int update_ip_assignment_tree(struct ctdb_context *ctdb,
int ctdb_init_tevent_logging(struct ctdb_context *ctdb);
+int ctdb_statistics_init(struct ctdb_context *ctdb);
+
+int32_t ctdb_control_get_stat_history(struct ctdb_context *ctdb,
+ struct ctdb_req_control *c,
+ TDB_DATA *outdata);
+
#endif
diff --git a/include/ctdb_protocol.h b/include/ctdb_protocol.h
index f04b3f3..750f1f0 100644
--- a/include/ctdb_protocol.h
+++ b/include/ctdb_protocol.h
@@ -357,6 +357,7 @@ enum ctdb_controls {CTDB_CONTROL_PROCESS_EXISTS = 0,
CTDB_CONTROL_GET_IFACES = 124,
CTDB_CONTROL_SET_IFACE_LINK_STATE = 125,
CTDB_CONTROL_TCP_ADD_DELAYED_UPDATE = 126,
+ CTDB_CONTROL_GET_STAT_HISTORY = 127,
};
/*
@@ -534,4 +535,68 @@ struct ctdb_all_public_ips {
struct ctdb_public_ip ips[1];
};
+
+/*
+ ctdb statistics information: counters and maxima maintained through the CTDB_*_STAT and CTDB_UPDATE_*LATENCY macros; also embedded in struct ctdb_statistics_wire
+ */
+struct ctdb_statistics {
+ uint32_t num_clients; /* decremented in ctdb_client_destructor() */
+ uint32_t frozen;
+ uint32_t recovering;
+ uint32_t client_packets_sent; /* incremented in daemon_queue_send() */
+ uint32_t client_packets_recv;
+ uint32_t node_packets_sent;
+ uint32_t node_packets_recv;
+ uint32_t keepalive_packets_sent; /* incremented in ctdb_send_keepalive() */
+ uint32_t keepalive_packets_recv;
+ struct {
+ uint32_t req_call;
+ uint32_t reply_call;
+ uint32_t req_dmaster;
+ uint32_t reply_dmaster;
+ uint32_t reply_error;
+ uint32_t req_message;
+ uint32_t req_control;
+ uint32_t reply_control;
+ } node;
+ struct {
+ uint32_t req_call;
+ uint32_t req_message;
+ uint32_t req_control;
+ } client;
+ struct {
+ uint32_t call;
+ uint32_t control; /* incremented in ctdb_control_timeout() */
+ uint32_t traverse;
+ } timeouts;
+ struct {
+ double ctdbd;
+ double recd; /* maximum latency (seconds) reported through CTDB_CONTROL_RECD_RECLOCK_LATENCY */
+ } reclock;
+ uint32_t total_calls; /* incremented in daemon_request_call_from_client() */
+ uint32_t pending_calls; /* decremented when a client call completes or fails */
+ uint32_t lockwait_calls;
+ uint32_t pending_lockwait_calls;
+ uint32_t childwrite_calls;
+ uint32_t pending_childwrite_calls;
+ uint32_t memory_used;
+ uint32_t __last_counter; /* hack for control_statistics_all */
+ uint32_t max_hop_count; /* updated from c->hopcount in ctdb_request_call() */
+ double max_call_latency; /* seconds, measured with timeval_elapsed() */
+ double max_lockwait_latency;
+ double max_childwrite_latency;
+ uint32_t num_recoveries;
+ struct timeval statistics_start_time;
+ struct timeval statistics_current_time;
+};
+
+/*
+ * wire format for statistics history, as returned by the CTDB_CONTROL_GET_STAT_HISTORY control
+ */
+struct ctdb_statistics_wire {
+ uint32_t num; /* presumably the number of entries in stats[] -- TODO confirm against the server side */
+ struct ctdb_statistics stats[1]; /* declared with one element; NOTE(review): the reply is copied by its full dsize, so more entries may follow -- confirm */
+};
+
+
#endif
diff --git a/server/ctdb_call.c b/server/ctdb_call.c
index 84a8c78..0bb7902 100644
--- a/server/ctdb_call.c
+++ b/server/ctdb_call.c
@@ -463,9 +463,7 @@ void ctdb_request_call(struct ctdb_context *ctdb, struct ctdb_req_header *hdr)
return;
}
- if (c->hopcount > ctdb->statistics.max_hop_count) {
- ctdb->statistics.max_hop_count = c->hopcount;
- }
+ CTDB_UPDATE_STAT(ctdb, max_hop_count, c->hopcount);
/* if this nodes has done enough consecutive calls on the same record
then give them the record
@@ -827,7 +825,7 @@ void ctdb_send_keepalive(struct ctdb_context *ctdb, uint32_t destnode)
r->hdr.destnode = destnode;
r->hdr.reqid = 0;
- ctdb->statistics.keepalive_packets_sent++;
+ CTDB_INCREMENT_STAT(ctdb, keepalive_packets_sent);
ctdb_queue_packet(ctdb, &r->hdr);
diff --git a/server/ctdb_control.c b/server/ctdb_control.c
index bfb7bd1..3356ba1 100644
--- a/server/ctdb_control.c
+++ b/server/ctdb_control.c
@@ -461,7 +461,7 @@ static int32_t ctdb_control_dispatch(struct ctdb_context *ctdb,
case CTDB_CONTROL_RECD_RECLOCK_LATENCY:
CHECK_CONTROL_DATA_SIZE(sizeof(double));
- ctdb_reclock_latency(ctdb, "recd reclock", &ctdb->statistics.reclock.recd, *((double *)indata.dptr));
+ CTDB_UPDATE_RECLOCK_LATENCY(ctdb, "recd reclock", reclock.recd, *((double *)indata.dptr));
return 0;
case CTDB_CONTROL_GET_RECLOCK_FILE:
CHECK_CONTROL_DATA_SIZE(0);
@@ -600,6 +600,10 @@ static int32_t ctdb_control_dispatch(struct ctdb_context *ctdb,
CHECK_CONTROL_DATA_SIZE(sizeof(struct ctdb_control_iface_info));
return ctdb_control_set_iface_link(ctdb, c, indata);
+ case CTDB_CONTROL_GET_STAT_HISTORY:
+ CHECK_CONTROL_DATA_SIZE(0);
+ return ctdb_control_get_stat_history(ctdb, c, outdata);
+
default:
DEBUG(DEBUG_CRIT,(__location__ " Unknown CTDB control opcode %u\n", opcode));
return -1;
@@ -720,7 +724,7 @@ static void ctdb_control_timeout(struct event_context *ev, struct timed_event *t
struct ctdb_control_state *state = talloc_get_type(private_data, struct ctdb_control_state);
TALLOC_CTX *tmp_ctx = talloc_new(ev);
- state->ctdb->statistics.timeouts.control++;
+ CTDB_INCREMENT_STAT(state->ctdb, timeouts.control);
talloc_steal(tmp_ctx, state);
diff --git a/server/ctdb_daemon.c b/server/ctdb_daemon.c
index 418d91e..16ccaac 100644
--- a/server/ctdb_daemon.c
+++ b/server/ctdb_daemon.c
@@ -97,7 +97,7 @@ static void block_signal(int signum)
*/
static int daemon_queue_send(struct ctdb_client *client, struct ctdb_req_header *hdr)
{
- client->ctdb->statistics.client_packets_sent++;
+ CTDB_INCREMENT_STAT(client->ctdb, client_packets_sent);
if (hdr->operation == CTDB_REQ_MESSAGE) {
if (ctdb_queue_length(client->queue) > client->ctdb->tunable.max_queue_depth_drop_msg) {
DEBUG(DEBUG_ERR,("CTDB_REQ_MESSAGE queue full - killing client connection.\n"));
@@ -184,9 +184,7 @@ static int ctdb_client_destructor(struct ctdb_client *client)
ctdb_takeover_client_destructor_hook(client);
ctdb_reqid_remove(client->ctdb, client->client_id);
- if (client->ctdb->statistics.num_clients) {
- client->ctdb->statistics.num_clients--;
- }
+ CTDB_DECREMENT_STAT(client->ctdb, num_clients);
if (client->num_persistent_updates != 0) {
DEBUG(DEBUG_ERR,(__location__ " Client disconnecting with %u persistent updates in flight. Starting recovery\n", client->num_persistent_updates));
@@ -258,10 +256,9 @@ static void daemon_call_from_client_callback(struct ctdb_call_state *state)
res = ctdb_daemon_call_recv(state, dstate->call);
if (res != 0) {
DEBUG(DEBUG_ERR, (__location__ " ctdbd_call_recv() returned error\n"));
- if (client->ctdb->statistics.pending_calls > 0) {
- client->ctdb->statistics.pending_calls--;
- }
- ctdb_latency(ctdb_db, "call_from_client_cb 1", &client->ctdb->statistics.max_call_latency, dstate->start_time);
+ CTDB_DECREMENT_STAT(client->ctdb, pending_calls);
+
+ CTDB_UPDATE_LATENCY(client->ctdb, ctdb_db, "call_from_client_cb 1", max_call_latency, dstate->start_time);
return;
}
@@ -270,10 +267,8 @@ static void daemon_call_from_client_callback(struct ctdb_call_state *state)
length, struct ctdb_reply_call);
if (r == NULL) {
DEBUG(DEBUG_ERR, (__location__ " Failed to allocate reply_call in ctdb daemon\n"));
- if (client->ctdb->statistics.pending_calls > 0) {
- client->ctdb->statistics.pending_calls--;
- }
- ctdb_latency(ctdb_db, "call_from_client_cb 2", &client->ctdb->statistics.max_call_latency, dstate->start_time);
+ CTDB_DECREMENT_STAT(client->ctdb, pending_calls);
+ CTDB_UPDATE_LATENCY(client->ctdb, ctdb_db, "call_from_client_cb 2", max_call_latency, dstate->start_time);
return;
}
r->hdr.reqid = dstate->reqid;
@@ -288,11 +283,9 @@ static void daemon_call_from_client_callback(struct ctdb_call_state *state)
if (res != 0) {
DEBUG(DEBUG_ERR, (__location__ " Failed to queue packet from daemon to client\n"));
}
- ctdb_latency(ctdb_db, "call_from_client_cb 3", &client->ctdb->statistics.max_call_latency, dstate->start_time);
+ CTDB_UPDATE_LATENCY(client->ctdb, ctdb_db, "call_from_client_cb 3", max_call_latency, dstate->start_time);
+ CTDB_DECREMENT_STAT(client->ctdb, pending_calls);
talloc_free(dstate);
- if (client->ctdb->statistics.pending_calls > 0) {
- client->ctdb->statistics.pending_calls--;
- }
}
struct ctdb_daemon_packet_wrap {
@@ -344,18 +337,14 @@ static void daemon_request_call_from_client(struct ctdb_client *client,
struct ctdb_context *ctdb = client->ctdb;
struct ctdb_daemon_packet_wrap *w;
- ctdb->statistics.total_calls++;
- if (client->ctdb->statistics.pending_calls > 0) {
- ctdb->statistics.pending_calls++;
- }
+ CTDB_INCREMENT_STAT(ctdb, total_calls);
+ CTDB_INCREMENT_STAT(ctdb, pending_calls); /* a new call is now in flight; the completion and error paths decrement this again */
ctdb_db = find_ctdb_db(client->ctdb, c->db_id);
if (!ctdb_db) {
DEBUG(DEBUG_ERR, (__location__ " Unknown database in request. db_id==0x%08x",
c->db_id));
- if (client->ctdb->statistics.pending_calls > 0) {
- ctdb->statistics.pending_calls--;
- }
+ CTDB_DECREMENT_STAT(ctdb, pending_calls);
return;
}
@@ -383,9 +372,7 @@ static void daemon_request_call_from_client(struct ctdb_client *client,
daemon_incoming_packet_wrap, w, True);
if (ret == -2) {
/* will retry later */
- if (client->ctdb->statistics.pending_calls > 0) {
- ctdb->statistics.pending_calls--;
- }
+ CTDB_DECREMENT_STAT(ctdb, pending_calls);
return;
}
@@ -393,9 +380,7 @@ static void daemon_request_call_from_client(struct ctdb_client *client,
if (ret != 0) {
DEBUG(DEBUG_ERR,(__location__ " Unable to fetch record\n"));
- if (client->ctdb->statistics.pending_calls > 0) {
- ctdb->statistics.pending_calls--;
- }
+ CTDB_DECREMENT_STAT(ctdb, pending_calls);
return;
}
@@ -407,9 +392,7 @@ static void daemon_request_call_from_client(struct ctdb_client *client,
}
--
CTDB repository
More information about the samba-cvs
mailing list