Rev 94: ctdb/ib: added statistics + some cleanup in
http://samba.org/~tridge/psomogyi/
psomogyi at gamax.hu
psomogyi at gamax.hu
Fri May 11 11:25:20 GMT 2007
------------------------------------------------------------
revno: 94
revision-id: psomogyi at gamax.hu-20070511112450-wjf21qva3pjifxy9
parent: psomogyi at gamax.hu-20070502152756-cgv2olqgrryuhpvq
committer: Peter Somogyi <psomogyi at gamax.hu>
branch nick: ctdb
timestamp: Fri 2007-05-11 13:24:50 +0200
message:
ctdb/ib: added statistics + some cleanup
modified:
ib/ibwrapper.c ibwrapper.c-20061204130028-0125b4f5a72f4b11
ib/ibwrapper_internal.h ibwrapper_internal.h-20061204130028-47f0a7e658b16ca2
=== modified file 'ib/ibwrapper.c'
--- a/ib/ibwrapper.c 2007-05-02 15:27:56 +0000
+++ b/ib/ibwrapper.c 2007-05-11 11:24:50 +0000
@@ -48,11 +48,6 @@
#define IBW_LASTERR_BUFSIZE 512
static char ibw_lasterr[IBW_LASTERR_BUFSIZE];
-#define IBW_MAX_SEND_WR 256
-#define IBW_MAX_RECV_WR 1024
-#define IBW_RECV_BUFSIZE 256
-#define IBW_RECV_THRESHOLD (1 * 1024 * 1024)
-
static void ibw_event_handler_verbs(struct event_context *ev,
struct fd_event *fde, uint16_t flags, void *private_data);
static int ibw_fill_cq(struct ibw_conn *conn);
@@ -79,6 +74,13 @@
return NULL;
}
+ #ifdef IBW_ENABLE_STATS
+ /* increment statistics */
+ pctx->stats.n_alloc_mr++;
+ if (n>pctx->stats.n_max_largebuf_size)
+ pctx->stats.n_max_largebuf_size = n;
+ #endif
+
return buf;
}
@@ -340,15 +342,18 @@
DEBUG(10, ("ibw_refill_cq_recv(cmid: %p)\n", pconn->cm_id));
- list.addr = (uintptr_t) pconn->buf_recv + pctx->opts.recv_bufsize * pconn->recv_index;
- wr.wr_id = pconn->recv_index;
- pconn->recv_index = (pconn->recv_index + 1) % pctx->opts.max_recv_wr;
-
- rc = ibv_post_recv(pconn->cm_id->qp, &wr, &bad_wr);
- if (rc) {
- sprintf(ibw_lasterr, "refill/ibv_post_recv failed with %d\n", rc);
- DEBUG(0, (ibw_lasterr));
- return -2;
+ while(pconn->nrecv) {
+ list.addr = (uintptr_t) pconn->buf_recv + pctx->opts.recv_bufsize * pconn->recv_index;
+ wr.wr_id = pconn->recv_index;
+ pconn->recv_index = (pconn->recv_index + 1) % pctx->opts.max_recv_wr;
+
+ rc = ibv_post_recv(pconn->cm_id->qp, &wr, &bad_wr);
+ if (rc) {
+ sprintf(ibw_lasterr, "refill/ibv_post_recv failed with %d\n", rc);
+ DEBUG(0, (ibw_lasterr));
+ return -2;
+ }
+ pconn->nrecv--;
}
return 0;
@@ -585,6 +590,39 @@
return;
}
+static int ibw_wc_handler(struct ibw_conn *conn, struct ibv_wc *wc)
+{
+ if (wc->status) {
+ sprintf(ibw_lasterr, "cq completion failed status=%d, opcode=%d\n",
+ wc->status, wc->opcode);
+ return -1;
+ }
+
+ switch(wc->opcode) {
+ case IBV_WC_SEND:
+ DEBUG(10, ("send completion\n"));
+ return ibw_wc_send(conn, wc);
+
+ case IBV_WC_RDMA_WRITE:
+ DEBUG(10, ("rdma write completion\n"));
+ break;
+
+ case IBV_WC_RDMA_READ:
+ DEBUG(10, ("rdma read completion\n"));
+ break;
+
+ case IBV_WC_RECV:
+ DEBUG(10, ("recv completion\n"));
+ return ibw_wc_recv(conn, wc);
+
+ default:
+ break; /* see below */
+ }
+
+ sprintf(ibw_lasterr, "unknown completion %d\n", wc->opcode);
+ return -1;
+}
+
static void ibw_event_handler_verbs(struct event_context *ev,
struct fd_event *fde, uint16_t flags, void *private_data)
{
@@ -592,12 +630,15 @@
struct ibw_conn_priv *pconn = talloc_get_type(conn->internal, struct ibw_conn_priv);
struct ibw_ctx_priv *pctx = talloc_get_type(conn->ctx->internal, struct ibw_ctx_priv);
- struct ibv_wc wc;
- int rc;
+ struct ibv_wc wc[IBW_MAX_WC_POLL];
+ int rc, i;
struct ibv_cq *ev_cq;
void *ev_ctx;
DEBUG(10, ("ibw_event_handler_verbs(%u)\n", (uint32_t)flags));
+ #ifdef IBW_ENABLE_STATS
+ pctx->stats.n_events++;
+ #endif
/* TODO: check whether if it's good to have more channels here... */
rc = ibv_get_cq_event(pconn->verbs_channel, &ev_cq, &ev_ctx);
@@ -615,44 +656,21 @@
goto error;
}
- while((rc=ibv_poll_cq(pconn->cq, 1, &wc))==1) {
- if (wc.status) {
- sprintf(ibw_lasterr, "cq completion failed status=%d, opcode=%d, rc=%d\n",
- wc.status, wc.opcode, rc);
- goto error;
- }
-
- switch(wc.opcode) {
- case IBV_WC_SEND:
- DEBUG(10, ("send completion\n"));
- if (ibw_wc_send(conn, &wc))
- goto error;
- break;
-
- case IBV_WC_RDMA_WRITE:
- DEBUG(10, ("rdma write completion\n"));
- break;
-
- case IBV_WC_RDMA_READ:
- DEBUG(10, ("rdma read completion\n"));
- break;
-
- case IBV_WC_RECV:
- DEBUG(10, ("recv completion\n"));
- if (ibw_wc_recv(conn, &wc))
- goto error;
- break;
-
- default:
- sprintf(ibw_lasterr, "unknown completion %d\n", wc.opcode);
- goto error;
- }
+ /* we _have_ to use 'while' here so as not to lose the event */
+ while((rc=ibv_poll_cq(pconn->cq, IBW_MAX_WC_POLL, wc))>0) {
+ for(i=0; i<rc; i++)
+ if (ibw_wc_handler(conn, &wc[i]))
+ goto error;
}
- if (rc!=0) {
+ if (rc<0) {
sprintf(ibw_lasterr, "ibv_poll_cq error %d\n", rc);
goto error;
}
+ if (pconn->nrecv) {
+ ibw_refill_cq_recv(conn);
+ }
+
ibv_ack_cq_events(pconn->cq, 1);
return;
@@ -715,6 +733,9 @@
DEBUG(10, ("ibw_wc_send(cmid: %p, wr_id: %u, bl: %u)\n",
pconn->cm_id, (uint32_t)wc->wr_id, (uint32_t)wc->byte_len));
+ #ifdef IBW_ENABLE_STATS
+ pctx->stats.n_send_wc++;
+ #endif
assert(pconn->cm_id->qp->qp_num==wc->qp_num);
assert(wc->wr_id >= pctx->opts.max_recv_wr);
@@ -826,6 +847,11 @@
DEBUG(10, ("ibw_wc_recv: cmid=%p, wr_id: %u, bl: %u\n",
pconn->cm_id, (uint32_t)wc->wr_id, remain));
+ #ifdef IBW_ENABLE_STATS
+ pctx->stats.n_recv_wc++;
+ #endif
+
+ pconn->nrecv++;
assert(pconn->cm_id->qp->qp_num==wc->qp_num);
assert((int)wc->wr_id < pctx->opts.max_recv_wr);
@@ -891,9 +917,6 @@
}
} /* <remain> is always decreased at least by 1 */
- if (ibw_refill_cq_recv(conn))
- goto error;
-
return 0;
error:
@@ -1216,6 +1239,9 @@
goto error;
}
*buf = (void *)p->buf_large;
+ #ifdef IBW_ENABLE_STATS
+ pctx->stats.n_large_allocs++;
+ #endif
DLIST_REMOVE(pconn->extra_avail, p);
/* we don't have prepared index for this, so that
@@ -1312,6 +1338,9 @@
DEBUG(10, ("ibw_send#frag(cmid: %p, buf: %p, len: %u)\n",
pconn->cm_id, buf, len));
+ #ifdef IBW_ENABLE_STATS
+ pctx->stats.n_frags++;
+ #endif
/* single threaded => no race here: */
assert(p->ref_cnt==0);
@@ -1337,33 +1366,6 @@
return rc;
}
-int ibw_cancel_send_buf(struct ibw_conn *conn, void *buf, void *key)
-{
- struct ibw_ctx_priv *pctx = talloc_get_type(conn->ctx->internal, struct ibw_ctx_priv);
- struct ibw_conn_priv *pconn = talloc_get_type(conn->internal, struct ibw_conn_priv);
- struct ibw_wr *p = talloc_get_type(key, struct ibw_wr);
-
- assert(p!=NULL);
- assert(buf!=NULL);
- assert(conn!=NULL);
-
- if (p->buf_large!=NULL)
- ibw_free_mr(&p->buf_large, &p->mr_large);
-
- /* parallel case */
- if (p->wr_id < pctx->opts.max_send_wr) {
- DEBUG(10, ("ibw_cancel_send_buf#1 %u", (int)p->wr_id));
- DLIST_REMOVE(pconn->wr_list_used, p);
- DLIST_ADD(pconn->wr_list_avail, p);
- } else { /* "extra" packet */
- DEBUG(10, ("ibw_cancel_send_buf#2 %u", (int)p->wr_id));
- DLIST_REMOVE(pconn->extra_sent, p);
- DLIST_ADD(pconn->extra_avail, p);
- }
-
- return 0;
-}
-
const char *ibw_getLastError(void)
{
return ibw_lasterr;
=== modified file 'ib/ibwrapper_internal.h'
--- a/ib/ibwrapper_internal.h 2007-02-08 18:06:14 +0000
+++ b/ib/ibwrapper_internal.h 2007-05-11 11:24:50 +0000
@@ -21,6 +21,17 @@
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
+
+#define IBW_MAX_SEND_WR 256
+#define IBW_MAX_RECV_WR 1024
+#define IBW_RECV_BUFSIZE 256
+#define IBW_RECV_THRESHOLD (1 * 1024 * 1024)
+#define IBW_MAX_WC_POLL 10
+
+/* eats some CPU - can't afford here in production */
+#define talloc_get_type(ptr, type) (type *)ptr
+#define IBW_ENABLE_STATS
+
struct ibw_opts {
uint32_t max_send_wr;
uint32_t max_recv_wr;
@@ -45,6 +56,16 @@
struct ibw_wr *qnext, *qprev; /* in queue */
};
+struct ibw_stats {
+ uint32_t n_alloc_mr;
+ uint32_t n_large_allocs;
+ uint32_t n_frags;
+ uint32_t n_max_largebuf_size;
+ uint32_t n_send_wc;
+ uint32_t n_recv_wc;
+ uint32_t n_events;
+};
+
struct ibw_ctx_priv {
struct event_context *ectx;
@@ -59,6 +80,9 @@
ibw_receive_fn_t receive_func; /* see ibw_init */
long pagesize; /* sysconf result for memalign */
+
+ /* some stats */
+ struct ibw_stats stats;
};
struct ibw_part {
@@ -96,6 +120,7 @@
struct ibv_mr *mr_recv;
int recv_index; /* index of the next recv buffer when refilling */
struct ibw_part part;
+ int nrecv; /* received packets */
};
/* remove an element from a list - element doesn't have to be in list. */
More information about the samba-cvs mailing list