[SCM] CTDB repository - branch master updated - ctdb-1.0.98-10-ge01ab46

Wed Oct 21 20:37:41 MDT 2009

The branch, master has been updated
       via  e01ab46bafad09a5e320d420734db129d35863bc (commit)
       via  27296a47b3d057a6729287acf128b2b67775ecde (commit)
       via  5b70fa8cfd5916d3c212823ad5cc1b251ae175ed (commit)
      from  befabc917edb036ca81f5216f65a6d62b26ee83e (commit)

http://gitweb.samba.org/?p=sahlberg/ctdb.git;a=shortlog;h=master


- Log -----------------------------------------------------------------
commit e01ab46bafad09a5e320d420734db129d35863bc
Author: Ronnie Sahlberg <ronniesahlberg at gmail.com>
Date:   Thu Oct 22 13:41:28 2009 +1100

    If a lock wait child died/finished, we could have released the lockwait handle and set it to NULL before we call the destructors for releasing the waiters.
    
    The waiters reference the lockwait handle in order to remove themselves from the
    linked list, which caused a SEGV.
    
    We don't actually need to remove ourselves from this list here, since
    if the parent freeze_handle holding the list is freed, then all waiters are
    released as well. The only place we actually need to unlink the waiter is in
    ctdb_freeze_lock_handler, where we want to respond back to the clients and
    release the waiters but still want to keep the freeze_handle hanging around.

commit 27296a47b3d057a6729287acf128b2b67775ecde
Author: Ronnie Sahlberg <ronniesahlberg at gmail.com>
Date:   Thu Oct 22 12:19:40 2009 +1100

    From Volker L.
    Fix some warnings and an incorrect check for a talloc failure.

commit 5b70fa8cfd5916d3c212823ad5cc1b251ae175ed
Author: Ronnie Sahlberg <ronniesahlberg at gmail.com>
Date:   Thu Oct 22 07:58:44 2009 +1100

    From Wolfgang M.
    
    With the new vacuuming code, don't treat an invalid dmaster as fatal. Let it update to the new value instead.

-----------------------------------------------------------------------

Summary of changes:
 server/ctdb_call.c     |   11 +++++++----
 server/ctdb_freeze.c   |   21 +++++++++++++++------
 server/ctdb_monitor.c  |    2 +-
 server/ctdb_recoverd.c |   15 +++++++++++----
 server/ctdb_takeover.c |    7 ++++++-
 server/eventscript.c   |    2 +-
 6 files changed, 41 insertions(+), 17 deletions(-)


Changeset truncated at 500 lines:

diff --git a/server/ctdb_call.c b/server/ctdb_call.c
index cd52867..1dac919 100644
--- a/server/ctdb_call.c
+++ b/server/ctdb_call.c
@@ -342,10 +342,13 @@ void ctdb_request_dmaster(struct ctdb_context *ctdb, struct ctdb_req_header *hdr
 			 ctdb_db->db_id, hdr->generation, ctdb->vnn_map->generation,
 			 (unsigned long long)c->rsn, (unsigned long long)header.rsn, c->hdr.reqid,
 			 (key.dsize >= 4)?(*(uint32_t *)key.dptr):0));
-		if (header.rsn != 0 || header.dmaster != ctdb->pnn) {
-			ctdb_fatal(ctdb, "ctdb_req_dmaster from non-master");
-			return;
-		}
+		/*
+		 * with the new vacuuming code there are conditions where a node has outdated
+		 * information about the real dmaster
+		 * since here we are lmaster and always know who is the real dmaster
+		 * we don't need to exit with a fatal error and we even don't have
+		 * to initiate a recovery
+		 */
 	}
 
 	if (header.rsn > c->rsn) {
diff --git a/server/ctdb_freeze.c b/server/ctdb_freeze.c
index 69e70ab..36d033d 100644
--- a/server/ctdb_freeze.c
+++ b/server/ctdb_freeze.c
@@ -151,9 +151,12 @@ static void ctdb_freeze_lock_handler(struct event_context *ev, struct fd_event *
 	h->ctdb->freeze_mode[h->priority] = CTDB_FREEZE_FROZEN;
 
 	/* notify the waiters */
-	while ((w = h->ctdb->freeze_handles[h->priority]->waiters)) {
+	if (h != h->ctdb->freeze_handles[h->priority]) {
+		DEBUG(DEBUG_ERR,("lockwait finished but h is not linked\n"));
+	}
+	while ((w = h->waiters)) {
 		w->status = status;
-		DLIST_REMOVE(h->ctdb->freeze_handles[h->priority]->waiters, w);
+		DLIST_REMOVE(h->waiters, w);
 		talloc_free(w);
 	}
 }
@@ -241,7 +244,6 @@ static struct ctdb_freeze_handle *ctdb_freeze_lock(struct ctdb_context *ctdb, ui
  */
 static int ctdb_freeze_waiter_destructor(struct ctdb_freeze_waiter *w)
 {
-	DLIST_REMOVE(w->ctdb->freeze_handles[w->priority]->waiters, w);
 	ctdb_request_control_reply(w->ctdb, w->c, NULL, w->status, NULL);
 	return 0;
 }
@@ -267,7 +269,7 @@ int ctdb_start_freeze(struct ctdb_context *ctdb, uint32_t priority)
 	}
 
 	/* if there isn't a freeze lock child then create one */
-	if (!ctdb->freeze_handles[priority]) {
+	if (ctdb->freeze_handles[priority] == NULL) {
 		ctdb->freeze_handles[priority] = ctdb_freeze_lock(ctdb, priority);
 		CTDB_NO_MEMORY(ctdb, ctdb->freeze_handles[priority]);
 		ctdb->freeze_mode[priority] = CTDB_FREEZE_PENDING;
@@ -309,6 +311,11 @@ int32_t ctdb_control_freeze(struct ctdb_context *ctdb, struct ctdb_req_control *
 	}
 
 	/* add ourselves to list of waiters */
+	if (ctdb->freeze_handles[priority] == NULL) {
+		DEBUG(DEBUG_ERR,("No freeze lock handle when adding a waiter\n"));
+		return -1;
+	}
+
 	w = talloc(ctdb->freeze_handles[priority], struct ctdb_freeze_waiter);
 	CTDB_NO_MEMORY(ctdb, w);
 	w->ctdb     = ctdb;
@@ -376,8 +383,10 @@ static void thaw_priority(struct ctdb_context *ctdb, uint32_t priority)
 	system("mkdir -p test.db.saved; /usr/bin/rsync --delete -a test.db/ test.db.saved/$$ 2>&1 > /dev/null");
 #endif
 
-	talloc_free(ctdb->freeze_handles[priority]);
-	ctdb->freeze_handles[priority] = NULL;
+	if (ctdb->freeze_handles[priority] != NULL) {
+		talloc_free(ctdb->freeze_handles[priority]);
+		ctdb->freeze_handles[priority] = NULL;
+	}
 }
 
 /*
diff --git a/server/ctdb_monitor.c b/server/ctdb_monitor.c
index 5e1f7ad..056d831 100644
--- a/server/ctdb_monitor.c
+++ b/server/ctdb_monitor.c
@@ -235,7 +235,7 @@ static void ctdb_check_health(struct event_context *ev, struct timed_event *te,
 			DEBUG(DEBUG_ERR,("Skip monitoring during recovery\n"));
 		}
 		for (i=1; i<=NUM_DB_PRIORITIES; i++) {
-			if (ctdb->freeze_handles[i] != 0) {
+			if (ctdb->freeze_handles[i] != NULL) {
 				DEBUG(DEBUG_ERR,("Skip monitoring since databases are frozen\n"));
 				skip_monitoring = 1;
 				break;
diff --git a/server/ctdb_recoverd.c b/server/ctdb_recoverd.c
index 08d7dc2..432af07 100644
--- a/server/ctdb_recoverd.c
+++ b/server/ctdb_recoverd.c
@@ -1791,7 +1791,9 @@ static void disable_ip_check_handler(struct ctdb_context *ctdb, uint64_t srvid,
 	}
 
 	if (data.dsize != sizeof(uint32_t)) {
-		DEBUG(DEBUG_ERR,(__location__ " Wrong size for data :%lu expexting %lu\n", data.dsize, sizeof(uint32_t)));
+		DEBUG(DEBUG_ERR,(__location__ " Wrong size for data :%lu "
+				 "expexting %lu\n", (long unsigned)data.dsize,
+				 (long unsigned)sizeof(uint32_t)));
 		return;
 	}
 	if (data.dptr == NULL) {
@@ -1827,7 +1829,7 @@ static void ip_reallocate_handler(struct ctdb_context *ctdb, uint64_t srvid,
 
 	if (rec->ip_reallocate_ctx == NULL) {
 		rec->ip_reallocate_ctx = talloc_new(rec);
-		CTDB_NO_MEMORY_FATAL(ctdb, caller);
+		CTDB_NO_MEMORY_FATAL(ctdb, rec->ip_reallocate_ctx);
 	}
 
 	caller = talloc(rec->ip_reallocate_ctx, struct ip_reallocate_list);
@@ -1853,10 +1855,15 @@ static void process_ipreallocate_requests(struct ctdb_context *ctdb, struct ctdb
 	result.dptr  = (uint8_t *)&ret;
 
 	for (callers=rec->reallocate_callers; callers; callers=callers->next) {
-		DEBUG(DEBUG_INFO,("Sending ip reallocate reply message to %u:%lu\n", callers->rd->pnn, callers->rd->srvid));
+		DEBUG(DEBUG_INFO,("Sending ip reallocate reply message to "
+				  "%u:%lu\n", (unsigned)callers->rd->pnn,
+				  (long unsigned)callers->rd->srvid));
 		ret = ctdb_send_message(ctdb, callers->rd->pnn, callers->rd->srvid, result);
 		if (ret != 0) {
-			DEBUG(DEBUG_ERR,("Failed to send ip reallocate reply message to %u:%lu\n", callers->rd->pnn, callers->rd->srvid));
+			DEBUG(DEBUG_ERR,("Failed to send ip reallocate reply "
+					 "message to %u:%lu\n",
+					 (unsigned)callers->rd->pnn,
+					 (long unsigned)callers->rd->srvid));
 		}
 	}
 
diff --git a/server/ctdb_takeover.c b/server/ctdb_takeover.c
index 6eb2acf..559f7fb 100644
--- a/server/ctdb_takeover.c
+++ b/server/ctdb_takeover.c
@@ -1061,7 +1061,12 @@ int32_t ctdb_control_tcp_client(struct ctdb_context *ctdb, uint32_t client_id,
 		tcp_sock = (struct ctdb_control_tcp_addr *)indata.dptr;
 		break;
 	default:
-		DEBUG(DEBUG_ERR,(__location__ " Invalid data structure passed to ctdb_control_tcp_client. size was %d but only allowed sizes are %lu and %lu\n", (int)indata.dsize, sizeof(struct ctdb_control_tcp), sizeof(struct ctdb_control_tcp_addr)));
+		DEBUG(DEBUG_ERR,(__location__ " Invalid data structure passed "
+				 "to ctdb_control_tcp_client. size was %d but "
+				 "only allowed sizes are %lu and %lu\n",
+				 (int)indata.dsize,
+				 (long unsigned)sizeof(struct ctdb_control_tcp),
+				 (long unsigned)sizeof(struct ctdb_control_tcp_addr)));
 		return -1;
 	}
 
diff --git a/server/eventscript.c b/server/eventscript.c
index 8a62af9..aae5eef 100644
--- a/server/eventscript.c
+++ b/server/eventscript.c
@@ -969,7 +969,7 @@ int32_t ctdb_run_eventscripts(struct ctdb_context *ctdb,
 	ret = ctdb_event_script_callback(ctdb, 
 			 timeval_current_ofs(ctdb->tunable.script_timeout, 0),
 			 state, run_eventscripts_callback, state,
-			 (const char *)indata.dptr);
+			 "%s", (const char *)indata.dptr);
 
 	if (ret != 0) {
 		ctdb_enable_monitoring(ctdb);


-- 
CTDB repository