[SCM] CTDB repository - branch master updated - 58e6dc722ad1e2415b71baf1d471885169dde14d

Andrew Tridgell tridge at samba.org
Thu Aug 7 14:51:54 GMT 2008


The branch, master has been updated
       via  58e6dc722ad1e2415b71baf1d471885169dde14d (commit)
       via  bcbac6724840cdabe55e38a4c62779f853bd09ba (commit)
       via  66c61137a5c01afcbae329ffbe121e78ae087399 (commit)
       via  84236e03e40bcf46fa634d106903277c149a734f (commit)
       via  14f2f719e6ddc266aafde4d4bf80ed3a01e145fe (commit)
       via  a9bce1ac9794f108825190948f404c864a533435 (commit)
       via  d1d48f8661d83b01de6c552ee70021acdc6384cb (commit)
       via  09aa91224fe7b835dc0a2c58868cce28ce54809f (commit)
      from  0592ba2a4fbd1b3b7a6bd0780eadbd6d449baaad (commit)

http://gitweb.samba.org/?p=tridge/ctdb.git;a=shortlog;h=master


- Log -----------------------------------------------------------------
commit 58e6dc722ad1e2415b71baf1d471885169dde14d
Merge: 0592ba2a4fbd1b3b7a6bd0780eadbd6d449baaad bcbac6724840cdabe55e38a4c62779f853bd09ba
Author: Andrew Tridgell <tridge at samba.org>
Date:   Fri Aug 8 00:48:19 2008 +1000

    Merge commit 'ronnie/1.0.53'

commit bcbac6724840cdabe55e38a4c62779f853bd09ba
Author: Ronnie Sahlberg <ronniesahlberg at gmail.com>
Date:   Thu Aug 7 18:57:24 2008 +1000

    new version 1.0.53
    
    this adds completely new transaction code for persistent databases

commit 66c61137a5c01afcbae329ffbe121e78ae087399
Merge: 84236e03e40bcf46fa634d106903277c149a734f c76c214be401cb116265ed17ffe6c77c979ded82
Author: Ronnie Sahlberg <ronniesahlberg at gmail.com>
Date:   Thu Aug 7 18:50:48 2008 +1000

    Merge git://git.samba.org/tridge/ctdb

commit 84236e03e40bcf46fa634d106903277c149a734f
Author: Ronnie Sahlberg <ronniesahlberg at gmail.com>
Date:   Wed Aug 6 11:52:26 2008 +1000

    remove the reclock file we store pnn counts in.
    This file creates additional locking stress on the backend filesystem and we may not need it anyway.

commit 14f2f719e6ddc266aafde4d4bf80ed3a01e145fe
Merge: a9bce1ac9794f108825190948f404c864a533435 b6d9a0396fb4b325778d3810dc656f719f31b9f1
Author: Ronnie Sahlberg <ronniesahlberg at gmail.com>
Date:   Wed Aug 6 09:17:41 2008 +1000

    Merge git://git.samba.org/tridge/ctdb

commit a9bce1ac9794f108825190948f404c864a533435
Author: Ronnie Sahlberg <ronniesahlberg at gmail.com>
Date:   Mon Aug 4 14:58:52 2008 +1000

    New version 1.0.52
    
    Signed-off-by: Ronnie Sahlberg <ronniesahlberg at gmail.com>

commit d1d48f8661d83b01de6c552ee70021acdc6384cb
Author: Andrew Tridgell <tridge at samba.org>
Date:   Fri Aug 1 14:23:15 2008 +1000

    we need an additional gratuitous arp before the NFS tickles

commit 09aa91224fe7b835dc0a2c58868cce28ce54809f
Author: Andrew Tridgell <tridge at samba.org>
Date:   Fri Aug 1 14:17:50 2008 +1000

    ensure we use killtcp on non-NFS/non-CIFS ports for faster failover of
    other protocols

-----------------------------------------------------------------------

Summary of changes:
 client/ctdb_client.c    |   23 -------
 include/ctdb_private.h  |    3 +-
 packaging/RPM/ctdb.spec |   11 +++-
 server/ctdb_control.c   |    4 -
 server/ctdb_recover.c   |   16 -----
 server/ctdb_recoverd.c  |  152 -----------------------------------------------
 tools/ctdb.c            |   55 -----------------
 7 files changed, 11 insertions(+), 253 deletions(-)


Changeset truncated at 500 lines:

diff --git a/client/ctdb_client.c b/client/ctdb_client.c
index 3717096..2b31d81 100644
--- a/client/ctdb_client.c
+++ b/client/ctdb_client.c
@@ -1232,29 +1232,6 @@ int ctdb_ctrl_getdbmap(struct ctdb_context *ctdb, struct timeval timeout, uint32
 }
 
 /*
-  get the reclock filename
- */
-int ctdb_ctrl_getreclock(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, 
-		       TALLOC_CTX *mem_ctx, const char **reclock)
-{
-	int ret;
-	TDB_DATA outdata;
-	int32_t res;
-
-	ret = ctdb_control(ctdb, destnode, 0, 
-			   CTDB_CONTROL_GET_RECLOCK_FILE, 0, tdb_null, 
-			   mem_ctx, &outdata, &res, &timeout, NULL);
-	if (ret != 0 || res != 0) {
-		DEBUG(DEBUG_ERR,(__location__ " ctdb_control for getreclock failed\n"));
-		return -1;
-	}
-
-	*reclock = (const char *)talloc_steal(mem_ctx, outdata.dptr);
-
-	return 0;
-}
-
-/*
   get a list of nodes (vnn and flags ) from a remote node
  */
 int ctdb_ctrl_getnodemap(struct ctdb_context *ctdb, 
diff --git a/include/ctdb_private.h b/include/ctdb_private.h
index 79046aa..ff4d271 100644
--- a/include/ctdb_private.h
+++ b/include/ctdb_private.h
@@ -536,7 +536,7 @@ enum ctdb_controls {CTDB_CONTROL_PROCESS_EXISTS          = 0,
 		    CTDB_CONTROL_START_RECOVERY          = 70,
 		    CTDB_CONTROL_END_RECOVERY            = 71,
 		    CTDB_CONTROL_RELOAD_NODES_FILE       = 72,
-		    CTDB_CONTROL_GET_RECLOCK_FILE        = 73,
+		    /* #73 removed */
 		    CTDB_CONTROL_TRY_DELETE_RECORDS      = 74,
 		    CTDB_CONTROL_ENABLE_MONITOR          = 75,
 		    CTDB_CONTROL_DISABLE_MONITOR         = 76,
@@ -1240,7 +1240,6 @@ int32_t ctdb_control_get_tunable(struct ctdb_context *ctdb, TDB_DATA indata,
 				 TDB_DATA *outdata);
 int32_t ctdb_control_set_tunable(struct ctdb_context *ctdb, TDB_DATA indata);
 int32_t ctdb_control_list_tunables(struct ctdb_context *ctdb, TDB_DATA *outdata);
-int32_t ctdb_control_get_reclock_file(struct ctdb_context *ctdb, TDB_DATA *outdata);
 int32_t ctdb_control_try_delete_records(struct ctdb_context *ctdb, TDB_DATA indata, TDB_DATA *outdata);
 int32_t ctdb_control_add_public_address(struct ctdb_context *ctdb, TDB_DATA indata);
 int32_t ctdb_control_del_public_address(struct ctdb_context *ctdb, TDB_DATA indata);
diff --git a/packaging/RPM/ctdb.spec b/packaging/RPM/ctdb.spec
index bff072b..fcaf2ee 100644
--- a/packaging/RPM/ctdb.spec
+++ b/packaging/RPM/ctdb.spec
@@ -5,7 +5,7 @@ Vendor: Samba Team
 Packager: Samba Team <samba at samba.org>
 Name: ctdb
 Version: 1.0
-Release: 51
+Release: 53
 Epoch: 0
 License: GNU GPL version 3
 Group: System Environment/Daemons
@@ -118,6 +118,15 @@ fi
 %{_includedir}/ctdb_private.h
 
 %changelog
+* Thu Aug 7 2008 : Version 1.0.53
+ - Remove the reclock.pnn file; it can cause gpfs to fail to umount
+ - New transaction code
+* Mon Aug 4 2008 : Version 1.0.52
+ - Send an explicit gratuitous arp when starting to send the tcp tickles.
+ - When doing failover, issue a killtcp to non-NFS/non-CIFS clients
+   so that they fail quickly. NFS and CIFS already fail and recover 
+   quickly.
+ - Update the test scripts to handle CTRL-C to kill off the test.
 * Mon Jul 28 2008 : Version 1.0.51
  - Strip off the vlan tag from bond devices before we check in /proc
    if the interface is up or not.
diff --git a/server/ctdb_control.c b/server/ctdb_control.c
index 3d22329..59b0657 100644
--- a/server/ctdb_control.c
+++ b/server/ctdb_control.c
@@ -378,10 +378,6 @@ static int32_t ctdb_control_dispatch(struct ctdb_context *ctdb,
 	case CTDB_CONTROL_END_RECOVERY:
 		return ctdb_control_end_recovery(ctdb, c, async_reply);
 
-	case CTDB_CONTROL_GET_RECLOCK_FILE:
-		CHECK_CONTROL_DATA_SIZE(0);
-		return ctdb_control_get_reclock_file(ctdb, outdata);
-
 	case CTDB_CONTROL_TRY_DELETE_RECORDS:
 		return ctdb_control_try_delete_records(ctdb, indata, outdata);
 
diff --git a/server/ctdb_recover.c b/server/ctdb_recover.c
index ead04ed..f0b97ac 100644
--- a/server/ctdb_recover.c
+++ b/server/ctdb_recover.c
@@ -866,22 +866,6 @@ int32_t ctdb_control_start_recovery(struct ctdb_context *ctdb,
 }
 
 /*
-  report the location for the reclock file
- */
-int32_t ctdb_control_get_reclock_file(struct ctdb_context *ctdb, TDB_DATA *outdata)
-{
-	char *reclock = NULL;
-
-	reclock = talloc_strdup(outdata, ctdb->recovery_lock_file);
-	CTDB_NO_MEMORY(ctdb, reclock);
-
-	outdata->dsize = strlen(reclock)+1;
-	outdata->dptr = (uint8_t *)reclock;
-
-	return 0;	
-}
-
-/*
  try to delete all these records as part of the vacuuming process
  and return the records we failed to delete
 */
diff --git a/server/ctdb_recoverd.c b/server/ctdb_recoverd.c
index 2e29fea..3295229 100644
--- a/server/ctdb_recoverd.c
+++ b/server/ctdb_recoverd.c
@@ -41,7 +41,6 @@ struct ban_state {
  */
 struct ctdb_recoverd {
 	struct ctdb_context *ctdb;
-	int rec_file_fd;
 	uint32_t recmaster;
 	uint32_t num_active;
 	uint32_t num_connected;
@@ -2143,148 +2142,6 @@ static enum monitor_result verify_recmaster(struct ctdb_recoverd *rec, struct ct
 	return status;
 }
 
-/*
-  this function writes the number of connected nodes we have for this pnn
-  to the pnn slot in the reclock file
-*/
-static void
-ctdb_recoverd_write_pnn_connect_count(struct ctdb_recoverd *rec)
-{
-	const char count = rec->num_connected;
-	struct ctdb_context *ctdb = talloc_get_type(rec->ctdb, struct ctdb_context);
-
-	if (rec->rec_file_fd == -1) {
-		DEBUG(DEBUG_CRIT,(__location__ " Unable to write pnn count. pnnfile is not open.\n"));
-		return;
-	} 
-
-	if (pwrite(rec->rec_file_fd, &count, 1, ctdb->pnn) == -1) {
-		DEBUG(DEBUG_CRIT, (__location__ " Failed to write pnn count\n"));
-		close(rec->rec_file_fd);
-		rec->rec_file_fd = -1;
-	}
-}
-
-/* 
-  this function opens the reclock file and sets a byterage lock for the single
-  byte at position pnn+1.
-  the existence/non-existence of such a lock provides an alternative mechanism
-  to know whether a remote node(recovery daemon) is running or not.
-*/
-static void
-ctdb_recoverd_get_pnn_lock(struct ctdb_recoverd *rec)
-{
-	struct ctdb_context *ctdb = talloc_get_type(rec->ctdb, struct ctdb_context);
-	struct flock lock;
-	char *pnnfile = NULL;
-
-	DEBUG(DEBUG_INFO, ("Setting PNN lock for pnn:%d\n", ctdb->pnn));
-
-	if (rec->rec_file_fd != -1) {
-		close(rec->rec_file_fd);
-		rec->rec_file_fd = -1;
-	}
-
-	pnnfile = talloc_asprintf(rec, "%s.pnn", ctdb->recovery_lock_file);
-	CTDB_NO_MEMORY_FATAL(ctdb, pnnfile);
-
-	rec->rec_file_fd = open(pnnfile, O_RDWR|O_CREAT, 0600);
-	if (rec->rec_file_fd == -1) {
-		DEBUG(DEBUG_CRIT,(__location__ " Unable to open %s - (%s)\n", 
-			 pnnfile, strerror(errno)));
-		talloc_free(pnnfile);
-		return;
-	}
-
-	set_close_on_exec(rec->rec_file_fd);
-	lock.l_type = F_WRLCK;
-	lock.l_whence = SEEK_SET;
-	lock.l_start = ctdb->pnn;
-	lock.l_len = 1;
-	lock.l_pid = 0;
-
-	if (fcntl(rec->rec_file_fd, F_SETLK, &lock) != 0) {
-		close(rec->rec_file_fd);
-		rec->rec_file_fd = -1;
-		DEBUG(DEBUG_CRIT,(__location__ " Failed to get pnn lock on '%s'\n", pnnfile));
-		talloc_free(pnnfile);
-		return;
-	}
-
-
-	DEBUG(DEBUG_NOTICE,(__location__ " Got pnn lock on '%s'\n", pnnfile));
-	talloc_free(pnnfile);
-
-	/* we start out with 0 connected nodes */
-	ctdb_recoverd_write_pnn_connect_count(rec);
-}
-
-/*
-  called when we need to do the periodical reclock pnn count update
- */
-static void ctdb_update_pnn_count(struct event_context *ev, struct timed_event *te, 
-				  struct timeval t, void *p)
-{
-	int i, count;
-	struct ctdb_recoverd *rec     = talloc_get_type(p, struct ctdb_recoverd);
-	struct ctdb_context *ctdb     = rec->ctdb;
-	struct ctdb_node_map *nodemap = rec->nodemap;
-
-	/* close and reopen the pnn lock file */
-	ctdb_recoverd_get_pnn_lock(rec);
-
-	ctdb_recoverd_write_pnn_connect_count(rec);
-
-	event_add_timed(rec->ctdb->ev, rec->ctdb,
-		timeval_current_ofs(ctdb->tunable.reclock_ping_period, 0), 
-		ctdb_update_pnn_count, rec);
-
-	/* check if there is a split cluster and yeld the recmaster role
-	   it the other half of the cluster is larger 
-	*/
-	DEBUG(DEBUG_DEBUG, ("CHECK FOR SPLIT CLUSTER\n"));
-	if (rec->nodemap == NULL) {
-		return;
-	}
-	if (rec->rec_file_fd == -1) {
-		return;
-	}
-	/* only test this if we think we are the recmaster */
-	if (ctdb->pnn != rec->recmaster) {
-		DEBUG(DEBUG_DEBUG, ("We are not recmaster, skip test\n"));
-		return;
-	}
-	if (ctdb->recovery_lock_fd == -1) {
-		DEBUG(DEBUG_ERR, (__location__ " Lost reclock pnn file. Yielding recmaster role\n"));
-		close(ctdb->recovery_lock_fd);
-		ctdb->recovery_lock_fd = -1;
-		force_election(rec, ctdb->pnn, rec->nodemap);
-		return;
-	}
-	for (i=0; i<nodemap->num; i++) {
-		/* we dont need to check ourself */
-		if (nodemap->nodes[i].pnn == ctdb->pnn) {
-			continue;
-		}
-		/* dont check nodes that are connected to us */
-		if (!(nodemap->nodes[i].flags & NODE_FLAGS_DISCONNECTED)) {
-			continue;
-		}
-		/* check if the node is "connected" and how connected it it */
-		count = ctdb_read_pnn_lock(rec->rec_file_fd, nodemap->nodes[i].pnn);
-		if (count < 0) {
-			continue;
-		}
-		/* check if that node is more connected that us */
-		if (count > rec->num_connected) {
-			DEBUG(DEBUG_ERR, ("DISCONNECTED Node %u is more connected than we are, yielding recmaster role\n", nodemap->nodes[i].pnn));
-			close(ctdb->recovery_lock_fd);
-			ctdb->recovery_lock_fd = -1;
-			force_election(rec, ctdb->pnn, rec->nodemap);
-			return;
-		}
-	}
-}
 
 /* called to check that the allocation of public ip addresses is ok.
 */
@@ -2419,10 +2276,6 @@ static void monitor_cluster(struct ctdb_context *ctdb)
 
 	rec->priority_time = timeval_current();
 
-	/* open the rec file fd and lock our slot */
-	rec->rec_file_fd = -1;
-	ctdb_recoverd_get_pnn_lock(rec);
-
 	/* register a message port for sending memory dumps */
 	ctdb_set_message_handler(ctdb, CTDB_SRVID_MEM_DUMP, mem_dump_handler, rec);
 
@@ -2441,11 +2294,6 @@ static void monitor_cluster(struct ctdb_context *ctdb)
 	/* register a message port for vacuum fetch */
 	ctdb_set_message_handler(ctdb, CTDB_SRVID_VACUUM_FETCH, vacuum_fetch_handler, rec);
 
-	/* update the reclock pnn file connected count on a regular basis */
-	event_add_timed(ctdb->ev, ctdb,
-		timeval_current_ofs(ctdb->tunable.reclock_ping_period, 0), 
-		ctdb_update_pnn_count, rec);
-
 again:
 	if (mem_ctx) {
 		talloc_free(mem_ctx);
diff --git a/tools/ctdb.c b/tools/ctdb.c
index 22671a7..4a3aa87 100644
--- a/tools/ctdb.c
+++ b/tools/ctdb.c
@@ -1491,60 +1491,6 @@ static int control_getdbmap(struct ctdb_context *ctdb, int argc, const char **ar
 }
 
 /*
-  get the filename of the reclock file
- */
-static int control_getreclock(struct ctdb_context *ctdb, int argc, const char **argv)
-{
-	int i, ret, fd;
-	const char *reclock;
-	struct ctdb_node_map *nodemap=NULL;
-	char *pnnfile;
-
-	ret = ctdb_ctrl_getreclock(ctdb, TIMELIMIT(), options.pnn, ctdb, &reclock);
-	if (ret != 0) {
-		DEBUG(DEBUG_ERR, ("Unable to get reclock file from node %u\n", options.pnn));
-		return ret;
-	}
-
-	ret = ctdb_ctrl_getnodemap(ctdb, TIMELIMIT(), options.pnn, ctdb, &nodemap);
-	if (ret != 0) {
-		DEBUG(DEBUG_ERR, ("Unable to get nodemap from node %u\n", options.pnn));
-		return ret;
-	}
-
-
-	pnnfile = talloc_asprintf(ctdb, "%s.pnn", reclock);
-	CTDB_NO_MEMORY(ctdb, pnnfile);
-
-	fd = open(pnnfile, O_RDONLY);
-	if (fd == -1) {
-		DEBUG(DEBUG_CRIT,(__location__ " Failed to open reclock pnn file %s - (%s)\n", 
-			 pnnfile, strerror(errno)));
-		exit(10);
-	}
-
-
-	printf("Reclock file : %s\n", reclock);
-	for (i=0; i<nodemap->num; i++) {
-		int count;
-
-		count = ctdb_read_pnn_lock(fd, nodemap->nodes[i].pnn);
-
-		printf("pnn:%d %-16s", nodemap->nodes[i].pnn,
-		       inet_ntoa(nodemap->nodes[i].sin.sin_addr));
-		if (count == -1) {
-			printf(" NOT ACTIVE\n");
-		} else {
-			printf(" ACTIVE with %d connections\n", count);
-		}
-	}
-
-	close(fd);
-	return 0;
-}
-
-
-/*
   check if the local node is recmaster or not
   it will return 1 if this node is the recmaster and 0 if it is not
   or if the local ctdb daemon could not be contacted
@@ -2030,7 +1976,6 @@ static const struct {
 	{ "repack",          ctdb_repack,		false, "repack all databases", "[max_freelist]"},
 	{ "listnodes",       control_listnodes,		false, "list all nodes in the cluster"},
 	{ "reloadnodes",     control_reload_nodes_file,		false, "reload the nodes file and restart the transport on all nodes"},
-	{ "getreclock",      control_getreclock,        false,  "get the path to the reclock file" },
 	{ "moveip",          control_moveip,		false, "move/failover an ip address to another node", "<ip> <node>"},
 	{ "addip",           control_addip,		true, "add a ip address to a node", "<ip/mask> <iface>"},
 	{ "delip",           control_delip,		false, "delete an ip address from a node", "<ip>"},


-- 
CTDB repository


More information about the samba-cvs mailing list