[SCM] CTDB repository - branch 1.0.82 updated - ctdb-1.0.82-32-gcc816a0

Thu Oct 1 20:27:33 MDT 2009

The branch, 1.0.82 has been updated
       via  cc816a010b05a23ec28a6d66cf2f361686540ed5 (commit)
       via  09d61629b56eac3336a805f75abdd353abbe54fa (commit)
       via  50e9284a294d17f48c47996e91ed542f57b2a77e (commit)
       via  6e25de53bca25587d497f0d6ecdf43b5dedaea7e (commit)
       via  d01fe5ec78d5372d5f630d04f9c39e7a220c9448 (commit)
       via  dca1df5bb04e1742115160dff64a084bd02b47e9 (commit)
       via  5f54fbbee30727ca52fc0e582d2b8a807cd2766f (commit)
       via  bd7bc6b1aca214af4bcca127c9f894c28dd5b9b7 (commit)
       via  e374e3694a31794641d75135156e2c44e11f296e (commit)
       via  11c2428713421672f631fe25ed6228a5db4adf76 (commit)
       via  fad35c8037114e911768791c045df208857a9af7 (commit)
      from  a996a381094cfa75e4222498e68c4528cffebc56 (commit)

http://gitweb.samba.org/?p=sahlberg/ctdb.git;a=shortlog;h=1.0.82


- Log -----------------------------------------------------------------
commit cc816a010b05a23ec28a6d66cf2f361686540ed5
Author: Ronnie Sahlberg <ronniesahlberg at gmail.com>
Date:   Fri Oct 2 12:11:26 2009 +1000

    new version 1.0.82-8

commit 09d61629b56eac3336a805f75abdd353abbe54fa
Author: Ronnie Sahlberg <ronniesahlberg at gmail.com>
Date:   Tue Sep 29 13:20:18 2009 +1000

    From Wolfgang Mueller-Friedt
    
    Remove the explicit vacuum/repack commands from the 00.ctdb eventscript
    and implement this in the ctdb daemon.
    
    Combine vacuuming and repacking into one
    cheap read traverse that enumerates all candidate records,
    and one write traverse that both repacks the database and deletes records locally where we are lmaster and where the records have already been deleted remotely.
    
    this code also adds initial autotuning heuristics for the vacuum intervals and how many records to delete in each iteration.
    
    minor stylish changes made by ronnie s

commit 50e9284a294d17f48c47996e91ed542f57b2a77e
Author: Ronnie Sahlberg <ronniesahlberg at gmail.com>
Date:   Wed Jul 29 13:31:12 2009 +1000

    change the defaults for repacking to repack once every 120 seconds and let it work for 30 seconds before timing out.

commit 6e25de53bca25587d497f0d6ecdf43b5dedaea7e
Author: Wolfgang Mueller-Friedt <wolfmuel at de.ibm.com>
Date:   Tue Jul 28 23:09:28 2009 +0300

    repack limit tunable
    
    Signed-off-by: Wolfgang Mueller-Friedt <wolfmuel at de.ibm.com>

commit d01fe5ec78d5372d5f630d04f9c39e7a220c9448
Author: Wolfgang Mueller-Friedt <wolfmuel at de.ibm.com>
Date:   Tue Jul 28 17:49:41 2009 +0300

    remove repack from eventscript
    
    Signed-off-by: Wolfgang Mueller-Friedt <wolfmuel at de.ibm.com>

commit dca1df5bb04e1742115160dff64a084bd02b47e9
Author: Wolfgang Mueller-Friedt <wolfmuel at de.ibm.com>
Date:   Tue Jul 28 17:45:31 2009 +0300

    added event repacking
    
    Signed-off-by: Wolfgang Mueller-Friedt <wolfmuel at de.ibm.com>

commit 5f54fbbee30727ca52fc0e582d2b8a807cd2766f
Author: Ronnie Sahlberg <ronniesahlberg at gmail.com>
Date:   Thu Jul 23 16:03:39 2009 +1000

    vacuum event framework
    
    Signed-off-by: Ronnie Sahlberg <ronniesahlberg at gmail.com>
    Signed-off-by: Wolfgang Mueller-Friedt <wolfmuel at de.ibm.com>

commit bd7bc6b1aca214af4bcca127c9f894c28dd5b9b7
Author: Ronnie Sahlberg <ronniesahlberg at gmail.com>
Date:   Wed Jul 29 13:25:43 2009 +1000

    initial part of new vacuuming patch.
    
    create some new fields for ctdb_db and tunables

commit e374e3694a31794641d75135156e2c44e11f296e
Author: Martin Schwenke <martin at meltin.net>
Date:   Wed Sep 30 21:21:56 2009 +1000

    Minor fixes to 01.reclock eventscript.
    
    test -z really needs its argument to be quoted.  Simplified a status
    test.
    
    Signed-off-by: Martin Schwenke <martin at meltin.net>

commit 11c2428713421672f631fe25ed6228a5db4adf76
Author: Ronnie Sahlberg <ronniesahlberg at gmail.com>
Date:   Mon Sep 28 14:12:59 2009 +1000

    change the reclock fail count to 19 monitor intervals before we shut down ctdbd

commit fad35c8037114e911768791c045df208857a9af7
Author: Ronnie Sahlberg <ronniesahlberg at gmail.com>
Date:   Mon Sep 28 14:06:40 2009 +1000

        add a new eventscript 01.reclock
    
        if the reclock file has been set, then this script will test that the
        reclock file can actually be accessed.
        if the file does not exist, or if the attempt to stat the file hangs,
        the node will be marked unhealthy after the third failed monitoring event
        and after the tenth failure, ctdb itself will shut down.

-----------------------------------------------------------------------

Summary of changes:
 Makefile.in                |    4 +-
 config/events.d/00.ctdb    |   23 --
 config/events.d/01.reclock |   57 +++
 include/ctdb_private.h     |    9 +
 packaging/RPM/ctdb.spec    |    6 +-
 server/ctdb_ltdb_server.c  |    8 +
 server/ctdb_tunables.c     |    6 +
 server/ctdb_vacuum.c       |  875 ++++++++++++++++++++++++++++++++++++++++++++
 8 files changed, 963 insertions(+), 25 deletions(-)
 create mode 100755 config/events.d/01.reclock
 create mode 100644 server/ctdb_vacuum.c


Changeset truncated at 500 lines:

diff --git a/Makefile.in b/Makefile.in
index 93d9438..a319840 100755
--- a/Makefile.in
+++ b/Makefile.in
@@ -55,7 +55,8 @@ CTDB_SERVER_OBJ = server/ctdbd.o server/ctdb_daemon.o server/ctdb_lockwait.o \
 	server/ctdb_control.o server/ctdb_call.o server/ctdb_ltdb_server.o \
 	server/ctdb_traverse.o server/eventscript.o server/ctdb_takeover.o \
 	server/ctdb_serverids.o server/ctdb_persistent.o \
-	server/ctdb_keepalive.o server/ctdb_logging.o server/ctdb_uptime.c \
+	server/ctdb_keepalive.o server/ctdb_logging.o server/ctdb_uptime.o \
+	server/ctdb_vacuum.o \
 	$(CTDB_CLIENT_OBJ) $(CTDB_TCP_OBJ) @INFINIBAND_WRAPPER_OBJ@
 
 TEST_BINS=tests/bin/ctdb_bench tests/bin/ctdb_fetch tests/bin/ctdb_store \
@@ -213,6 +214,7 @@ install: all
 	${INSTALLCMD} -m 644 config/events.d/README $(DESTDIR)$(docdir)/ctdb/README.eventscripts
 	${INSTALLCMD} -m 644 doc/recovery-process.txt $(DESTDIR)$(docdir)/ctdb/recovery-process.txt
 	${INSTALLCMD} -m 755 config/events.d/00.ctdb $(DESTDIR)$(etcdir)/ctdb/events.d
+	${INSTALLCMD} -m 755 config/events.d/01.reclock $(DESTDIR)$(etcdir)/ctdb/events.d
 	${INSTALLCMD} -m 755 config/events.d/10.interface $(DESTDIR)$(etcdir)/ctdb/events.d
 	${INSTALLCMD} -m 755 config/events.d/11.natgw $(DESTDIR)$(etcdir)/ctdb/events.d
 	${INSTALLCMD} -m 755 config/events.d/11.routing $(DESTDIR)$(etcdir)/ctdb/events.d
diff --git a/config/events.d/00.ctdb b/config/events.d/00.ctdb
index a248b4e..90fd92f 100755
--- a/config/events.d/00.ctdb
+++ b/config/events.d/00.ctdb
@@ -18,18 +18,6 @@ PATH=/bin:/usr/bin:$PATH
 cmd="$1"
 shift
 
-# set default samba cleanup period - in minutes
-[ -z "$CTDB_VACUUM_PERIOD" ] && {
-    CTDB_VACUUM_PERIOD=5
-}
-
-###########################
-# periodic vacuum function
-periodic_vacuum() {
-    # this cleans up dead records and repacks the databases
-    ( time ctdb vacuum 200000 -T 30; time ctdb repack -T 30 ) > $CTDB_BASE/state/vacuum.log 2>&1 &
-}
-
 case $cmd in 
      startup)
         # make sure we have a blank state directory for the scripts to work with
@@ -47,17 +35,6 @@ case $cmd in
 	;;
 
     monitor)
-	# Create a dummy file to track when we need to do periodic cleanup
-	# of samba databases
-	[ -f $CTDB_BASE/state/periodic_vacuum ] || {
-		touch $CTDB_BASE/state/periodic_vacuum
-	}
-	[ `/usr/bin/find $CTDB_BASE/state/periodic_vacuum -mmin +$CTDB_VACUUM_PERIOD | wc -l` -eq 1 ] && {
-		# vacuum the databases
-		touch $CTDB_BASE/state/periodic_vacuum
-	    	periodic_vacuum
-	}
-
 	# monitor that we are not running out of memory
 	[ -z "$CTDB_MONITOR_FREE_MEMORY" ] || {
 		FREE_MEM=`free -m | grep "buffers/cache" | while read A B C D ;do /bin/echo -n $D ; done`
diff --git a/config/events.d/01.reclock b/config/events.d/01.reclock
new file mode 100755
index 0000000..911f7dc
--- /dev/null
+++ b/config/events.d/01.reclock
@@ -0,0 +1,57 @@
+#!/bin/sh
+# script to check accessibility to the reclock file on a node
+
+. $CTDB_BASE/functions
+loadconfig ctdb
+
+cmd="$1"
+shift
+
+PATH=/usr/bin:/bin:/usr/sbin:/sbin:$PATH
+
+# The size of this file represents the number of intervals that have
+# passed when we have tried to but failed to stat the reclock file.
+# after third failure the node becomes unhealthy
+# after the twentieth failure we shut down ctdbd
+RECLOCKCOUNT="$CTDB_BASE/state/reclock-fail-count"
+
+case $cmd in 
+     startup)
+	echo -n > $RECLOCKCOUNT
+	;;
+
+      monitor)
+	echo -n 1 >> $RECLOCKCOUNT
+
+	COUNT=`ls -ln $RECLOCKCOUNT | cut -d" " -f5`
+	[ $COUNT -gt 19 ] && {
+		echo "Reclock file can not be accessed. Shutting down."
+		sleep 1
+		ctdb shutdown
+	}
+
+	RECLOCKFILE=`ctdb -Y getreclock`
+	[ -z "$RECLOCKFILE" ] && {
+		# we are not using a reclock file
+		echo -n > $RECLOCKCOUNT
+		exit 0
+	}
+
+	# try stat the reclock file as a background process
+	# so that we dont block in case the cluster filesystem is unavailable
+	(
+		stat $RECLOCKFILE && {
+			# we could stat the file, reset the counter
+			echo -n > $RECLOCKCOUNT
+		}
+	) >/dev/null 2>/dev/null &
+
+
+	[ $COUNT -gt 2 ] && {
+		echo "Reclock file can not be accessed. Mark node UNHEALTHY."
+		exit 1;
+	}
+	;;
+esac
+
+exit 0
diff --git a/include/ctdb_private.h b/include/ctdb_private.h
index 5ecf8a6..2e3b472 100644
--- a/include/ctdb_private.h
+++ b/include/ctdb_private.h
@@ -108,6 +108,12 @@ struct ctdb_tunable {
 	uint32_t reclock_latency_ms;
 	uint32_t recovery_drop_all_ips;
 	uint32_t verify_recovery_lock;
+	uint32_t vacuum_default_interval;
+	uint32_t vacuum_max_run_time;
+	uint32_t repack_limit;
+	uint32_t vacuum_limit;
+	uint32_t vacuum_min_interval;
+	uint32_t vacuum_max_interval;
 };
 
 /*
@@ -436,6 +442,7 @@ struct ctdb_db_context {
 	uint32_t seqnum;
 	struct timed_event *te;
 	struct ctdb_traverse_local_handle *traverse;
+	struct ctdb_vacuum_handle *vacuum_handle;
 };
 
 
@@ -1440,4 +1447,6 @@ int32_t ctdb_control_get_event_script_status(struct ctdb_context *ctdb, TDB_DATA
 int ctdb_log_event_script_output(struct ctdb_context *ctdb, char *str, uint16_t len);
 int ctdb_ctrl_report_recd_lock_latency(struct ctdb_context *ctdb, struct timeval timeout, double latency);
 
+int ctdb_vacuum_init(struct ctdb_db_context *ctdb_db);
+
 #endif
diff --git a/packaging/RPM/ctdb.spec b/packaging/RPM/ctdb.spec
index ac36b51..7e3d1db 100644
--- a/packaging/RPM/ctdb.spec
+++ b/packaging/RPM/ctdb.spec
@@ -5,7 +5,7 @@ Vendor: Samba Team
 Packager: Samba Team <samba at samba.org>
 Name: ctdb
 Version: 1.0.82
-Release: 7
+Release: 8
 Epoch: 0
 License: GNU GPL version 3
 Group: System Environment/Daemons
@@ -104,6 +104,7 @@ fi
 %{_docdir}/ctdb/README.eventscripts
 %{_docdir}/ctdb/recovery-process.txt
 %{_sysconfdir}/ctdb/events.d/00.ctdb
+%{_sysconfdir}/ctdb/events.d/01.reclock
 %{_sysconfdir}/ctdb/events.d/10.interface
 %{_sysconfdir}/ctdb/events.d/11.natgw
 %{_sysconfdir}/ctdb/events.d/11.routing
@@ -132,6 +133,9 @@ fi
 %{_libdir}/pkgconfig/ctdb.pc
 
 %changelog
+* Fri Oct 2 2009 : Version 1.0.82-8
+ - Backported new vacuuming design from HEAD
+ - Backported new eventscript 01.reclock from HEAD
 * Mon Jul 27 2009 : Version 1.0.82-7
  - Better logmessages when the system is shutting down and fails to allocate a network packet.
  - Reorder the eventscripts for static routes to run before the service scripts
diff --git a/server/ctdb_ltdb_server.c b/server/ctdb_ltdb_server.c
index b330768..e76a50a 100644
--- a/server/ctdb_ltdb_server.c
+++ b/server/ctdb_ltdb_server.c
@@ -291,6 +291,14 @@ static int ctdb_local_attach(struct ctdb_context *ctdb, const char *db_name, boo
 		return -1;
 	}
 
+	ret = ctdb_vacuum_init(ctdb_db);
+	if (ret != 0) {
+		DEBUG(DEBUG_CRIT,("Failed to setup vacuuming for database '%s'\n", ctdb_db->db_name));
+		talloc_free(ctdb_db);
+		return -1;
+	}
+
+
 	DEBUG(DEBUG_INFO,("Attached to database '%s'\n", ctdb_db->db_path));
 	
 	/* success */
diff --git a/server/ctdb_tunables.c b/server/ctdb_tunables.c
index 6a8876f..77cfe5c 100644
--- a/server/ctdb_tunables.c
+++ b/server/ctdb_tunables.c
@@ -56,6 +56,12 @@ static const struct {
 	{ "RecLockLatencyMs",  1000,  offsetof(struct ctdb_tunable, reclock_latency_ms) },
 	{ "RecoveryDropAllIPs",  60,  offsetof(struct ctdb_tunable, recovery_drop_all_ips) },
 	{ "VerifyRecoveryLock",   1,  offsetof(struct ctdb_tunable, verify_recovery_lock) },
+	{ "VacuumDefaultInterval", 300,  offsetof(struct ctdb_tunable, vacuum_default_interval) },
+	{ "VacuumMaxRunTime",     30,  offsetof(struct ctdb_tunable, vacuum_max_run_time) },
+	{ "RepackLimit",      10000,  offsetof(struct ctdb_tunable, repack_limit) },
+	{ "VacuumLimit",       5000,  offsetof(struct ctdb_tunable, vacuum_limit) },
+	{ "VacuumMinInterval",   60,  offsetof(struct ctdb_tunable, vacuum_min_interval) },
+	{ "VacuumMaxInterval",  600,  offsetof(struct ctdb_tunable, vacuum_max_interval) }
 };
 
 /*
diff --git a/server/ctdb_vacuum.c b/server/ctdb_vacuum.c
new file mode 100644
index 0000000..69991b5
--- /dev/null
+++ b/server/ctdb_vacuum.c
@@ -0,0 +1,875 @@
+/*
+   ctdb vacuuming events
+
+   Copyright (C) Ronnie Sahlberg  2009
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "includes.h"
+#include "lib/events/events.h"
+#include "lib/tdb/include/tdb.h"
+#include "system/network.h"
+#include "system/filesys.h"
+#include "system/dir.h"
+#include "../include/ctdb_private.h"
+#include "db_wrap.h"
+#include "lib/util/dlinklist.h"
+#include "lib/events/events.h"
+#include "../include/ctdb_private.h"
+#include "../common/rb_tree.h"
+
+#define TIMELIMIT() timeval_current_ofs(10, 0)
+#define TUNINGDBNAME "vactune.tdb"
+
+enum vacuum_child_status { VACUUM_RUNNING, VACUUM_OK, VACUUM_ERROR, VACUUM_TIMEOUT};
+
+struct ctdb_vacuum_child_context {
+	struct ctdb_vacuum_handle *vacuum_handle;
+	int fd[2];
+	pid_t child_pid;
+	enum vacuum_child_status status;
+	struct timeval start_time;
+};
+
+struct ctdb_vacuum_handle {
+	struct ctdb_db_context *ctdb_db;
+	struct ctdb_vacuum_child_context *child_ctx;
+};
+
+
+/*  a list of records to possibly delete */
+struct vacuum_data {
+	uint32_t vacuum_limit;
+	uint32_t repack_limit;
+	struct ctdb_context *ctdb;
+	struct ctdb_db_context *ctdb_db;
+	struct tdb_context *dest_db;
+	trbt_tree_t *delete_tree;
+	uint32_t delete_count;
+	struct ctdb_marshall_buffer **list;
+	struct timeval start;
+	bool traverse_error;
+	bool vacuum;
+	uint32_t total;
+	uint32_t vacuumed;
+	uint32_t copied;
+};
+
+/* tuning information stored for every db */
+struct vacuum_tuning_data {
+	uint32_t last_num_repack;
+	uint32_t last_num_empty;
+	uint32_t last_interval;
+	uint32_t new_interval;
+	struct timeval last_start;
+	double   last_duration;
+};
+
+/* this structure contains the information for one record to be deleted */
+struct delete_record_data {
+	struct ctdb_context *ctdb;
+	struct ctdb_db_context *ctdb_db;
+	struct ctdb_ltdb_header hdr;
+	TDB_DATA key;
+};
+
+struct delete_records_list {
+	struct ctdb_marshall_buffer *records;
+};
+
+static void ctdb_vacuum_event(struct event_context *ev, struct timed_event *te, 
+							  struct timeval t, void *private_data);
+
+
+/*
+ * traverse function for gathering the records that can be deleted
+ */
+static int vacuum_traverse(struct tdb_context *tdb, TDB_DATA key, TDB_DATA data, void *private)
+{
+	struct vacuum_data *vdata = talloc_get_type(private, struct vacuum_data);
+	struct ctdb_context *ctdb = vdata->ctdb;
+	struct ctdb_db_context *ctdb_db = vdata->ctdb_db;
+	uint32_t lmaster;
+	struct ctdb_ltdb_header *hdr;
+	struct ctdb_rec_data *rec;
+	size_t old_size;
+	       
+	lmaster = ctdb_lmaster(ctdb, &key);
+	if (lmaster >= ctdb->vnn_map->size) {
+		return 0;
+	}
+
+	if (data.dsize != sizeof(struct ctdb_ltdb_header)) {
+		/* its not a deleted record */
+		return 0;
+	}
+
+	hdr = (struct ctdb_ltdb_header *)data.dptr;
+
+	if (hdr->dmaster != ctdb->pnn) {
+		return 0;
+	}
+
+	/* is this a record we could possibly delete? I.e.
+	   if the record is empty and also we are both lmaster
+	   and dmaster for the record we should be able to delete it
+	*/
+	if (lmaster == ctdb->pnn) {
+		uint32_t hash;
+
+		hash = ctdb_hash(&key);
+		if (trbt_lookup32(vdata->delete_tree, hash)) {
+			DEBUG(DEBUG_INFO, (__location__ " Hash collission when vacuuming, skipping this record.\n"));
+		} 
+		else {
+			struct delete_record_data *dd;
+
+			/* store key and header indexed by the key hash */
+			dd = talloc_zero(vdata->delete_tree, struct delete_record_data);
+			if (dd == NULL) {
+				DEBUG(DEBUG_ERR,(__location__ " Out of memory\n"));
+				return -1;
+			}
+			dd->ctdb      = ctdb;
+			dd->ctdb_db   = ctdb_db;
+			dd->key.dsize = key.dsize;
+			dd->key.dptr  = talloc_memdup(dd, key.dptr, key.dsize);
+			if (dd->key.dptr == NULL) {
+				DEBUG(DEBUG_ERR,(__location__ " Out of memory\n"));
+				return -1;
+			}
+
+			dd->hdr = *hdr;
+	
+			trbt_insert32(vdata->delete_tree, hash, dd);
+
+			vdata->delete_count++;
+		}
+	}
+
+	/* add the record to the blob ready to send to the nodes */
+	rec = ctdb_marshall_record(vdata->list[lmaster], ctdb->pnn, key, NULL, tdb_null);
+	if (rec == NULL) {
+		DEBUG(DEBUG_ERR,(__location__ " Out of memory\n"));
+		vdata->traverse_error = true;
+		return -1;
+	}
+	old_size = talloc_get_size(vdata->list[lmaster]);
+	vdata->list[lmaster] = talloc_realloc_size(NULL, vdata->list[lmaster], 
+						   old_size + rec->length);
+	if (vdata->list[lmaster] == NULL) {
+		DEBUG(DEBUG_ERR,(__location__ " Failed to expand\n"));
+		vdata->traverse_error = true;
+		return -1;
+	}
+	vdata->list[lmaster]->count++;
+	memcpy(old_size+(uint8_t *)vdata->list[lmaster], rec, rec->length);
+	talloc_free(rec);
+
+	vdata->total++;
+
+	return 0;
+}
+
+/*
+ * traverse the tree of records to delete and marshall them into
+ * a blob
+ */
+static void delete_traverse(void *param, void *data)
+{
+	struct delete_record_data *dd = talloc_get_type(data, struct delete_record_data);
+	struct delete_records_list *recs = talloc_get_type(param, struct delete_records_list);
+	struct ctdb_rec_data *rec;
+	size_t old_size;
+
+	rec = ctdb_marshall_record(dd, recs->records->db_id, dd->key, &dd->hdr, tdb_null);
+	if (rec == NULL) {
+		DEBUG(DEBUG_ERR, (__location__ " failed to marshall record\n"));
+		return;
+	}
+
+	old_size = talloc_get_size(recs->records);
+	recs->records = talloc_realloc_size(NULL, recs->records, old_size + rec->length);
+	if (recs->records == NULL) {
+		DEBUG(DEBUG_ERR,(__location__ " Failed to expand\n"));
+		return;
+	}
+	recs->records->count++;
+	memcpy(old_size+(uint8_t *)(recs->records), rec, rec->length);
+}
+
+/* 
+ * read-only traverse the database in order to find
+ * records that can be deleted and try to delete these
+ * records on the other nodes
+ * this executes in the child context
+ */
+static int ctdb_vacuum_db(struct ctdb_db_context *ctdb_db, struct vacuum_data *vdata)
+{
+	struct ctdb_context *ctdb = ctdb_db->ctdb;
+	const char *name = ctdb_db->db_name;
+	int ret, i, pnn;
+
+	ret = ctdb_ctrl_getvnnmap(ctdb, TIMELIMIT(), CTDB_CURRENT_NODE, ctdb, &ctdb->vnn_map);
+	if (ret != 0) {
+		DEBUG(DEBUG_ERR, ("Unable to get vnnmap from local node\n"));
+		return ret;
+	}
+
+	pnn = ctdb_ctrl_getpnn(ctdb, TIMELIMIT(), CTDB_CURRENT_NODE);
+	if (pnn == -1) {
+		DEBUG(DEBUG_ERR, ("Unable to get pnn from local node\n"));
+		return -1;
+	}
+
+	ctdb->pnn = pnn;
+	/* the list needs to be of length num_nodes */
+	vdata->list = talloc_array(vdata, struct ctdb_marshall_buffer *, ctdb->vnn_map->size);
+	if (vdata->list == NULL) {
+		DEBUG(DEBUG_ERR,(__location__ " Out of memory\n"));
+		return -1;
+	}
+	for (i = 0; i < ctdb->vnn_map->size; i++) {
+		vdata->list[i] = (struct ctdb_marshall_buffer *)
+			talloc_zero_size(vdata->list, 
+							 offsetof(struct ctdb_marshall_buffer, data));
+		if (vdata->list[i] == NULL) {
+			DEBUG(DEBUG_ERR,(__location__ " Out of memory\n"));
+			return -1;
+		}
+		vdata->list[i]->db_id = ctdb_db->db_id;
+	}
+
+	/* read-only traverse, looking for records that might be able to be vacuumed */
+	if (tdb_traverse_read(ctdb_db->ltdb->tdb, vacuum_traverse, vdata) == -1 ||
+	    vdata->traverse_error) {
+		DEBUG(DEBUG_ERR,(__location__ " Traverse error in vacuuming '%s'\n", name));
+		return -1;		
+	}
+
+	for ( i = 0; i < ctdb->vnn_map->size; i++) {
+		if (vdata->list[i]->count == 0) {
+			continue;
+		}
+
+		/* for records where we are not the lmaster, tell the lmaster to fetch the record */
+		if (ctdb->vnn_map->map[i] != ctdb->pnn) {
+			TDB_DATA data;


-- 
CTDB repository