Rev 773: more efficient freelist allocation in http://samba.org/~tridge/ctdb

tridge at samba.org tridge at samba.org
Fri Jan 18 02:33:03 GMT 2008


------------------------------------------------------------
revno: 773
revision-id:tridge at samba.org-20080118023254-ub38q3s5eqfbhu74
parent: tridge at samba.org-20080117054656-1kya0oikkyj8ocdp
committer: Andrew Tridgell <tridge at samba.org>
branch nick: tridge.stable
timestamp: Fri 2008-01-18 13:32:54 +1100
message:
  more efficient freelist allocation
  This takes advantage of the fact that we can do left merges but not right merges
  By allocating data from the end of the freelist entry rather than the beginning
  we can guarantee that if we immediately free the record again it will be merged
  with the previous freelist entry, reducing fragmentation
modified:
  lib/tdb/common/freelist.c      freelist.c-20070220022425-m1wibgjq7n5hahs6-4
  lib/tdb/common/tdb_private.h   tdb_private.h-20070220022425-m1wibgjq7n5hahs6-10
=== modified file 'lib/tdb/common/freelist.c'
--- a/lib/tdb/common/freelist.c	2008-01-09 22:42:44 +0000
+++ b/lib/tdb/common/freelist.c	2008-01-18 02:32:54 +0000
@@ -208,62 +208,61 @@
 }
 
 
+
 /* 
    the core of tdb_allocate - called when we have decided which
    free list entry to use
+
+   Note that we try to allocate by grabbing data from the end of an existing record,
+   not the beginning. This is so the left merge in a free is more likely to be
+   able to free up the record without fragmentation
  */
-static tdb_off_t tdb_allocate_ofs(struct tdb_context *tdb, tdb_len_t length, tdb_off_t rec_ptr,
-				struct list_struct *rec, tdb_off_t last_ptr)
+static tdb_off_t tdb_allocate_ofs(struct tdb_context *tdb, 
+				  tdb_len_t length, tdb_off_t rec_ptr,
+				  struct list_struct *rec, tdb_off_t last_ptr)
 {
-	struct list_struct newrec;
-	tdb_off_t newrec_ptr;
-
-	memset(&newrec, '\0', sizeof(newrec));
-
-	/* found it - now possibly split it up  */
-	if (rec->rec_len > length + MIN_REC_SIZE) {
-		/* Length of left piece */
-		length = TDB_ALIGN(length, TDB_ALIGNMENT);
-		
-		/* Right piece to go on free list */
-		newrec.rec_len = rec->rec_len - (sizeof(*rec) + length);
-		newrec_ptr = rec_ptr + sizeof(*rec) + length;
-		
-		/* And left record is shortened */
-		rec->rec_len = length;
-	} else {
-		newrec_ptr = 0;
-	}
-	
-	/* Remove allocated record from the free list */
-	if (tdb_ofs_write(tdb, last_ptr, &rec->next) == -1) {
-		return 0;
-	}
-	
-	/* Update header: do this before we drop alloc
-	   lock, otherwise tdb_free() might try to
-	   merge with us, thinking we're free.
-	   (Thanks Jeremy Allison). */
+#define MIN_REC_SIZE (sizeof(struct list_struct) + sizeof(tdb_off_t) + 8)
+
+	if (rec->rec_len < length + MIN_REC_SIZE) {
+		/* we have to grab the whole record */
+
+		/* unlink it from the previous record */
+		if (tdb_ofs_write(tdb, last_ptr, &rec->next) == -1) {
+			return 0;
+		}
+
+		/* mark it not free */
+		rec->magic = TDB_MAGIC;
+		if (tdb_rec_write(tdb, rec_ptr, rec) == -1) {
+			return 0;
+		}
+		return rec_ptr;
+	}
+
+	/* we're going to just shorten the existing record */
+	rec->rec_len -= (length + sizeof(*rec));
+	if (tdb_rec_write(tdb, rec_ptr, rec) == -1) {
+		return 0;
+	}
+	if (update_tailer(tdb, rec_ptr, rec) == -1) {
+		return 0;
+	}
+
+	/* and setup the new record */
+	rec_ptr += sizeof(*rec) + rec->rec_len;	
+
+	memset(rec, '\0', sizeof(*rec));
+	rec->rec_len = length;
 	rec->magic = TDB_MAGIC;
+
 	if (tdb_rec_write(tdb, rec_ptr, rec) == -1) {
 		return 0;
 	}
-	
-	/* Did we create new block? */
-	if (newrec_ptr) {
-		/* Update allocated record tailer (we
-		   shortened it). */
-		if (update_tailer(tdb, rec_ptr, rec) == -1) {
-			return 0;
-		}
-		
-		/* Free new record */
-		if (tdb_free(tdb, newrec_ptr, &newrec) == -1) {
-			return 0;
-		}
+
+	if (update_tailer(tdb, rec_ptr, rec) == -1) {
+		return 0;
 	}
-	
-	/* all done - return the new record offset */
+
 	return rec_ptr;
 }
 
@@ -287,6 +286,7 @@
 
 	/* Extra bytes required for tailer */
 	length += sizeof(tdb_off_t);
+	length = TDB_ALIGN(length, TDB_ALIGNMENT);
 
  again:
 	last_ptr = FREELIST_TOP;
@@ -343,7 +343,8 @@
 			goto fail;
 		}
 
-		newrec_ptr = tdb_allocate_ofs(tdb, length, bestfit.rec_ptr, rec, bestfit.last_ptr);
+		newrec_ptr = tdb_allocate_ofs(tdb, length, bestfit.rec_ptr, 
+					      rec, bestfit.last_ptr);
 		tdb_unlock(tdb, -1, F_WRLCK);
 		return newrec_ptr;
 	}

=== modified file 'lib/tdb/common/tdb_private.h'
--- a/lib/tdb/common/tdb_private.h	2008-01-06 01:33:57 +0000
+++ b/lib/tdb/common/tdb_private.h	2008-01-18 02:32:54 +0000
@@ -49,7 +49,6 @@
 #define TDB_DEAD_MAGIC (0xFEE1DEAD)
 #define TDB_RECOVERY_MAGIC (0xf53bc0e7U)
 #define TDB_ALIGNMENT 4
-#define MIN_REC_SIZE (2*sizeof(struct list_struct) + TDB_ALIGNMENT)
 #define DEFAULT_HASH_SIZE 131
 #define FREELIST_TOP (sizeof(struct tdb_header))
 #define TDB_ALIGN(x,a) (((x) + (a)-1) & ~((a)-1))



More information about the samba-cvs mailing list