New version of source/lib/bitmap.c

okuyamak at dd.iij4u.or.jp okuyamak at dd.iij4u.or.jp
Fri Nov 3 12:23:52 GMT 2000


Dear all,

I've created new version of ./source/lib/bitmap.c
Current bitmap.c did not have any bug, except for performance.

I've passed patche about this routine to SUGJ group before, and
thank to SUGJ, it's been selected.  But this version is even more
faster.

The new version have several points from original bitmap.c:

1) bitmap_allocate() now allocates bitmap in single malloc().
  This will reduce malloc() call at least once,
  and also, because memory area of bm and bm->b is now even more
  closer, cache hit ratio of accession to bm->b will raise ( a
  little bit ).

# This is same with old patch I gave to SUGJ.


2) bitmap_find() now uses ffs(), which is built in function for gcc.
  For the case you're not using ffs(), I also implemented 
  ffs() function for those who's not using ffs().
# Please look at FSF's page or somewhere in internet for 
# ffs() manual page.

  This ffs() will only do 6 case sensitive branch per bit, while
  original does lot more. So, even with newer ffs(), bitmap_find()
  should work lot faster.

  But if you could use built in ffs() of gcc, it'll be even more
  faster, at least in case of PowerPC/Power2, and ix86 machines,
  for they have special instructions for this kind of treatment.


There's one key hack that you must understand to know what ffs()
of my implement is doing. It's this single line:

    val_with_only_lsb	= val & (( ~val )+ 1 );

To understand this, first, think of case when val is 0.
         val == 0x00000000
        ~val == 0xffffffff
( ~val ) + 1 == 0x00000000

and so, 

val & (( ~val )+ 1 ) = 0x00000000 & 0x00000000 == 0


For other values, think like this. Suppose we had val with something like

         val == 0bxxxxxxxx1000000

where x can be 0 or 1, individually. Then, 

        ~val == 0byyyyyyyy0111111

where individual y is invert of corresponding x.

(~val ) + 1  == 0byyyyyyyy1000000

And since each x & y == 0 always ( if x is 1, y is 0. if x is 0, y is 1 ),

val & (( ~val )+ 1 ) = 0bxxxxxxxx1000000 & 0byyyyyyyy1000000
		    == 0b000000001000000

And so, only lsb with bit 1 is selected.

# I've saw this hack in source code of X11R3, for the first
# time. And I'm looking for who( or where ) was this hack first
# created. Anyone have any information about it?


Once 'val_with_only_lsb' was created, we all know that in case if
val_with_only_lsb containd 0, 1 or 2 , we only need to return itself.


Here comes the diff files, and best regards,
---- 
Kenichi Okuyama at Tokyo Research Lab. IBM-Japan, Co.

Index: bitmap.c
===================================================================
RCS file: /cvsroot/samba/source/lib/bitmap.c,v
retrieving revision 1.6
diff -u -b -B -r1.6 bitmap.c
--- bitmap.c	1999/12/13 13:27:13	1.6
+++ bitmap.c	2000/11/03 11:49:02
@@ -26,26 +26,83 @@
 /* these functions provide a simple way to allocate integers from a
    pool without repitition */
 
+#ifdef __GNUC__
+/* let's simply use built in ffs(), for it's usually equivalent, or
+   faster */
+#define HAVE_FFS
+#else
+#undef  HAVE_FFS
+#endif
+
+#define UINT32BITLEN	32
 
+
+
+#ifndef HAVE_FFS
+/*
+ * ffs() will return where lsb of val with 1 is.
+ * if val == 0, then 0 is returned.
+ * if 0x01 then 1, etc.
+ */
+
+static int ffs( unsigned long val )
+{
+    unsigned long       val_with_only_lsb;
+    int                 ret;
+
+    val_with_only_lsb	= val & (( ~val )+ 1 );
+
+    switch ( val_with_only_lsb ) {
+    case 0x00000000:
+        return 0;
+    case 0x00000002:
+    case 0x00000001:
+        return val_with_only_lsb;
+    default:
+        ret	= 0;
+    }
+
+    if ( val_with_only_lsb > 0xffff ) {
+        ret	+= 16;
+        val_with_only_lsb	>>= 16;
+    }
+    if ( val_with_only_lsb > 0xff ) {
+        ret	+= 8;
+        val_with_only_lsb	>>= 8;
+    }
+    if ( val_with_only_lsb > 0xf ) {
+        ret	+= 4;
+        val_with_only_lsb	>>= 4;
+    }
+    if ( val_with_only_lsb > 0xc ) {
+        ret	+= 2;
+        val_with_only_lsb	>>= 2;
+    }
+    return ret + val_with_only_lsb;
+}
+#endif
+
+
 /****************************************************************************
 allocate a bitmap of the specified size
 ****************************************************************************/
 struct bitmap *bitmap_allocate(int n)
 {
 	struct bitmap *bm;
-
-	bm = (struct bitmap *)malloc(sizeof(*bm));
+    int	total_size;
 
-	if (!bm) return NULL;
+    total_size	= sizeof( struct bitmap )
+        + sizeof( uint32 ) * (( n + UINT32BITLEN - 1 ) / UINT32BITLEN );
 	
-	bm->n = n;
-	bm->b = (uint32 *)malloc(sizeof(bm->b[0])*(n+31)/32);
-	if (!bm->b) {
-		free(bm);
+    bm	= ( struct bitmap *)malloc( total_size );
+    if ( bm == NULL ) {
 		return NULL;
 	}
+
+    bm->b	= (uint32 *)( bm + 1 );
+    bm->n   = n;
 
-	memset(bm->b, 0, sizeof(bm->b[0])*(n+31)/32);
+    memset(bm->b, 0, sizeof(bm->b[0])*(n+UINT32BITLEN)/ UINT32BITLEN );
 
 	return bm;
 }
@@ -74,7 +131,7 @@
 		      i, bm->n));
 		return False;
 	}
-	bm->b[i/32] &= ~(1<<(i%32));
+	bm->b[i/UINT32BITLEN] &= ~(1<<(i%UINT32BITLEN));
 	return True;
 }
 
@@ -84,7 +141,7 @@
 BOOL bitmap_query(struct bitmap *bm, unsigned i)
 {
 	if (i >= bm->n) return False;
-	if (bm->b[i/32] & (1<<(i%32))) {
+	if (bm->b[i/UINT32BITLEN] & (1<<(i%UINT32BITLEN))) {
 		return True;
 	}
 	return False;
@@ -96,35 +153,40 @@
 ****************************************************************************/
 int bitmap_find(struct bitmap *bm, unsigned ofs)
 {
-	int i, j;
+    int		slide_word, maxwords, startword;
+    uint32	bf_first, bf_last, firstmask;
+    int		i, pos;
+
+    if (ofs > bm->n) {
+        ofs = 0;
+    }
+
+    maxwords	= ( bm->n + UINT32BITLEN - 1 ) / UINT32BITLEN;
+    startword	= ofs / UINT32BITLEN;
+    firstmask	= ( ~0  ) << ( ofs % UINT32BITLEN );
 
-	if (ofs > bm->n) ofs = 0;
+    bf_first	= bm->b[startword] & firstmask;
+    bf_last     = bm->b[startword] & ( ~firstmask );
 
-	i = ofs;
-	while (i < bm->n) {
-		if (~(bm->b[i/32])) {
-			j = i;
-			do {
-				if (!bitmap_query(bm, j)) return j;
-				j++;
-			} while (j & 31 && j < bm->n);
-		}
-		i += 32;
-		i &= ~31;
-	}
-
-	i = 0;
-	while (i < ofs) {
-		if (~(bm->b[i/32])) {
-			j = i;
-			do {
-				if (!bitmap_query(bm, j)) return j;
-				j++;
-			} while (j & 31 && j < bm->n);
+    if ( ~bf_first ) {
+        pos	= ffs( ~bf_first );
+        return pos + ( startword * UINT32BITLEN ) - 1;
 		}
-		i += 32;
-		i &= ~31;
+    for ( i = startword+1; i < maxwords; i++ ) {
+        if ( ~( bm->b[i] ) ) {
+            pos	= ffs( ~( bm->b[i] ));
+            return pos + ( i * UINT32BITLEN ) - 1;
 	}
-
+    }
+    for ( i = 0; i < startword; i++ ) {
+        if ( ~( bm->b[i] )) {
+            pos	= ffs( ~( bm->b[i] ));
+            return pos + ( i * UINT32BITLEN ) - 1;
+        }
+    }
+    if ( ~bf_last ) {
+        pos	= ffs( ~bf_last );
+        return pos + ( startword * UINT32BITLEN ) - 1;
+    }
 	return -1;
 }




More information about the samba-technical mailing list