svn commit: samba r13347 - in branches/SAMBA_4_0/source/lib: . ldb/common

idra at samba.org idra at samba.org
Sat Feb 4 16:44:28 GMT 2006


Author: idra
Date: 2006-02-04 16:44:27 +0000 (Sat, 04 Feb 2006)
New Revision: 13347

WebSVN: http://websvn.samba.org/cgi-bin/viewcvs.cgi?view=rev&root=samba&rev=13347

Log:

- Now we compare values with an optimized, utf8-safe
function if the user provides a utf8-compliant
casefold function to ldb.

- Fix toupper_m and tolower_m to not crash if
the case tables are not found

- Let load_case_tables() search in the correct
directory in the search tree for the case
tables so that we can test utf8

Simo


Modified:
   branches/SAMBA_4_0/source/lib/ldb/common/attrib_handlers.c
   branches/SAMBA_4_0/source/lib/util_unistr.c


Changeset:
Modified: branches/SAMBA_4_0/source/lib/ldb/common/attrib_handlers.c
===================================================================
--- branches/SAMBA_4_0/source/lib/ldb/common/attrib_handlers.c	2006-02-04 14:08:24 UTC (rev 13346)
+++ branches/SAMBA_4_0/source/lib/ldb/common/attrib_handlers.c	2006-02-04 16:44:27 UTC (rev 13347)
@@ -145,15 +145,24 @@
   compare two case insensitive strings, ignoring multiple whitespaces
   and leading and trailing whitespaces
   see rfc2252 section 8.1
+	
+  try to optimize for the ascii case,
+  but if we find out an utf8 codepoint revert to slower but correct function
 */
 static int ldb_comparison_fold(struct ldb_context *ldb, void *mem_ctx,
 			       const struct ldb_val *v1, const struct ldb_val *v2)
 {
 	const char *s1=(const char *)v1->data, *s2=(const char *)v2->data;
+	char *b1, *b2, *u1, *u2;
+	int ret;
 	while (*s1 == ' ') s1++;
 	while (*s2 == ' ') s2++;
 	/* TODO: make utf8 safe, possibly with helper function from application */
 	while (*s1 && *s2) {
+		/* the first 127 (0x7F) chars are ascii and utf8 guarantes they
+		 * never appear in multibyte sequences */
+		if (((unsigned char)s1[0]) & 0x80) goto utf8str;
+		if (((unsigned char)s2[0]) & 0x80) goto utf8str;
 		if (toupper((unsigned char)*s1) != toupper((unsigned char)*s2))
 			break;
 		if (*s1 == ' ') {
@@ -163,7 +172,7 @@
 		s1++; s2++;
 	}
 	if (! (*s1 && *s2)) {
-		/* remove trailing spaces only if one of the pointers
+		/* check for trailing spaces only if one of the pointers
 		 * has reached the end of the strings otherwise we
 		 * can mistakenly match.
 		 * ex. "domain users" <-> "domainUpdates"
@@ -172,6 +181,30 @@
 		while (*s2 == ' ') s2++;
 	}
 	return (int)(toupper(*s1)) - (int)(toupper(*s2));
+
+utf8str:
+	/* non need to recheck from the start, just from the first utf8 char found */
+	b1 = u1 = ldb_casefold(ldb, mem_ctx, s1);
+	b2 = u2 = ldb_casefold(ldb, mem_ctx, s2);
+	
+	while (*u1 & *u2) {
+		if (*u1 != *u2)
+			break;
+		if (*u1 == ' ') {
+			while (u1[0] == u1[1]) u1++;
+			while (u2[0] == u2[1]) u2++;
+		}
+		u1++; u2++;
+	}
+	if (! (*u1 && *u2)) {
+		while (*u1 == ' ') u1++;
+		while (*u2 == ' ') u2++;
+	}
+	ret = (int)(*u1 - *u2);
+	talloc_free(b1);
+	talloc_free(b2);
+
+	return ret;
 }
 
 /*

Modified: branches/SAMBA_4_0/source/lib/util_unistr.c
===================================================================
--- branches/SAMBA_4_0/source/lib/util_unistr.c	2006-02-04 14:08:24 UTC (rev 13346)
+++ branches/SAMBA_4_0/source/lib/util_unistr.c	2006-02-04 16:44:27 UTC (rev 13347)
@@ -43,10 +43,18 @@
 	lowcase_table = map_file(lib_path(mem_ctx, "lowcase.dat"), 0x20000);
 	talloc_free(mem_ctx);
 	if (upcase_table == NULL) {
-		upcase_table = (void *)-1;
+		/* try also under codepages for testing purposes */
+		upcase_table = map_file("codepages/upcase.dat", 0x20000);
+		if (upcase_table == NULL) {
+			upcase_table = (void *)-1;
+		}
 	}
 	if (lowcase_table == NULL) {
-		lowcase_table = (void *)-1;
+		/* try also under codepages for testing purposes */
+		lowcase_table = map_file("codepages/lowcase.dat", 0x20000);
+		if (lowcase_table == NULL) {
+			lowcase_table = (void *)-1;
+		}
 	}
 }
 
@@ -58,12 +66,12 @@
 	if (val < 128) {
 		return toupper(val);
 	}
+	if (upcase_table == NULL) {
+		load_case_tables();
+	}
 	if (upcase_table == (void *)-1) {
 		return val;
 	}
-	if (upcase_table == NULL) {
-		load_case_tables();
-	}
 	if (val & 0xFFFF0000) {
 		return val;
 	}
@@ -78,12 +86,12 @@
 	if (val < 128) {
 		return tolower(val);
 	}
+	if (lowcase_table == NULL) {
+		load_case_tables();
+	}
 	if (lowcase_table == (void *)-1) {
 		return val;
 	}
-	if (lowcase_table == NULL) {
-		load_case_tables();
-	}
 	if (val & 0xFFFF0000) {
 		return val;
 	}



More information about the samba-cvs mailing list