[SCM] Samba Shared Repository - branch master updated

Andrew Bartlett abartlet at samba.org
Wed Mar 23 01:22:01 MDT 2011


The branch, master has been updated
       via  41051fd lib/util: Merge basic string length and comparison functions
       via  89fd1cb s3-lib prepare Samba3 to use common codepoint based string functions
      from  d73db40 s3-safe_string: Add checked_strlcpy()

http://gitweb.samba.org/?p=samba.git;a=shortlog;h=master


- Log -----------------------------------------------------------------
commit 41051fd3d3ac7450771518aa12b660867ed7e819
Author: Andrew Bartlett <abartlet at samba.org>
Date:   Fri Mar 18 19:10:23 2011 +1100

    lib/util: Merge basic string length and comparison functions
    
    These functions now use the codepoints for more accurate string
    handling and now form common code.
    
    Andrew Bartlett
    
    Autobuild-User: Andrew Bartlett <abartlet at samba.org>
    Autobuild-Date: Wed Mar 23 08:21:54 CET 2011 on sn-devel-104

commit 89fd1cb767c966d5ba60323bdb04a6baf973e4a3
Author: Andrew Bartlett <abartlet at samba.org>
Date:   Wed Mar 23 17:00:02 2011 +1100

    s3-lib prepare Samba3 to use common codepoint based string functions
    
    This patch changes the source3 util_str.c functions so that the next
    patch just contains the move into common code, without code changes.
    
    Andrew Bartlett

-----------------------------------------------------------------------

Summary of changes:
 lib/util/charset/charset.h       |    2 +-
 lib/util/charset/tests/charset.c |   16 +-
 lib/util/charset/util_str.c      |  416 ++++++++++++++++++++++++++++++++++++++
 lib/util/charset/util_unistr.c   |  334 +------------------------------
 lib/util/charset/wscript_build   |    2 +-
 source3/Makefile.in              |    2 +-
 source3/lib/util_str.c           |  318 -----------------------------
 7 files changed, 428 insertions(+), 662 deletions(-)
 create mode 100644 lib/util/charset/util_str.c


Changeset truncated at 500 lines:

diff --git a/lib/util/charset/charset.h b/lib/util/charset/charset.h
index 474d77e..943bfa4 100644
--- a/lib/util/charset/charset.h
+++ b/lib/util/charset/charset.h
@@ -128,7 +128,7 @@ size_t strlen_m_term_null(const char *s);
 size_t strlen_m(const char *s);
 char *alpha_strcpy(char *dest, const char *src, const char *other_safe_chars, size_t maxlength);
 void string_replace_m(char *s, char oldc, char newc);
-bool strcsequal_m(const char *s1,const char *s2);
+bool strcsequal(const char *s1,const char *s2);
 bool strequal_m(const char *s1, const char *s2);
 int strncasecmp_m(const char *s1, const char *s2, size_t n);
 bool next_token(const char **ptr,char *buff, const char *sep, size_t bufsize);
diff --git a/lib/util/charset/tests/charset.c b/lib/util/charset/tests/charset.c
index 72fd11b..351b91c 100644
--- a/lib/util/charset/tests/charset.c
+++ b/lib/util/charset/tests/charset.c
@@ -69,14 +69,14 @@ static bool test_strequal_m(struct torture_context *tctx)
 	return true;
 }
 
-static bool test_strcsequal_m(struct torture_context *tctx)
+static bool test_strcsequal(struct torture_context *tctx)
 {
-	torture_assert(tctx, !strcsequal_m("foo", "bar"), "different strings");
-	torture_assert(tctx, strcsequal_m("foo", "foo"), "same case strings");
-	torture_assert(tctx, !strcsequal_m("foo", "Foo"), "different case strings");
-	torture_assert(tctx, !strcsequal_m(NULL, "Foo"), "one NULL");
-	torture_assert(tctx, !strcsequal_m("foo", NULL), "other NULL");
-	torture_assert(tctx, strcsequal_m(NULL, NULL), "both NULL");
+	torture_assert(tctx, !strcsequal("foo", "bar"), "different strings");
+	torture_assert(tctx, strcsequal("foo", "foo"), "same case strings");
+	torture_assert(tctx, !strcsequal("foo", "Foo"), "different case strings");
+	torture_assert(tctx, !strcsequal(NULL, "Foo"), "one NULL");
+	torture_assert(tctx, !strcsequal("foo", NULL), "other NULL");
+	torture_assert(tctx, strcsequal(NULL, NULL), "both NULL");
 	return true;
 }
 
@@ -253,7 +253,7 @@ struct torture_suite *torture_local_charset(TALLOC_CTX *mem_ctx)
 	torture_suite_add_simple_test(suite, "codepoint_cmpi", test_codepoint_cmpi);
 	torture_suite_add_simple_test(suite, "strcasecmp_m", test_strcasecmp_m);
 	torture_suite_add_simple_test(suite, "strequal_m", test_strequal_m);
-	torture_suite_add_simple_test(suite, "strcsequal_m", test_strcsequal_m);
+	torture_suite_add_simple_test(suite, "strcsequal", test_strcsequal);
 	torture_suite_add_simple_test(suite, "string_replace_m", test_string_replace_m);
 	torture_suite_add_simple_test(suite, "strncasecmp_m", test_strncasecmp_m);
 	torture_suite_add_simple_test(suite, "next_token", test_next_token);
diff --git a/lib/util/charset/util_str.c b/lib/util/charset/util_str.c
new file mode 100644
index 0000000..597b031
--- /dev/null
+++ b/lib/util/charset/util_str.c
@@ -0,0 +1,416 @@
+/*
+   Unix SMB/CIFS implementation.
+   Samba utility functions
+   Copyright (C) Andrew Tridgell 1992-2001
+   Copyright (C) Simo Sorce 2001
+   Copyright (C) Andrew Bartlett 2011
+   Copyright (C) Jeremy Allison  1992-2007
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "includes.h"
+#include "system/locale.h"
+
+#ifdef strcasecmp
+#undef strcasecmp
+#endif
+
+/**
+ Case insensitive string comparison
+**/
+_PUBLIC_ int strcasecmp_m(const char *s1, const char *s2)
+{
+	codepoint_t c1=0, c2=0;
+	size_t size1, size2;
+	struct smb_iconv_convenience *iconv_convenience = get_iconv_convenience();
+
+	/* handle null ptr comparisons to simplify the use in qsort */
+	if (s1 == s2) return 0;
+	if (s1 == NULL) return -1;
+	if (s2 == NULL) return 1;
+
+	while (*s1 && *s2) {
+		c1 = next_codepoint_convenience(iconv_convenience, s1, &size1);
+		c2 = next_codepoint_convenience(iconv_convenience, s2, &size2);
+
+		s1 += size1;
+		s2 += size2;
+
+		if (c1 == c2) {
+			continue;
+		}
+
+		if (c1 == INVALID_CODEPOINT ||
+		    c2 == INVALID_CODEPOINT) {
+			/* what else can we do?? */
+			return strcasecmp(s1, s2);
+		}
+
+		if (toupper_m(c1) != toupper_m(c2)) {
+			return c1 - c2;
+		}
+	}
+
+	return *s1 - *s2;
+}
+
+/**
+ Case insensitive string comparison, length limited
+**/
+_PUBLIC_ int strncasecmp_m(const char *s1, const char *s2, size_t n)
+{
+	codepoint_t c1=0, c2=0;
+	size_t size1, size2;
+	struct smb_iconv_convenience *iconv_convenience = get_iconv_convenience();
+
+	/* handle null ptr comparisons to simplify the use in qsort */
+	if (s1 == s2) return 0;
+	if (s1 == NULL) return -1;
+	if (s2 == NULL) return 1;
+
+	while (*s1 && *s2 && n) {
+		n--;
+
+		c1 = next_codepoint_convenience(iconv_convenience, s1, &size1);
+		c2 = next_codepoint_convenience(iconv_convenience, s2, &size2);
+
+		s1 += size1;
+		s2 += size2;
+
+		if (c1 == c2) {
+			continue;
+		}
+
+		if (c1 == INVALID_CODEPOINT ||
+		    c2 == INVALID_CODEPOINT) {
+			/* what else can we do?? */
+			return strcasecmp(s1, s2);
+		}
+
+		if (toupper_m(c1) != toupper_m(c2)) {
+			return c1 - c2;
+		}
+	}
+
+	if (n == 0) {
+		return 0;
+	}
+
+	return *s1 - *s2;
+}
+
+/**
+ * Compare 2 strings.
+ *
+ * @note The comparison is case-insensitive.
+ **/
+_PUBLIC_ bool strequal_m(const char *s1, const char *s2)
+{
+	return strcasecmp_m(s1,s2) == 0;
+}
+
+/**
+ Compare 2 strings (case sensitive).
+**/
+_PUBLIC_ bool strcsequal(const char *s1,const char *s2)
+{
+	if (s1 == s2)
+		return true;
+	if (!s1 || !s2)
+		return false;
+
+	return strcmp(s1,s2) == 0;
+}
+
+/**
+ * Calculate the number of units (8 or 16-bit, depending on the
+ * destination charset), that would be needed to convert the input
+ * string which is expected to be in src_charset encoding to the
+ * destination charset (which should be a unicode charset).
+ */
+_PUBLIC_ size_t strlen_m_ext(const char *s, charset_t src_charset, charset_t dst_charset)
+{
+	size_t count = 0;
+	struct smb_iconv_convenience *ic = get_iconv_convenience();
+
+	if (!s) {
+		return 0;
+	}
+
+	while (*s && !(((uint8_t)*s) & 0x80)) {
+		s++;
+		count++;
+	}
+
+	if (!*s) {
+		return count;
+	}
+
+	while (*s) {
+		size_t c_size;
+		codepoint_t c = next_codepoint_convenience_ext(ic, s, src_charset, &c_size);
+		s += c_size;
+
+		switch (dst_charset) {
+		case CH_UTF16LE:
+		case CH_UTF16BE:
+		case CH_UTF16MUNGED:
+			if (c < 0x10000) {
+				/* Unicode char fits into 16 bits. */
+				count += 1;
+			} else {
+				/* Double-width unicode char - 32 bits. */
+				count += 2;
+			}
+			break;
+		case CH_UTF8:
+			/*
+			 * this only checks ranges, and does not
+			 * check for invalid codepoints
+			 */
+			if (c < 0x80) {
+				count += 1;
+			} else if (c < 0x800) {
+				count += 2;
+			} else if (c < 0x1000) {
+				count += 3;
+			} else {
+				count += 4;
+			}
+			break;
+		default:
+			/*
+			 * non-unicode encoding:
+			 * assume that each codepoint fits into
+			 * one unit in the destination encoding.
+			 */
+			count += 1;
+		}
+	}
+
+	return count;
+}
+
+_PUBLIC_ size_t strlen_m_ext_term(const char *s, const charset_t src_charset,
+				  const charset_t dst_charset)
+{
+	if (!s) {
+		return 0;
+	}
+	return strlen_m_ext(s, src_charset, dst_charset) + 1;
+}
+
+/**
+ * Calculate the number of 16-bit units that would be needed to convert
+ * the input string which is expected to be in CH_UNIX encoding to UTF16.
+ *
+ * This will be the same as the number of bytes in a string for single
+ * byte strings, but will be different for multibyte.
+ */
+_PUBLIC_ size_t strlen_m(const char *s)
+{
+	return strlen_m_ext(s, CH_UNIX, CH_UTF16LE);
+}
+
+/**
+   Work out the number of multibyte chars in a string, including the NULL
+   terminator.
+**/
+_PUBLIC_ size_t strlen_m_term(const char *s)
+{
+	if (!s) {
+		return 0;
+	}
+
+	return strlen_m(s) + 1;
+}
+
+/*
+ * Weird helper routine for the winreg pipe: If nothing is around, return 0,
+ * if a string is there, include the terminator.
+ */
+
+_PUBLIC_ size_t strlen_m_term_null(const char *s)
+{
+	size_t len;
+	if (!s) {
+		return 0;
+	}
+	len = strlen_m(s);
+	if (len == 0) {
+		return 0;
+	}
+
+	return len+1;
+}
+
+/**
+ Strchr and strrchr_m are a bit complex on general multi-byte strings.
+**/
+_PUBLIC_ char *strchr_m(const char *src, char c)
+{
+	const char *s;
+	struct smb_iconv_convenience *ic = get_iconv_convenience();
+	if (src == NULL) {
+		return NULL;
+	}
+	/* characters up to and including 0x3F are guaranteed to not appear in
+	   non-initial position in multi-byte charsets */
+	if ((c & 0xC0) == 0) {
+		return strchr(src, c);
+	}
+
+	/* this is quite a common operation, so we want it to be
+	   fast. We optimise for the ascii case, knowing that all our
+	   supported multi-byte character sets are ascii-compatible
+	   (ie. they match for the first 128 chars) */
+
+	for (s = src; *s && !(((unsigned char)s[0]) & 0x80); s++) {
+		if (*s == c)
+			return (char *)s;
+	}
+
+	if (!*s)
+		return NULL;
+
+#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
+	/* With compose characters we must restart from the beginning. JRA. */
+	s = src;
+#endif
+
+	while (*s) {
+		size_t size;
+		codepoint_t c2 = next_codepoint_convenience(ic, s, &size);
+		if (c2 == c) {
+			return discard_const_p(char, s);
+		}
+		s += size;
+	}
+
+	return NULL;
+}
+
+/**
+ * Multibyte-character version of strrchr
+ */
+_PUBLIC_ char *strrchr_m(const char *s, char c)
+{
+	struct smb_iconv_convenience *ic = get_iconv_convenience();
+	char *ret = NULL;
+
+	if (s == NULL) {
+		return NULL;
+	}
+
+	/* characters up to and including 0x3F are guaranteed to not appear in
+	   non-initial position in multi-byte charsets */
+	if ((c & 0xC0) == 0) {
+		return strrchr(s, c);
+	}
+
+	/* this is quite a common operation, so we want it to be
+	   fast. We optimise for the ascii case, knowing that all our
+	   supported multi-byte character sets are ascii-compatible
+	   (ie. they match for the first 128 chars). Also, in Samba
+	   we only search for ascii characters in 'c' and that
+	   in all mb character sets with a compound character
+	   containing c, if 'c' is not a match at position
+	   p, then p[-1] > 0x7f. JRA. */
+
+	{
+		size_t len = strlen(s);
+		const char *cp = s;
+		bool got_mb = false;
+
+		if (len == 0)
+			return NULL;
+		cp += (len - 1);
+		do {
+			if (c == *cp) {
+				/* Could be a match. Part of a multibyte ? */
+				if ((cp > s) &&
+					(((unsigned char)cp[-1]) & 0x80)) {
+					/* Yep - go slow :-( */
+					got_mb = true;
+					break;
+				}
+				/* No - we have a match ! */
+				return (char *)cp;
+			}
+		} while (cp-- != s);
+		if (!got_mb)
+			return NULL;
+	}
+
+	while (*s) {
+		size_t size;
+		codepoint_t c2 = next_codepoint_convenience(ic, s, &size);
+		if (c2 == c) {
+			ret = discard_const_p(char, s);
+		}
+		s += size;
+	}
+
+	return ret;
+}
+
+/**
+  return True if any (multi-byte) character is lower case
+*/
+_PUBLIC_ bool strhaslower(const char *string)
+{
+	struct smb_iconv_convenience *ic = get_iconv_convenience();
+	while (*string) {
+		size_t c_size;
+		codepoint_t s;
+		codepoint_t t;
+
+		s = next_codepoint_convenience(ic, string, &c_size);
+		string += c_size;
+
+		t = toupper_m(s);
+
+		if (s != t) {
+			return true; /* that means it has lower case chars */
+		}
+	}
+
+	return false;
+}
+
+/**
+  return True if any (multi-byte) character is upper case
+*/
+_PUBLIC_ bool strhasupper(const char *string)
+{
+	struct smb_iconv_convenience *ic = get_iconv_convenience();
+	while (*string) {
+		size_t c_size;
+		codepoint_t s;
+		codepoint_t t;
+
+		s = next_codepoint_convenience(ic, string, &c_size);
+		string += c_size;
+
+		t = tolower_m(s);
+
+		if (s != t) {
+			return true; /* that means it has upper case chars */
+		}
+	}
+
+	return false;
+}
+
diff --git a/lib/util/charset/util_unistr.c b/lib/util/charset/util_unistr.c
index b6bfb29..ad2ba68 100644
--- a/lib/util/charset/util_unistr.c
+++ b/lib/util/charset/util_unistr.c
@@ -22,45 +22,6 @@
 #include "system/locale.h"
 
 /**
- Case insensitive string compararison
-**/
-_PUBLIC_ int strcasecmp_m(const char *s1, const char *s2)
-{
-	codepoint_t c1=0, c2=0;
-	size_t size1, size2;
-	struct smb_iconv_convenience *iconv_convenience = get_iconv_convenience();
-
-	/* handle null ptr comparisons to simplify the use in qsort */
-	if (s1 == s2) return 0;
-	if (s1 == NULL) return -1;
-	if (s2 == NULL) return 1;
-
-	while (*s1 && *s2) {
-		c1 = next_codepoint_convenience(iconv_convenience, s1, &size1);
-		c2 = next_codepoint_convenience(iconv_convenience, s2, &size2);
-
-		s1 += size1;
-		s2 += size2;
-
-		if (c1 == c2) {
-			continue;


-- 
Samba Shared Repository


More information about the samba-cvs mailing list