[SCM] Samba Shared Repository - branch v3-5-test updated

Thu Nov 11 03:56:41 MST 2010

The branch, v3-5-test has been updated
       via  8e46bff s3:librpc/ndr: use new strlen_m_ext_term() in ndr_charset_length(): fix bug #7594
       via  9fd5cc6 librpc/ndr: correctly implement ndr_charset_length()
       via  f7928a0 s3:lib/util_str: add strlen_m_ext_term() - variant of strlen_m_ext() counting terminator
       via  054cd7e s3:lib/util_str: add strlen_m_ext() that takes the dest charset as a parameter.
      from  7effd96 Fix bug 7409 - Thousands of reduce_name: couldn't get realpath.

http://gitweb.samba.org/?p=samba.git;a=shortlog;h=v3-5-test


- Log -----------------------------------------------------------------
commit 8e46bff8b88103f4a5b0d3920ab6e3901decaf22
Author: Michael Adam <obnox at samba.org>
Date:   Sun Oct 31 02:04:25 2010 +0200

    s3:librpc/ndr: use new strlen_m_ext_term() in ndr_charset_length(): fix bug #7594
    
    This fixes the calculation of needed space for destination unicode charset.
    
    The last 4 patches address bug #7594 ("wbinfo -u" and "wbinfo -g" gives no
    output (log=>ndr_pull_error)).

commit 9fd5cc6d85d3179972d7567bad95538ab2873c30
Author: Stefan Metzmacher <metze at samba.org>
Date:   Wed Aug 25 10:05:15 2010 +0200

    librpc/ndr: correctly implement ndr_charset_length()
    
    Before we ignored the charset type.
    
    metze
    
    Signed-off-by: Michael Adam <obnox at samba.org>

commit f7928a0e0b2be27e83bf26644c45ac554c5acec2
Author: Michael Adam <obnox at samba.org>
Date:   Sun Oct 31 02:02:16 2010 +0200

    s3:lib/util_str: add strlen_m_ext_term() - variant of strlen_m_ext() counting terminator

commit 054cd7ec30a3289443c97d36ea416d37f19d6b0b
Author: Michael Adam <obnox at samba.org>
Date:   Mon Nov 1 16:28:43 2010 +0100

    s3:lib/util_str: add strlen_m_ext() that takes the dest charset as a parameter.

-----------------------------------------------------------------------

Summary of changes:
 source3/include/proto.h         |    2 +
 source3/lib/util_str.c          |   72 ++++++++++++++++++++++++++++++++------
 source3/librpc/ndr/ndr_string.c |   16 +++++++-
 3 files changed, 76 insertions(+), 14 deletions(-)


Changeset truncated at 500 lines:

diff --git a/source3/include/proto.h b/source3/include/proto.h
index 5064fdb..f7bfc2a 100644
--- a/source3/include/proto.h
+++ b/source3/include/proto.h
@@ -1539,6 +1539,8 @@ char *strnrchr_m(const char *s, char c, unsigned int n);
 char *strstr_m(const char *src, const char *findstr);
 void strlower_m(char *s);
 void strupper_m(char *s);
+size_t strlen_m_ext(const char *s, const charset_t dst_charset);
+size_t strlen_m_ext_term(const char *s, const charset_t dst_charset);
 size_t strlen_m(const char *s);
 size_t strlen_m_term(const char *s);
 size_t strlen_m_term_null(const char *s);
diff --git a/source3/lib/util_str.c b/source3/lib/util_str.c
index 9a0b12a..3da2b83 100644
--- a/source3/lib/util_str.c
+++ b/source3/lib/util_str.c
@@ -1454,12 +1454,12 @@ void strupper_m(char *s)
 }
 
 /**
- Count the number of UCS2 characters in a string. Normally this will
- be the same as the number of bytes in a string for single byte strings,
- but will be different for multibyte.
-**/
-
-size_t strlen_m(const char *s)
+ * Calculate the number of units (8 or 16-bit, depending on the
+ * destination charset), that would be needed to convert the input
+ * string which is expected to be in CH_UNIX encoding to the
+ * destination charset (which should be a unicode charset).
+ */
+size_t strlen_m_ext(const char *s, const charset_t dst_charset)
 {
 	size_t count = 0;
 
@@ -1479,19 +1479,67 @@ size_t strlen_m(const char *s)
 	while (*s) {
 		size_t c_size;
 		codepoint_t c = next_codepoint(s, &c_size);
-		if (c < 0x10000) {
-			/* Unicode char fits into 16 bits. */
+		s += c_size;
+
+		switch(dst_charset) {
+		case CH_UTF16LE:
+		case CH_UTF16BE:
+		case CH_UTF16MUNGED:
+			if (c < 0x10000) {
+				/* Unicode char fits into 16 bits. */
+				count += 1;
+			} else {
+				/* Double-width unicode char - 32 bits. */
+				count += 2;
+			}
+			break;
+		case CH_UTF8:
+			/*
+			 * this only checks ranges, and does not
+			 * check for invalid codepoints
+			 */
+			if (c < 0x80) {
+				count += 1;
+			} else if (c < 0x800) {
+				count += 2;
+			} else if (c < 0x1000) {
+				count += 3;
+			} else {
+				count += 4;
+			}
+			break;
+		default:
+			/*
+			 * non-unicode encoding:
+			 * assume that each codepoint fits into
+			 * one unit in the destination encoding.
+			 */
 			count += 1;
-		} else {
-			/* Double-width unicode char - 32 bits. */
-			count += 2;
 		}
-		s += c_size;
 	}
 
 	return count;
 }
 
+size_t strlen_m_ext_term(const char *s, const charset_t dst_charset)
+{
+	if (!s) {
+		return 0;
+	}
+	return strlen_m_ext(s, dst_charset) + 1;
+}
+
+/**
+ Count the number of UCS2 characters in a string. Normally this will
+ be the same as the number of bytes in a string for single byte strings,
+ but will be different for multibyte.
+**/
+
+size_t strlen_m(const char *s)
+{
+	return strlen_m_ext(s, CH_UTF16LE);
+}
+
 /**
  Count the number of UCS2 characters in a string including the null
  terminator.
diff --git a/source3/librpc/ndr/ndr_string.c b/source3/librpc/ndr/ndr_string.c
index 519be7b..f0c3e47 100644
--- a/source3/librpc/ndr/ndr_string.c
+++ b/source3/librpc/ndr/ndr_string.c
@@ -714,7 +714,19 @@ _PUBLIC_ enum ndr_err_code ndr_push_charset(struct ndr_push *ndr, int ndr_flags,
 /* Return number of elements in a string in the specified charset */
 _PUBLIC_ uint32_t ndr_charset_length(const void *var, charset_t chset)
 {
-	/* FIXME: Treat special chars special here, taking chset into account */
-	/* Also include 0 byte */
+	switch (chset) {
+	/* case CH_UTF16: this has the same value as CH_UTF16LE */
+	case CH_UTF16LE:
+	case CH_UTF16BE:
+	case CH_UTF16MUNGED:
+	case CH_UTF8:
+		return strlen_m_ext_term((const char *)var, chset);
+	case CH_DISPLAY:
+	case CH_DOS:
+	case CH_UNIX:
+		return strlen((const char *)var)+1;
+	}
+
+	/* Fallback, this should never happen */
 	return strlen((const char *)var)+1;
 }


-- 
Samba Shared Repository