[SCM] Samba Shared Repository - branch master updated - 55d55d9d9b881b2ec09fa76515cdd1cf6f0e2442

Andrew Tridgell tridge at samba.org
Fri Oct 31 04:42:45 GMT 2008


The branch, master has been updated
       via  55d55d9d9b881b2ec09fa76515cdd1cf6f0e2442 (commit)
       via  5ecccac1c34f58019b195f6838f57366faa3575d (commit)
       via  391264a1006c147d82c983701ae382005c251d75 (commit)
      from  6066e1a44d041d7a5cafcb4c0276f5ff323226d9 (commit)

http://gitweb.samba.org/?p=samba.git;a=shortlog;h=master


- Log -----------------------------------------------------------------
commit 55d55d9d9b881b2ec09fa76515cdd1cf6f0e2442
Author: Andrew Tridgell <tridge at samba.org>
Date:   Fri Oct 31 15:41:34 2008 +1100

    finished adding UTF16_MUNGED charset
    
    Changed the approach for the charset to go via utf16, which makes a
    bit more sense to read.
    
    Added a testsuite for UTF16_MUNGED as part of LOCAL-ICONV

commit 5ecccac1c34f58019b195f6838f57366faa3575d
Merge: 391264a1006c147d82c983701ae382005c251d75 6066e1a44d041d7a5cafcb4c0276f5ff323226d9
Author: Andrew Tridgell <tridge at samba.org>
Date:   Fri Oct 31 13:55:26 2008 +1100

    Merge branch 'master' of ssh://git.samba.org/data/git/samba

commit 391264a1006c147d82c983701ae382005c251d75
Author: Andrew Tridgell <tridge at samba.org>
Date:   Fri Oct 31 13:51:37 2008 +1100

    added a new charset for string2key
    
    This charset follows the rules for converting random buffers to utf8
    strings, matching the way windows does it. This should allow us to be
    compatible for the generation of AES keys

-----------------------------------------------------------------------

Summary of changes:
 lib/util/charset/charcnv.c     |    1 +
 lib/util/charset/charset.h     |    4 +-
 lib/util/charset/iconv.c       |  103 ++++++++++++++++++++++++++++++++++++++++
 lib/util/charset/tests/iconv.c |   58 ++++++++++++++++++++++
 4 files changed, 164 insertions(+), 2 deletions(-)


Changeset truncated at 500 lines:

diff --git a/lib/util/charset/charcnv.c b/lib/util/charset/charcnv.c
index 2ae16c3..9dd68f0 100644
--- a/lib/util/charset/charcnv.c
+++ b/lib/util/charset/charcnv.c
@@ -57,6 +57,7 @@ static const char *charset_name(struct smb_iconv_convenience *ic, charset_t ch)
 	case CH_DOS: return ic->dos_charset;
 	case CH_UTF8: return "UTF8";
 	case CH_UTF16BE: return "UTF-16BE";
+	case CH_UTF16MUNGED: return "UTF16_MUNGED";
 	default:
 	return "ASCII";
 	}
diff --git a/lib/util/charset/charset.h b/lib/util/charset/charset.h
index 21fc20b..cace79f 100644
--- a/lib/util/charset/charset.h
+++ b/lib/util/charset/charset.h
@@ -28,9 +28,9 @@
 #include <talloc.h>
 
 /* this defines the charset types used in samba */
-typedef enum {CH_UTF16=0, CH_UNIX, CH_DOS, CH_UTF8, CH_UTF16BE} charset_t;
+typedef enum {CH_UTF16=0, CH_UNIX, CH_DOS, CH_UTF8, CH_UTF16BE, CH_UTF16MUNGED} charset_t;
 
-#define NUM_CHARSETS 5
+#define NUM_CHARSETS 6
 
 /*
  *   for each charset we have a function that pulls from that charset to
diff --git a/lib/util/charset/iconv.c b/lib/util/charset/iconv.c
index a01b6a5..b6842a4 100644
--- a/lib/util/charset/iconv.c
+++ b/lib/util/charset/iconv.c
@@ -51,6 +51,7 @@ static size_t ascii_pull  (void *,const char **, size_t *, char **, size_t *);
 static size_t ascii_push  (void *,const char **, size_t *, char **, size_t *);
 static size_t utf8_pull   (void *,const char **, size_t *, char **, size_t *);
 static size_t utf8_push   (void *,const char **, size_t *, char **, size_t *);
+static size_t utf16_munged_pull(void *,const char **, size_t *, char **, size_t *);
 static size_t ucs2hex_pull(void *,const char **, size_t *, char **, size_t *);
 static size_t ucs2hex_push(void *,const char **, size_t *, char **, size_t *);
 static size_t iconv_copy  (void *,const char **, size_t *, char **, size_t *);
@@ -66,6 +67,10 @@ static const struct charset_functions builtin_functions[] = {
 	/* we include the UTF-8 alias to cope with differing locale settings */
 	{"UTF8",   utf8_pull,  utf8_push},
 	{"UTF-8",   utf8_pull,  utf8_push},
+
+	/* this handles the munging needed for String2Key */
+	{"UTF16_MUNGED",   utf16_munged_pull,  iconv_copy},
+
 	{"ASCII", ascii_pull, ascii_push},
 	{"UCS2-HEX", ucs2hex_pull, ucs2hex_push}
 };
@@ -707,4 +712,102 @@ error:
 }
 
 
+/*
+  this takes a UTF16 munged sequence, modifies it according to the
+  string2key rules, and produces a UTF16 sequence
+
+The rules are:
+
+    1) any 0x0000 characters are mapped to 0x0001
+
+    2) convert any instance of 0xD800 - 0xDBFF (high surrogate)
+       without an immediately following 0xDC00 - 0xDFFF (low surrogate) to
+       U+FFFD (REPLACEMENT CHARACTER).
+
+    3) the same for any low surrogate that was not preceded by a high surrogate.
+
+ */
+static size_t utf16_munged_pull(void *cd, const char **inbuf, size_t *inbytesleft,
+			       char **outbuf, size_t *outbytesleft)
+{
+	size_t in_left=*inbytesleft, out_left=*outbytesleft;
+	uint8_t *c = (uint8_t *)*outbuf;
+	const uint8_t *uc = (const uint8_t *)*inbuf;
+
+	while (in_left >= 2 && out_left >= 2) {
+		unsigned int codepoint = uc[0] | (uc[1]<<8);
+
+		if (codepoint == 0) {
+			codepoint = 1;
+		}
+
+		if ((codepoint & 0xfc00) == 0xd800) {
+			/* a high surrogate */
+			unsigned int codepoint2;
+			if (in_left < 4) {
+				codepoint = 0xfffd;
+				goto codepoint16;				
+			}
+			codepoint2 = uc[2] | (uc[3]<<8);
+			if ((codepoint2 & 0xfc00) != 0xdc00) {
+				/* high surrogate not followed by low
+				   surrogate: convert to 0xfffd */
+				codepoint = 0xfffd;
+				goto codepoint16;
+			}
+			if (out_left < 4) {
+				errno = E2BIG;
+				goto error;
+			}
+			memcpy(c, uc, 4);
+			in_left  -= 4;
+			out_left -= 4;
+			uc       += 4;
+			c        += 4;
+			continue;
+		}
+
+		if ((codepoint & 0xfc00) == 0xdc00) {
+			/* low surrogate not preceded by high
+			   surrogate: convert to 0xfffd */
+			codepoint = 0xfffd;
+		}
+
+	codepoint16:
+		c[0] = codepoint & 0xFF;
+		c[1] = (codepoint>>8) & 0xFF;
+		
+		in_left  -= 2;
+		out_left -= 2;
+		uc  += 2;
+		c   += 2;
+		continue;		
+	}
+
+	if (in_left == 1) {
+		errno = EINVAL;
+		goto error;
+	}
+
+	if (in_left > 1) {
+		errno = E2BIG;
+		goto error;
+	}
+
+	*inbytesleft = in_left;
+	*outbytesleft = out_left;
+	*inbuf  = (const char *)uc;
+	*outbuf = (char *)c;
+	
+	return 0;
+
+error:
+	*inbytesleft = in_left;
+	*outbytesleft = out_left;
+	*inbuf  = (const char *)uc;
+	*outbuf = (char *)c;
+	return -1;
+}
+
+
 
diff --git a/lib/util/charset/tests/iconv.c b/lib/util/charset/tests/iconv.c
index 40e223b..1facea6 100644
--- a/lib/util/charset/tests/iconv.c
+++ b/lib/util/charset/tests/iconv.c
@@ -398,10 +398,65 @@ static bool test_random_5m(struct torture_context *tctx)
 	return true;
 }
 
+
+static bool test_string2key(struct torture_context *tctx)
+{
+	uint16_t *buf;
+	char *dest = NULL;
+	TALLOC_CTX *mem_ctx = talloc_new(tctx);
+	ssize_t ret;
+	size_t len = (random()%1000)+1;
+	const uint16_t in1[10] = { 'a', 0xd805, 'b', 0xdcf0, 'c', 0, 'd', 'e', 'f', 'g' };
+	uint8_t le1[20];
+	uint8_t *munged1;
+	uint8_t *out1;
+	int i;
+	const char *correct = "a\357\277\275b\357\277\275c\001defg";
+
+	buf = talloc_size(mem_ctx, len*2);
+	generate_random_buffer((uint8_t *)buf, len*2);
+
+	torture_comment(tctx, "converting random buffer\n");
+
+	ret = convert_string_talloc(mem_ctx, CH_UTF16MUNGED, CH_UTF8, (void *)buf, len*2, (void**)&dest);
+	if (ret == -1) {
+		torture_fail(tctx, "Failed to convert random buffer\n");
+	}
+
+	for (i=0;i<10;i++) {
+		SSVAL(&le1[2*i], 0, in1[i]);
+	}
+
+	torture_comment(tctx, "converting fixed buffer to UTF16\n");
+
+	ret = convert_string_talloc(mem_ctx, CH_UTF16MUNGED, CH_UTF16, (void *)le1, 20, (void**)&munged1);
+	if (ret == -1) {
+		torture_fail(tctx, "Failed to convert fixed buffer to UTF16_MUNGED\n");
+	}
+
+	torture_assert(tctx, ret == 20, "conversion should give 20 bytes\n");
+
+	torture_comment(tctx, "converting fixed buffer to UTF8\n");
+
+	ret = convert_string_talloc(mem_ctx, CH_UTF16MUNGED, CH_UTF8, (void *)le1, 20, (void**)&out1);
+	if (ret == -1) {
+		torture_fail(tctx, "Failed to convert fixed buffer to UTF8\n");
+	}
+
+	torture_assert(tctx, strcmp(correct, out1) == 0, "conversion gave incorrect result\n");
+
+	talloc_free(mem_ctx);
+
+	return true;
+}
+
 struct torture_suite *torture_local_iconv(TALLOC_CTX *mem_ctx)
 {
 	struct torture_suite *suite = torture_suite_create(mem_ctx, "ICONV");
 
+	torture_suite_add_simple_test(suite, "string2key",
+				      test_string2key);
+
 	torture_suite_add_simple_test(suite, "next_codepoint()",
 				      test_next_codepoint);
 
@@ -410,6 +465,9 @@ struct torture_suite *torture_local_iconv(TALLOC_CTX *mem_ctx)
 
 	torture_suite_add_simple_test(suite, "5M random UTF-16LE sequences",
 				      test_random_5m);
+
+	torture_suite_add_simple_test(suite, "string2key",
+				      test_string2key);
 	return suite;
 }
 


-- 
Samba Shared Repository


More information about the samba-cvs mailing list