[SCM] Samba Shared Repository - branch master updated
Jeremy Allison
jra at samba.org
Fri Jun 18 04:28:02 UTC 2021
The branch, master has been updated
via 4711ad9e813 util/charset: warn loudly on unexpected E2BIG
via 1ea18166291 util/iconv: reject improperly packed UTF-8
via 50047588c0c torture: talloc_string_sub tests for utf-8 brevity
from 1c3821c9f9c netcmd: Incorrect arguments to Exception constructor
https://git.samba.org/?p=samba.git;a=shortlog;h=master
- Log -----------------------------------------------------------------
commit 4711ad9e81326c40309614df41ac36994fa9b961
Author: Douglas Bagnall <douglas.bagnall at catalyst.net.nz>
Date: Thu Apr 8 21:20:17 2021 +1200
util/charset: warn loudly on unexpected E2BIG
Signed-off-by: Douglas Bagnall <douglas.bagnall at catalyst.net.nz>
Reviewed-by: Jeremy Allison <jra at samba.org>
Autobuild-User(master): Jeremy Allison <jra at samba.org>
Autobuild-Date(master): Fri Jun 18 04:27:17 UTC 2021 on sn-devel-184
commit 1ea1816629104e16d9b180bee8f4ccc6292869ed
Author: Douglas Bagnall <douglas.bagnall at catalyst.net.nz>
Date: Thu Apr 8 21:18:46 2021 +1200
util/iconv: reject improperly packed UTF-8
If we allow a string that encodes say '\0' as a multi-byte sequence,
we are open to confusion where we mix NUL terminated strings with
sized data blobs, which is to say EVERYWHERE.
BUG: https://bugzilla.samba.org/show_bug.cgi?id=14684
Signed-off-by: Douglas Bagnall <douglas.bagnall at catalyst.net.nz>
Reviewed-by: Jeremy Allison <jra at samba.org>
commit 50047588c0c8da2e1ffa0b08a8dc5d31e49f6a3b
Author: Douglas Bagnall <douglas.bagnall at catalyst.net.nz>
Date: Wed Jun 16 17:35:19 2021 +1200
torture: talloc_string_sub tests for utf-8 brevity
If we allow overly long UTF-8 sequences (in the tests, encoding '\0'
as 2, 3, or 4 bytes), it might be possible for bad strings to slip
through.
We fail. But wait for the next commit.
BUG: https://bugzilla.samba.org/show_bug.cgi?id=14684
Signed-off-by: Douglas Bagnall <douglas.bagnall at catalyst.net.nz>
Reviewed-by: Jeremy Allison <jra at samba.org>
-----------------------------------------------------------------------
Summary of changes:
lib/util/charset/convert_string.c | 4 +--
lib/util/charset/iconv.c | 32 +++++++++++++--------
lib/util/tests/str.c | 58 +++++++++++++++++++++++++++++++++++++++
3 files changed, 81 insertions(+), 13 deletions(-)
Changeset truncated at 500 lines:
diff --git a/lib/util/charset/convert_string.c b/lib/util/charset/convert_string.c
index 96a3af68d27..88b128be547 100644
--- a/lib/util/charset/convert_string.c
+++ b/lib/util/charset/convert_string.c
@@ -435,8 +435,8 @@ bool convert_string_talloc_handle(TALLOC_CTX *ctx, struct smb_iconv_handle *ic,
break;
case E2BIG:
reason = "output buffer is too small";
- DBG_NOTICE("Conversion error: %s\n",
- reason);
+ DBG_ERR("Conversion error: %s\n",
+ reason);
break;
case EILSEQ:
reason="Illegal multibyte sequence";
diff --git a/lib/util/charset/iconv.c b/lib/util/charset/iconv.c
index 1f2d49c0e27..43b3306b0de 100644
--- a/lib/util/charset/iconv.c
+++ b/lib/util/charset/iconv.c
@@ -832,6 +832,11 @@ static size_t utf8_pull(void *cd, const char **inbuf, size_t *inbytesleft,
}
uc[1] = (c[0]>>2) & 0x7;
uc[0] = (c[0]<<6) | (c[1]&0x3f);
+ if (uc[1] == 0 && uc[0] < 0x80) {
+ /* this should have been a single byte */
+ errno = EILSEQ;
+ goto error;
+ }
c += 2;
in_left -= 2;
out_left -= 2;
@@ -840,14 +845,24 @@ static size_t utf8_pull(void *cd, const char **inbuf, size_t *inbytesleft,
}
if ((c[0] & 0xf0) == 0xe0) {
+ unsigned int codepoint;
if (in_left < 3 ||
(c[1] & 0xc0) != 0x80 ||
(c[2] & 0xc0) != 0x80) {
errno = EILSEQ;
goto error;
}
- uc[1] = ((c[0]&0xF)<<4) | ((c[1]>>2)&0xF);
- uc[0] = (c[1]<<6) | (c[2]&0x3f);
+ codepoint = ((c[2] & 0x3f) |
+ ((c[1] & 0x3f) << 6) |
+ ((c[0] & 0x0f) << 12));
+
+ if (codepoint < 0x800) {
+ /* this should be a 1 or 2 byte sequence */
+ errno = EILSEQ;
+ goto error;
+ }
+ uc[0] = codepoint & 0xff;
+ uc[1] = codepoint >> 8;
c += 3;
in_left -= 3;
out_left -= 2;
@@ -870,15 +885,10 @@ static size_t utf8_pull(void *cd, const char **inbuf, size_t *inbytesleft,
((c[1]&0x3f)<<12) |
((c[0]&0x7)<<18);
if (codepoint < 0x10000) {
- /* accept UTF-8 characters that are not
- minimally packed, but pack the result */
- uc[0] = (codepoint & 0xFF);
- uc[1] = (codepoint >> 8);
- c += 4;
- in_left -= 4;
- out_left -= 2;
- uc += 2;
- continue;
+ /* reject UTF-8 characters that are not
+ minimally packed */
+ errno = EILSEQ;
+ goto error;
}
codepoint -= 0x10000;
diff --git a/lib/util/tests/str.c b/lib/util/tests/str.c
index 93bf809f385..41a28366cf4 100644
--- a/lib/util/tests/str.c
+++ b/lib/util/tests/str.c
@@ -91,6 +91,52 @@ static bool test_talloc_string_sub_multiple(struct torture_context *tctx)
return true;
}
+/*
+ * with these next three tests, the failure is that the pattern looks like
+ * "+++" because the \x.. bytes encode a zero byte in UTF-8. If we are not
+ * careful with these strings we will see crashes instead of failures.
+ */
+
+static bool test_talloc_string_sub_tricky_utf8_4(struct torture_context *tctx)
+{
+ const char string[] = "++++--\xD8\xBB";
+ const char pattern[] = "+++\xF0\x80\x80\x80++";
+ const char replace[] = "...";
+
+ char *t = talloc_string_sub(tctx, string, pattern, replace);
+ torture_assert_str_equal(tctx, t, string,
+ "should reject 4 byte NUL char");
+ talloc_free(t);
+ return true;
+}
+
+static bool test_talloc_string_sub_tricky_utf8_3(struct torture_context *tctx)
+{
+ const char string[] = "++++--\xD8\xBB";
+ const char pattern[] = "+++\xE0\x80\x80++";
+ const char replace[] = "...";
+
+ char *t = talloc_string_sub(tctx, string, pattern, replace);
+ torture_assert_str_equal(tctx, t, string,
+ "should reject 3 byte NUL char");
+ talloc_free(t);
+ return true;
+}
+
+static bool test_talloc_string_sub_tricky_utf8_2(struct torture_context *tctx)
+{
+ const char string[] = "++++--\xD8\xBB";
+ const char pattern[] = "+++\xC0\x80++";
+ const char replace[] = "...";
+
+ char *t = talloc_string_sub(tctx, string, pattern, replace);
+ torture_assert_str_equal(tctx, t, string,
+ "should reject 2 byte NUL char");
+ talloc_free(t);
+ return true;
+}
+
+
struct torture_suite *torture_local_util_str(TALLOC_CTX *mem_ctx)
@@ -118,5 +164,17 @@ struct torture_suite *torture_local_util_str(TALLOC_CTX *mem_ctx)
torture_suite_add_simple_test(suite, "string_sub_talloc_multiple",
test_talloc_string_sub_multiple);
+ torture_suite_add_simple_test(suite,
+ "test_talloc_string_sub_tricky_utf8_4",
+ test_talloc_string_sub_tricky_utf8_4);
+
+ torture_suite_add_simple_test(suite,
+ "test_talloc_string_sub_tricky_utf8_3",
+ test_talloc_string_sub_tricky_utf8_3);
+
+ torture_suite_add_simple_test(suite,
+ "test_talloc_string_sub_tricky_utf8_2",
+ test_talloc_string_sub_tricky_utf8_2);
+
return suite;
}
--
Samba Shared Repository
More information about the samba-cvs
mailing list