[Patches] fix str[n]casecmp_m() by comparing lower case values (bug #13018)

Stefan Metzmacher metze at samba.org
Thu Sep 14 16:06:07 UTC 2017


Hi,

The commits
c615ebed6e3d273a682806b952d543e834e5630d^..f19ab5d334e3fb15761fb009e5de876dfc6ea785
replaced Str[n]CaseCmp() by str[n]casecmp_m().

The logic we had in str[n]casecmp_w() used to compare
the upper cased as well as the lower cased versions of the
characters and returned the difference between the lower cased versions.

The problem is that we need to make sure that 'A' vs. 'b' and 'a' vs.
'B' are sorted correctly, if we use strcasecmp_m with qsort.

'smbtorture //a/b local.string_case_handle.plato_case_utf8' shows
that it's important to compare the toupper_m as well as the tolower_m
values as sometimes multiple lower case characters map to the same upper
case character, so that tolower_m() may loose information.

Please review and push.

metze
-------------- next part --------------
From 66d2ca77e052693cf22df9a5a9743baac8486852 Mon Sep 17 00:00:00 2001
From: Stefan Metzmacher <metze at samba.org>
Date: Wed, 6 Sep 2017 10:38:37 +0200
Subject: [PATCH 1/4] charset/tests: assert the exact values of
 str[n]casecmp_m()

BUG: https://bugzilla.samba.org/show_bug.cgi?id=13018

Signed-off-by: Stefan Metzmacher <metze at samba.org>
---
 lib/util/charset/tests/charset.c | 34 +++++++++++++++++-----------------
 1 file changed, 17 insertions(+), 17 deletions(-)

diff --git a/lib/util/charset/tests/charset.c b/lib/util/charset/tests/charset.c
index 7f33656..e2d4155 100644
--- a/lib/util/charset/tests/charset.c
+++ b/lib/util/charset/tests/charset.c
@@ -54,13 +54,13 @@ static bool test_strcasecmp_m(struct torture_context *tctx)
 	const char file_iso8859_1[7] = { 0x66, 0x69, 0x6c, 0x65, 0x2d, 0xe9, 0 };
 	/* file.{accented e} in utf8 */
 	const char file_utf8[8] =      { 0x66, 0x69, 0x6c, 0x65, 0x2d, 0xc3, 0xa9, 0 };
-	torture_assert(tctx, strcasecmp_m("foo", "bar") != 0, "different strings");
-	torture_assert(tctx, strcasecmp_m("foo", "foo") == 0, "same case strings");
-	torture_assert(tctx, strcasecmp_m("foo", "Foo") == 0, "different case strings");
-	torture_assert(tctx, strcasecmp_m(NULL, "Foo") != 0, "one NULL");
-	torture_assert(tctx, strcasecmp_m("foo", NULL) != 0, "other NULL");
-	torture_assert(tctx, strcasecmp_m(NULL, NULL) == 0, "both NULL");
-	torture_assert(tctx, strcasecmp_m(file_iso8859_1, file_utf8) != 0,
+	torture_assert_int_equal(tctx, strcasecmp_m("foo", "bar"), 4, "different strings both lower");
+	torture_assert_int_equal(tctx, strcasecmp_m("foo", "foo"), 0, "same case strings");
+	torture_assert_int_equal(tctx, strcasecmp_m("foo", "Foo"), 0, "different case strings");
+	torture_assert_int_equal(tctx, strcasecmp_m(NULL, "Foo"),  -1, "one NULL");
+	torture_assert_int_equal(tctx, strcasecmp_m("foo", NULL),  1, "other NULL");
+	torture_assert_int_equal(tctx, strcasecmp_m(NULL, NULL),   0, "both NULL");
+	torture_assert_int_equal(tctx, strcasecmp_m(file_iso8859_1, file_utf8), 38,
 		"file.{accented e} should differ");
 	return true;
 }
@@ -112,16 +112,16 @@ static bool test_strncasecmp_m(struct torture_context *tctx)
 	const char file_iso8859_1[7] = { 0x66, 0x69, 0x6c, 0x65, 0x2d, 0xe9, 0 };
 	/* file.{accented e} in utf8 */
 	const char file_utf8[8] =      { 0x66, 0x69, 0x6c, 0x65, 0x2d, 0xc3, 0xa9, 0 };
-	torture_assert(tctx, strncasecmp_m("foo", "bar", 3) != 0, "different strings");
-	torture_assert(tctx, strncasecmp_m("foo", "foo", 3) == 0, "same case strings");
-	torture_assert(tctx, strncasecmp_m("foo", "Foo", 3) == 0, "different case strings");
-	torture_assert(tctx, strncasecmp_m("fool", "Foo", 3) == 0, "different case strings");
-	torture_assert(tctx, strncasecmp_m("fool", "Fool", 40) == 0, "over size");
-	torture_assert(tctx, strncasecmp_m("BLA", "Fool", 0) == 0, "empty");
-	torture_assert(tctx, strncasecmp_m(NULL, "Foo", 3) != 0, "one NULL");
-	torture_assert(tctx, strncasecmp_m("foo", NULL, 3) != 0, "other NULL");
-	torture_assert(tctx, strncasecmp_m(NULL, NULL, 3) == 0, "both NULL");
-	torture_assert(tctx, strncasecmp_m(file_iso8859_1, file_utf8, 6) != 0,
+	torture_assert_int_equal(tctx, strncasecmp_m("foo", "bar", 3), 4, "different strings");
+	torture_assert_int_equal(tctx, strncasecmp_m("foo", "foo", 3), 0, "same case strings");
+	torture_assert_int_equal(tctx, strncasecmp_m("foo", "Foo", 3), 0, "different case strings");
+	torture_assert_int_equal(tctx, strncasecmp_m("fool", "Foo", 3),0, "different case strings");
+	torture_assert_int_equal(tctx, strncasecmp_m("fool", "Fool", 40), 0, "over size");
+	torture_assert_int_equal(tctx, strncasecmp_m("BLA", "Fool", 0),0, "empty");
+	torture_assert_int_equal(tctx, strncasecmp_m(NULL, "Foo", 3),  -1, "one NULL");
+	torture_assert_int_equal(tctx, strncasecmp_m("foo", NULL, 3),  1, "other NULL");
+	torture_assert_int_equal(tctx, strncasecmp_m(NULL, NULL, 3),   0, "both NULL");
+	torture_assert_int_equal(tctx, strncasecmp_m(file_iso8859_1, file_utf8, 6), 38,
 		"file.{accented e} should differ");
 	return true;
 }
-- 
1.9.1


From c6142bc2ee6c98264afbcf9b3ee293031c74e777 Mon Sep 17 00:00:00 2001
From: Stefan Metzmacher <metze at samba.org>
Date: Wed, 6 Sep 2017 10:39:00 +0200
Subject: [PATCH 2/4] charset/tests: add more str[n]casecmp_m() tests to
 demonstrate the bug

BUG: https://bugzilla.samba.org/show_bug.cgi?id=13018

Signed-off-by: Stefan Metzmacher <metze at samba.org>
---
 lib/util/charset/tests/charset.c | 8 +++++++-
 selftest/knownfail.d/strcasecmp  | 2 ++
 2 files changed, 9 insertions(+), 1 deletion(-)
 create mode 100644 selftest/knownfail.d/strcasecmp

diff --git a/lib/util/charset/tests/charset.c b/lib/util/charset/tests/charset.c
index e2d4155..918bf57 100644
--- a/lib/util/charset/tests/charset.c
+++ b/lib/util/charset/tests/charset.c
@@ -55,6 +55,9 @@ static bool test_strcasecmp_m(struct torture_context *tctx)
 	/* file.{accented e} in utf8 */
 	const char file_utf8[8] =      { 0x66, 0x69, 0x6c, 0x65, 0x2d, 0xc3, 0xa9, 0 };
 	torture_assert_int_equal(tctx, strcasecmp_m("foo", "bar"), 4, "different strings both lower");
+	torture_assert_int_equal(tctx, strcasecmp_m("foo", "Bar"), 4, "different strings lower/upper");
+	torture_assert_int_equal(tctx, strcasecmp_m("Foo", "bar"), 4, "different strings upper/lower");
+	torture_assert_int_equal(tctx, strcasecmp_m("AFoo", "_bar"), 2, "different strings upper/lower");
 	torture_assert_int_equal(tctx, strcasecmp_m("foo", "foo"), 0, "same case strings");
 	torture_assert_int_equal(tctx, strcasecmp_m("foo", "Foo"), 0, "different case strings");
 	torture_assert_int_equal(tctx, strcasecmp_m(NULL, "Foo"),  -1, "one NULL");
@@ -112,7 +115,10 @@ static bool test_strncasecmp_m(struct torture_context *tctx)
 	const char file_iso8859_1[7] = { 0x66, 0x69, 0x6c, 0x65, 0x2d, 0xe9, 0 };
 	/* file.{accented e} in utf8 */
 	const char file_utf8[8] =      { 0x66, 0x69, 0x6c, 0x65, 0x2d, 0xc3, 0xa9, 0 };
-	torture_assert_int_equal(tctx, strncasecmp_m("foo", "bar", 3), 4, "different strings");
+	torture_assert_int_equal(tctx, strncasecmp_m("foo", "bar", 3), 4, "different strings both lower");
+	torture_assert_int_equal(tctx, strncasecmp_m("foo", "Bar", 3), 4, "different strings lower/upper");
+	torture_assert_int_equal(tctx, strncasecmp_m("Foo", "bar", 3), 4, "different strings upper/lower");
+	torture_assert_int_equal(tctx, strncasecmp_m("AFoo", "_bar", 4), 2, "different strings upper/lower");
 	torture_assert_int_equal(tctx, strncasecmp_m("foo", "foo", 3), 0, "same case strings");
 	torture_assert_int_equal(tctx, strncasecmp_m("foo", "Foo", 3), 0, "different case strings");
 	torture_assert_int_equal(tctx, strncasecmp_m("fool", "Foo", 3),0, "different case strings");
diff --git a/selftest/knownfail.d/strcasecmp b/selftest/knownfail.d/strcasecmp
new file mode 100644
index 0000000..23d755a
--- /dev/null
+++ b/selftest/knownfail.d/strcasecmp
@@ -0,0 +1,2 @@
+^samba4.local.charset.strcasecmp_m
+^samba4.local.charset.strncasecmp_m
-- 
1.9.1


From da8e1a4f7564648608e4156d1ef046b837617529 Mon Sep 17 00:00:00 2001
From: Stefan Metzmacher <metze at samba.org>
Date: Wed, 6 Sep 2017 11:24:28 +0200
Subject: [PATCH 3/4] charset/tests: also tests the system str[n]casecmp()

BUG: https://bugzilla.samba.org/show_bug.cgi?id=13018

Signed-off-by: Stefan Metzmacher <metze at samba.org>
---
 lib/util/charset/tests/charset.c | 35 +++++++++++++++++++++++++++++++++++
 1 file changed, 35 insertions(+)

diff --git a/lib/util/charset/tests/charset.c b/lib/util/charset/tests/charset.c
index 918bf57..71635c6 100644
--- a/lib/util/charset/tests/charset.c
+++ b/lib/util/charset/tests/charset.c
@@ -48,6 +48,21 @@ static bool test_codepoint_cmpi(struct torture_context *tctx)
 	return true;
 }
 
+static bool test_strcasecmp(struct torture_context *tctx)
+{
+	torture_assert_int_equal(tctx, strcasecmp("foo", "bar"), 4, "different strings both lower");
+	torture_assert_int_equal(tctx, strcasecmp("foo", "Bar"), 4, "different strings lower/upper");
+	torture_assert_int_equal(tctx, strcasecmp("Foo", "bar"), 4, "different strings upper/lower");
+	torture_assert_int_equal(tctx, strcasecmp("AFoo", "_bar"), 2, "different strings upper/lower");
+	torture_assert_int_equal(tctx, strcasecmp("foo", "foo"), 0, "same case strings");
+	torture_assert_int_equal(tctx, strcasecmp("foo", "Foo"), 0, "different case strings");
+
+	/*
+	 * Note that strcasecmp() doesn't allow NULL arguments
+	 */
+	return true;
+}
+
 static bool test_strcasecmp_m(struct torture_context *tctx)
 {
 	/* file.{accented e} in iso8859-1 */
@@ -109,6 +124,24 @@ static bool test_string_replace_m(struct torture_context *tctx)
 	return true;
 }
 
+static bool test_strncasecmp(struct torture_context *tctx)
+{
+	torture_assert_int_equal(tctx, strncasecmp("foo", "bar", 3), 4, "different strings both lower");
+	torture_assert_int_equal(tctx, strncasecmp("foo", "Bar", 3), 4, "different strings lower/upper");
+	torture_assert_int_equal(tctx, strncasecmp("Foo", "bar", 3), 4, "different strings upper/lower");
+	torture_assert_int_equal(tctx, strncasecmp("AFoo", "_bar", 4), 2, "different strings upper/lower");
+	torture_assert_int_equal(tctx, strncasecmp("foo", "foo", 3), 0, "same case strings");
+	torture_assert_int_equal(tctx, strncasecmp("foo", "Foo", 3), 0, "different case strings");
+	torture_assert_int_equal(tctx, strncasecmp("fool", "Foo", 3),0, "different case strings");
+	torture_assert_int_equal(tctx, strncasecmp("fool", "Fool", 40), 0, "over size");
+	torture_assert_int_equal(tctx, strncasecmp("BLA", "Fool", 0),0, "empty");
+
+	/*
+	 * Note that strncasecmp() doesn't allow NULL arguments
+	 */
+	return true;
+}
+
 static bool test_strncasecmp_m(struct torture_context *tctx)
 {
 	/* file.{accented e} in iso8859-1 */
@@ -282,10 +315,12 @@ struct torture_suite *torture_local_charset(TALLOC_CTX *mem_ctx)
 	torture_suite_add_simple_test(suite, "toupper_m", test_toupper_m);
 	torture_suite_add_simple_test(suite, "tolower_m", test_tolower_m);
 	torture_suite_add_simple_test(suite, "codepoint_cmpi", test_codepoint_cmpi);
+	torture_suite_add_simple_test(suite, "strcasecmp", test_strcasecmp);
 	torture_suite_add_simple_test(suite, "strcasecmp_m", test_strcasecmp_m);
 	torture_suite_add_simple_test(suite, "strequal_m", test_strequal_m);
 	torture_suite_add_simple_test(suite, "strcsequal", test_strcsequal);
 	torture_suite_add_simple_test(suite, "string_replace_m", test_string_replace_m);
+	torture_suite_add_simple_test(suite, "strncasecmp", test_strncasecmp);
 	torture_suite_add_simple_test(suite, "strncasecmp_m", test_strncasecmp_m);
 	torture_suite_add_simple_test(suite, "next_token", test_next_token);
 	torture_suite_add_simple_test(suite, "next_token_null", test_next_token_null);
-- 
1.9.1


From 1d92047debc5408dbfa93e88d1a27ff2b0675915 Mon Sep 17 00:00:00 2001
From: Stefan Metzmacher <metze at samba.org>
Date: Wed, 6 Sep 2017 09:47:20 +0200
Subject: [PATCH 4/4] charset: fix str[n]casecmp_m() by comparing lower case
 values

The commits c615ebed6e3d273a682806b952d543e834e5630d^..f19ab5d334e3fb15761fb009e5de876dfc6ea785
replaced Str[n]CaseCmp() by str[n]casecmp_m().

The logic we had in str[n]casecmp_w() used to compare
the upper cased as well as the lower cased versions of the
characters and returned the difference between the lower cased versions.

BUG: https://bugzilla.samba.org/show_bug.cgi?id=13018

Signed-off-by: Stefan Metzmacher <metze at samba.org>
---
 lib/util/charset/util_str.c     | 32 ++++++++++++++++++++++++++++----
 selftest/knownfail.d/strcasecmp |  2 --
 2 files changed, 28 insertions(+), 6 deletions(-)
 delete mode 100644 selftest/knownfail.d/strcasecmp

diff --git a/lib/util/charset/util_str.c b/lib/util/charset/util_str.c
index 550fba3..6feed17 100644
--- a/lib/util/charset/util_str.c
+++ b/lib/util/charset/util_str.c
@@ -36,6 +36,8 @@ _PUBLIC_ int strcasecmp_m_handle(struct smb_iconv_handle *iconv_handle,
 				 const char *s1, const char *s2)
 {
 	codepoint_t c1=0, c2=0;
+	codepoint_t u1=0, u2=0;
+	codepoint_t l1=0, l2=0;
 	size_t size1, size2;
 
 	/* handle null ptr comparisons to simplify the use in qsort */
@@ -59,9 +61,19 @@ _PUBLIC_ int strcasecmp_m_handle(struct smb_iconv_handle *iconv_handle,
 			continue;
 		}
 
-		if (toupper_m(c1) != toupper_m(c2)) {
-			return c1 - c2;
+		u1 = toupper_m(c1);
+		u2 = toupper_m(c2);
+		if (u1 == u2) {
+			continue;
 		}
+
+		l1 = tolower_m(c1);
+		l2 = tolower_m(c2);
+		if (l1 == l2) {
+			continue;
+		}
+
+		return l1 - l2;
 	}
 
 	return *s1 - *s2;
@@ -83,6 +95,8 @@ _PUBLIC_ int strncasecmp_m_handle(struct smb_iconv_handle *iconv_handle,
 				  const char *s1, const char *s2, size_t n)
 {
 	codepoint_t c1=0, c2=0;
+	codepoint_t u1=0, u2=0;
+	codepoint_t l1=0, l2=0;
 	size_t size1, size2;
 
 	/* handle null ptr comparisons to simplify the use in qsort */
@@ -123,9 +137,19 @@ _PUBLIC_ int strncasecmp_m_handle(struct smb_iconv_handle *iconv_handle,
 			continue;
 		}
 
-		if (toupper_m(c1) != toupper_m(c2)) {
-			return c1 - c2;
+		u1 = toupper_m(c1);
+		u2 = toupper_m(c2);
+		if (u1 == u2) {
+			continue;
 		}
+
+		l1 = tolower_m(c1);
+		l2 = tolower_m(c2);
+		if (l1 == l2) {
+			continue;
+		}
+
+		return l1 - l2;
 	}
 
 	if (n == 0) {
diff --git a/selftest/knownfail.d/strcasecmp b/selftest/knownfail.d/strcasecmp
deleted file mode 100644
index 23d755a..0000000
--- a/selftest/knownfail.d/strcasecmp
+++ /dev/null
@@ -1,2 +0,0 @@
-^samba4.local.charset.strcasecmp_m
-^samba4.local.charset.strncasecmp_m
-- 
1.9.1

-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 836 bytes
Desc: OpenPGP digital signature
URL: <http://lists.samba.org/pipermail/samba-technical/attachments/20170914/6f26c37c/signature.sig>


More information about the samba-technical mailing list