>From 9e7b2a03dc04909a245828dfde073f47ffd0543c Mon Sep 17 00:00:00 2001 From: Douglas Bagnall Date: Wed, 9 Mar 2016 11:25:36 +1300 Subject: [PATCH 1/2] Add python server sort tests The tests are repeated twice: once properly with complex Unicode strings, and again in a simplified ASCII subset. We only expect Samba to pass the simplified version. The hard tests are aspirational and show what Active Directory does. Signed-off-by: Douglas Bagnall --- selftest/knownfail | 5 + source4/dsdb/tests/python/sort.py | 307 ++++++++++++++++++++++++++++++++++++++ source4/selftest/tests.py | 3 + 3 files changed, 315 insertions(+) create mode 100644 source4/dsdb/tests/python/sort.py diff --git a/selftest/knownfail b/selftest/knownfail index 1ac99d4..813a635 100644 --- a/selftest/knownfail +++ b/selftest/knownfail @@ -294,3 +294,8 @@ # we can watch for set methods on. # ^samba.tests.dcerpc.integer.samba.tests.dcerpc.integer.IntegerTests.test_.*_into_uint8_list +# +# Samba sort takes a primitive approach to Unicode sort. These tests +# match Windows 2012R2 behaviour. 
+# +^samba4.ldap.sort.python.+UnicodeSortTests diff --git a/source4/dsdb/tests/python/sort.py b/source4/dsdb/tests/python/sort.py new file mode 100644 index 0000000..436cb8c --- /dev/null +++ b/source4/dsdb/tests/python/sort.py @@ -0,0 +1,307 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# Originally based on ./sam.py +from unicodedata import normalize +import locale +locale.setlocale(locale.LC_ALL, ('en_US', 'UTF-8')) + +from collections import Counter +import optparse +import sys +import os +import re + +sys.path.insert(0, "bin/python") +import samba +from samba.tests.subunitrun import SubunitOptions, TestProgram + +import samba.getopt as options + +from samba.auth import system_session +import ldb +from samba.samdb import SamDB + +parser = optparse.OptionParser("sam.py [options] ") +sambaopts = options.SambaOptions(parser) +parser.add_option_group(sambaopts) +parser.add_option_group(options.VersionOptions(parser)) +# use command line creds if available +credopts = options.CredentialsOptions(parser) +parser.add_option_group(credopts) +subunitopts = SubunitOptions(parser) +parser.add_option_group(subunitopts) + +parser.add_option('--elements', type='int', default=33, + help="use this many elements in the tests") + +opts, args = parser.parse_args() + +if len(args) < 1: + parser.print_usage() + sys.exit(1) + +host = args[0] + +lp = sambaopts.get_loadparm() +creds = credopts.get_credentials(lp) + + +def norm(x): + x = x.decode('utf-8') + return normalize('NFKC', x).upper().encode('utf-8') + +# Python, Windows, and Samba all sort the following sequence in +# drastically different ways. The order here is what you get from +# Windows2012R2. +FIENDISH_TESTS = [' ', ' e', '\t-\t', '\n\t\t', '!@#!@#!', '¼', '¹', '1', + '1/4', '1⁄4', '1\xe2\x81\x845', '3', 'abc', 'fo\x00od', + + # Here we also had '\x00food', but that seems to sort + # non-deterministically on Windows vis-a-vis 'fo\x00od'. 
+ + 'kōkako', 'ŋđ¼³ŧ “«đð', 'ŋđ¼³ŧ“«đð', + 'sorttest', 'sorttēst11,', 'śorttest2', 'śoRttest2', + 'ś-o-r-t-t-e-s-t-2', 'soRTTēst2,', 'ṡorttest4', 'ṡorttesT4', + 'sörttest-5', 'sÖrttest-5', 'so-rttest7,', '桑巴'] + + +class BaseSortTests(samba.tests.TestCase): + avoid_tricky_sort = False + maxDiff = 2000 + + def create_user(self, i, n, prefix='sorttest', suffix='', attrs=None, + tricky=False): + name = "%s%d%s" % (prefix, i, suffix) + user = { + 'cn': name, + "objectclass": "user", + 'givenName': "abcdefghijklmnopqrstuvwxyz"[i % 26], + "roomNumber": "%sb\x00c" % (n - i), + "carLicense": "后来经", + "employeeNumber": "%s%sx" % (abs(i * (99 - i)), '\n' * (i & 255)), + "accountExpires": "%s" % (10 ** 9 + 1000000 * i), + "msTSExpireDate4": "19%02d0101010000.0Z" % (i % 100), + "flags": str(i * (n - i)), + "serialNumber": "abc %s%s%s" % ('AaBb |-/'[i & 7], + ' 3z}'[i & 3], + '"@'[i & 1],), + "comment": "Favourite colour is %d" % (n % (i + 1)), + } + + if self.avoid_tricky_sort: + # We are not even going to try passing tests that assume + # some kind of Unicode awareness. + for k, v in user.items(): + user[k] = re.sub(r'[^\w,.]', 'X', v) + else: + # Add some even trickier ones! + fiendish_index = i % len(FIENDISH_TESTS) + user.update({ + # Sort doesn't look past a NUL byte. + "photo": "\x00%d" % (n - i), + "audio": "%sn octet string %s%s ♫♬\x00lalala" % ('Aa'[i & 1], + chr(i & 255), + i), + "displayNamePrintable": "%d\x00%c" % (i, i & 255), + "adminDisplayName": "%d\x00b" % (n-i), + "title": "%d%sb" % (n - i, '\x00' * i), + + # Names that vary only in case. Windows returns + # equivalent addresses in the order they were put + # in ('a st', 'A st',...). We don't check that. 
+ "street": "%s st" % (chr(65 | (i & 14) | ((i & 1) * 32))), + + "streetAddress": FIENDISH_TESTS[fiendish_index], + "postalAddress": FIENDISH_TESTS[-fiendish_index], + }) + + if attrs is not None: + user.update(attrs) + + user['dn'] = "cn=%s,%s" % (user['cn'], self.ou) + + self.users.append(user) + self.ldb.add(user) + return user + + def setUp(self): + super(BaseSortTests, self).setUp() + self.ldb = SamDB(host, credentials=creds, + session_info=system_session(lp), lp=lp) + + self.base_dn = self.ldb.domain_dn() + self.ou = "ou=sort,%s" % self.base_dn + if False: + try: + self.ldb.delete(self.ou, ['tree_delete:1']) + except ldb.LdbError, e: + print "tried deleting %s, got error %s" % (self.ou, e) + + self.ldb.add({ + "dn": self.ou, + "objectclass": "organizationalUnit"}) + self.users = [] + n = opts.elements + for i in range(n): + self.create_user(i, n) + + attrs = set(self.users[0].keys()) - set([ + 'objectclass', 'dn']) + self.binary_sorted_keys = attrs.intersection(['audio', + 'photo', + "msTSExpireDate4", + 'serialNumber', + "displayNamePrintable"]) + + self.numeric_sorted_keys = attrs.intersection(['flags', + 'accountExpires']) + + self.timestamp_keys = attrs.intersection(['msTSExpireDate4']) + + self.int64_keys = set(['accountExpires']) + + self.locale_sorted_keys = [x for x in attrs if + x not in (self.binary_sorted_keys | + self.numeric_sorted_keys)] + + self.expected_results = {} + self.expected_results_binary = {} + + for k in self.locale_sorted_keys: + # Using key=locale.strxfrm fails on \x00 + forward = sorted((norm(x[k]) for x in self.users), + cmp=locale.strcoll) + reverse = list(reversed(forward)) + self.expected_results[k] = (forward, reverse) + + for k in self.binary_sorted_keys: + forward = sorted((x[k] for x in self.users)) + reverse = list(reversed(forward)) + self.expected_results_binary[k] = (forward, reverse) + self.expected_results[k] = (forward, reverse) + + # Fix up some because Python gets it wrong, using Schwartzian transform + for k in 
('adminDisplayName', 'title', 'streetAddress', + 'employeeNumber'): + if k in self.expected_results: + broken = self.expected_results[k][0] + tmp = [(x.replace('\x00', ''), x) for x in broken] + tmp.sort() + fixed = [x[1] for x in tmp] + self.expected_results[k] = (fixed, list(reversed(fixed))) + for k in ('streetAddress', 'postalAddress'): + if k in self.expected_results: + c = Counter([u[k] for u in self.users]) + fixed = [] + for x in FIENDISH_TESTS: + fixed += [norm(x)] * c[x] + + rev = list(reversed(fixed)) + self.expected_results[k] = (fixed, rev) + + def tearDown(self): + super(BaseSortTests, self).tearDown() + self.ldb.delete(self.ou, ['tree_delete:1']) + + def _test_server_sort_default(self): + attrs = self.locale_sorted_keys + + for attr in attrs: + for rev in (0, 1): + res = self.ldb.search(self.ou, + scope=ldb.SCOPE_ONELEVEL, attrs=[attr], + controls=["server_sort:1:%d:%s" % + (rev, attr)]) + self.assertEqual(len(res), len(self.users)) + + expected_order = self.expected_results[attr][rev] + received_order = [norm(x[attr][0]) for x in res] + if expected_order != received_order: + print attr, ['forward', 'reverse'][rev] + print "expected", expected_order + print "recieved", received_order + print "unnormalised:", [x[attr][0] for x in res] + print "unnormalised: «%s»" % '» «'.join(x[attr][0] + for x in res) + self.assertEquals(expected_order, received_order) + + def _test_server_sort_binary(self): + for attr in self.binary_sorted_keys: + for rev in (0, 1): + res = self.ldb.search(self.ou, + scope=ldb.SCOPE_ONELEVEL, attrs=[attr], + controls=["server_sort:1:%d:%s" % + (rev, attr)]) + + self.assertEqual(len(res), len(self.users)) + expected_order = self.expected_results_binary[attr][rev] + received_order = [x[attr][0] for x in res] + if expected_order != received_order: + print attr + print expected_order + print received_order + self.assertEquals(expected_order, received_order) + + def _test_server_sort_us_english(self): + # Windows doesn't support many 
matching rules, but does allow + # the locale specific sorts -- if it has the locale installed. + # The most reliable locale is the default US English, which + # won't change the sort order. + + for lang, oid in [('en_US', '1.2.840.113556.1.4.1499'), + ]: + + for attr in self.locale_sorted_keys: + for rev in (0, 1): + res = self.ldb.search(self.ou, + scope=ldb.SCOPE_ONELEVEL, + attrs=[attr], + controls=["server_sort:1:%d:%s:%s" % + (rev, attr, oid)]) + + self.assertTrue(len(res) == len(self.users)) + expected_order = self.expected_results[attr][rev] + received_order = [norm(x[attr][0]) for x in res] + if expected_order != received_order: + print attr, lang + print ['forward', 'reverse'][rev] + print "expected: ", expected_order + print "recieved: ", received_order + print "unnormalised:", [x[attr][0] for x in res] + print "unnormalised: «%s»" % '» «'.join(x[attr][0] + for x in res) + + self.assertEquals(expected_order, received_order) + + +class SimpleSortTests(BaseSortTests): + avoid_tricky_sort = True + + def test_server_sort_default(self): + self._test_server_sort_default() + + def test_server_sort_binary(self): + self._test_server_sort_binary() + + def test_server_sort_us_english(self): + self._test_server_sort_us_english() + + +class UnicodeSortTests(BaseSortTests): + avoid_tricky_sort = False + + def test_server_sort_default(self): + self._test_server_sort_default() + + def test_server_sort_us_english(self): + self._test_server_sort_us_english() + + +if "://" not in host: + if os.path.isfile(host): + host = "tdb://%s" % host + else: + host = "ldap://%s" % host + + +TestProgram(module=__name__, opts=subunitopts) diff --git a/source4/selftest/tests.py b/source4/selftest/tests.py index 32d47a0..931ae45 100755 --- a/source4/selftest/tests.py +++ b/source4/selftest/tests.py @@ -508,6 +508,9 @@ plantestsuite_loadlist("samba4.ldap.dirsync.python(ad_dc_ntvfs)", "ad_dc_ntvfs", plantestsuite_loadlist("samba4.ldap.match_rules.python", "ad_dc_ntvfs", [python, 
os.path.join(srcdir(), "lib/ldb-samba/tests/match_rules.py"), '$SERVER', '-U"$USERNAME%$PASSWORD"', '--workgroup=$DOMAIN', '$LOADLIST', '$LISTOPT']) plantestsuite_loadlist("samba4.ldap.notification.python(ad_dc_ntvfs)", "ad_dc_ntvfs", [python, os.path.join(samba4srcdir, "dsdb/tests/python/notification.py"), '$SERVER', '-U"$USERNAME%$PASSWORD"', '--workgroup=$DOMAIN', '$LOADLIST', '$LISTOPT']) plantestsuite_loadlist("samba4.ldap.sites.python(ad_dc_ntvfs)", "ad_dc_ntvfs", [python, os.path.join(samba4srcdir, "dsdb/tests/python/sites.py"), '$SERVER', '-U"$USERNAME%$PASSWORD"', '--workgroup=$DOMAIN', '$LOADLIST', '$LISTOPT']) + +plantestsuite_loadlist("samba4.ldap.sort.python(ad_dc_ntvfs)", "ad_dc_ntvfs", [python, os.path.join(samba4srcdir, "dsdb/tests/python/sort.py"), '$SERVER', '-U"$USERNAME%$PASSWORD"', '--workgroup=$DOMAIN', '$LOADLIST', '$LISTOPT']) + for env in ["ad_dc_ntvfs", "fl2000dc", "fl2003dc", "fl2008r2dc"]: plantestsuite_loadlist("samba4.ldap_schema.python(%s)" % env, env, [python, os.path.join(samba4srcdir, "dsdb/tests/python/ldap_schema.py"), '$SERVER', '-U"$USERNAME%$PASSWORD"', '--workgroup=$DOMAIN', '$LOADLIST', '$LISTOPT']) plantestsuite("samba4.ldap.possibleInferiors.python(%s)" % env, env, [python, os.path.join(samba4srcdir, "dsdb/samdb/ldb_modules/tests/possibleinferiors.py"), "ldap://$SERVER", '-U"$USERNAME%$PASSWORD"', "-W$DOMAIN"]) -- 2.5.0 >From ca60c961fd949f07a13f649b9fed45c427608574 Mon Sep 17 00:00:00 2001 From: Douglas Bagnall Date: Tue, 8 Mar 2016 14:43:40 +1300 Subject: [PATCH 2/2] ldb sort: allow sorting on attributes not returned in search The attribute is added to the search request, then peeled off again before the sort module passes the results on. 
Signed-off-by: Douglas Bagnall --- lib/ldb/modules/sort.c | 44 ++++++++++++++++++++++++++++-- source4/dsdb/tests/python/sort.py | 56 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 98 insertions(+), 2 deletions(-) diff --git a/lib/ldb/modules/sort.c b/lib/ldb/modules/sort.c index 1b762f7..19cf60b 100644 --- a/lib/ldb/modules/sort.c +++ b/lib/ldb/modules/sort.c @@ -56,6 +56,7 @@ struct sort_context { char **referrals; unsigned int num_msgs; unsigned int num_refs; + const char *extra_sort_key; const struct ldb_schema_attribute *a; int sort_result; @@ -162,7 +163,9 @@ static int server_sort_results(struct sort_context *ac) ares->type = LDB_REPLY_ENTRY; ares->message = talloc_move(ares, &ac->msgs[i]); - + if (ac->extra_sort_key) { + ldb_msg_remove_attr(ares->message, ac->extra_sort_key); + } ret = ldb_module_send_entry(ac->req, ares->message, ares->controls); if (ret != LDB_SUCCESS) { return ret; @@ -256,6 +259,9 @@ static int server_sort_search(struct ldb_module *module, struct ldb_request *req struct sort_context *ac; struct ldb_context *ldb; int ret; + const char * const *attrs; + size_t n_attrs, i; + const char *sort_attr; ldb = ldb_module_get_ctx(module); @@ -303,6 +309,40 @@ static int server_sort_search(struct ldb_module *module, struct ldb_request *req } } + /* We are asked to sort on an attribute, and if that attribute is not + already in the search attributes we need to add it (and later + remove it on the return journey). + */ + sort_attr = sort_ctrls[0]->attributeName; + if (req->op.search.attrs == NULL) { + /* This means all non-operational attributes, which means + there's nothing to add. 
*/ + attrs = NULL; + } else { + n_attrs = 0; + while (req->op.search.attrs[n_attrs] != NULL) { + if (sort_attr && + strcmp(req->op.search.attrs[n_attrs], sort_attr) == 0) { + sort_attr = NULL; + } + n_attrs++; + } + + if (sort_attr == NULL) { + attrs = req->op.search.attrs; + } else { + const char **tmp = talloc_array(ac, const char *, n_attrs + 2); + + for (i = 0; i < n_attrs; i++) { + tmp[i] = req->op.search.attrs[i]; + } + ac->extra_sort_key = sort_attr; + tmp[n_attrs] = sort_attr; + tmp[n_attrs + 1] = NULL; + attrs = tmp; + } + } + ac->attributeName = sort_ctrls[0]->attributeName; ac->orderingRule = sort_ctrls[0]->orderingRule; ac->reverse = sort_ctrls[0]->reverse; @@ -311,7 +351,7 @@ static int server_sort_search(struct ldb_module *module, struct ldb_request *req req->op.search.base, req->op.search.scope, req->op.search.tree, - req->op.search.attrs, + attrs, req->controls, ac, server_sort_search_callback, diff --git a/source4/dsdb/tests/python/sort.py b/source4/dsdb/tests/python/sort.py index 436cb8c..c4d2c44 100644 --- a/source4/dsdb/tests/python/sort.py +++ b/source4/dsdb/tests/python/sort.py @@ -273,9 +273,62 @@ class BaseSortTests(samba.tests.TestCase): self.assertEquals(expected_order, received_order) + def _test_server_sort_different_attr(self): + + def cmp_locale(a, b): + return locale.strcoll(a[0], b[0]) + + def cmp_binary(a, b): + return cmp(a[0], b[0]) + + def cmp_numeric(a, b): + return cmp(int(a[0]), int(b[0])) + + # For testing simplicity, the attributes in here need to be + # unique for each user. Otherwise there are multiple possible + # valid answers. 
+ sort_functions = {'cn': cmp_binary, + "employeeNumber": cmp_locale, + "accountExpires": cmp_numeric, + "msTSExpireDate4":cmp_binary} + attrs = sort_functions.keys() + attr_pairs = zip(attrs, attrs[1:] + attrs[:1]) + + for sort_attr, result_attr in attr_pairs: + forward = sorted(((norm(x[sort_attr]), norm(x[result_attr])) + for x in self.users), + cmp=sort_functions[sort_attr]) + reverse = list(reversed(forward)) + + for rev in (0, 1): + res = self.ldb.search(self.ou, + scope=ldb.SCOPE_ONELEVEL, + attrs=[result_attr], + controls=["server_sort:1:%d:%s" % + (rev, sort_attr)]) + self.assertEqual(len(res), len(self.users)) + + expected_order = [x[1] for x in (forward, reverse)[rev]] + received_order = [norm(x[result_attr][0]) for x in res] + + if expected_order != received_order: + print sort_attr, result_attr, ['forward', 'reverse'][rev] + print "expected", expected_order + print "recieved", received_order + print "unnormalised:", [x[result_attr][0] for x in res] + print "unnormalised: «%s»" % '» «'.join(x[result_attr][0] + for x in res) + self.assertEquals(expected_order, received_order) + for x in res: + if sort_attr in x: + self.fail('the search for %s should not return %s' % + (result_attr, sort_attr)) + class SimpleSortTests(BaseSortTests): avoid_tricky_sort = True + def test_server_sort_different_attr(self): + self._test_server_sort_different_attr() def test_server_sort_default(self): self._test_server_sort_default() @@ -296,6 +349,9 @@ class UnicodeSortTests(BaseSortTests): def test_server_sort_us_english(self): self._test_server_sort_us_english() + def test_server_sort_different_attr(self): + self._test_server_sort_different_attr() + if "://" not in host: if os.path.isfile(host): -- 2.5.0