LDAP and UTF-8
Juergen Hasch
Hasch at t-online.de
Tue Feb 19 13:33:07 GMT 2002
Hi Andrew,
Am Sonntag, 17. Februar 2002 22:21 schrieb Andrew Bartlett:
> This looks like the right way to do it. If I understand correctly, the
> UTF8->UTF8 case should be a no-op and that's the most common situation.
> (Unix is normally set to utf8).
This is my understanding, too.
I made a minor change to the patch by removing one chunk in the patch for
charcnv.c, as Igor Vergeichik recommended.
I dropped the patch to ldap.c for now; it's probably better to do the
conversion directly where the string is received from the ldap function.
Maybe you could apply this, I will work on a patch for ldap.c and winbindd
for UTF-8 conversion.
...Juergen
--- include/charset.orig Sun Feb 17 16:21:43 2002
+++ include/charset.h Sun Feb 17 16:21:46 2002
@@ -19,6 +19,6 @@
*/
/* this defines the charset types used in samba */
-typedef enum {CH_UCS2=0, CH_UNIX=1, CH_DISPLAY=2, CH_DOS=3} charset_t;
+typedef enum {CH_UCS2=0, CH_UNIX=1, CH_DISPLAY=2, CH_DOS=3, CH_UTF8=4} charset_t;
-#define NUM_CHARSETS 4
+#define NUM_CHARSETS 5
--- lib/charcnv.orig Sun Feb 17 16:07:34 2002
+++ lib/charcnv.c Sun Feb 17 16:16:24 2002
@@ -37,6 +37,7 @@
else if (ch == CH_UNIX) ret = lp_unix_charset();
else if (ch == CH_DOS) ret = lp_dos_charset();
else if (ch == CH_DISPLAY) ret = lp_display_charset();
+ else if (ch == CH_UTF8) ret = "UTF-8";
if (!ret || !*ret) ret = "ASCII";
return ret;
@@ -151,7 +159,7 @@
break;
- case EILSEQ: reason="Illegal myltibyte sequence"; break;
+ case EILSEQ: reason="Illegal multibyte sequence"; break;
}
/* smb_panic(reason); */
}
@@ -390,6 +398,37 @@
return len;
}
+/****************************************************************************
+copy a string from a unix char* src to a UTF-8 destination
+return the number of bytes occupied by the string in the destination
+flags can have:
+ STR_TERMINATE means include the null termination
+ STR_UPPER means uppercase in the destination
+dest_len is the maximum length allowed in the destination. If dest_len
+is -1 then no maximum is used
+****************************************************************************/
+int push_utf8(const void *base_ptr, void *dest, const char *src, int dest_len, int flags)
+{
+ int src_len = strlen(src);
+ pstring tmpbuf;
+
+ /* treat a pstring as "unlimited" length */
+ if (dest_len == -1) {
+ dest_len = sizeof(pstring);
+ }
+
+ if (flags & STR_UPPER) {
+ pstrcpy(tmpbuf, src);
+ strupper(tmpbuf);
+ src = tmpbuf;
+ }
+
+ if (flags & STR_TERMINATE) {
+ src_len++;
+ }
+
+ return convert_string(CH_UNIX, CH_UTF8, src, src_len, dest, dest_len);
+}
/****************************************************************************
copy a string from a ucs2 source to a unix char* destination
@@ -435,6 +474,40 @@
return pull_ucs2(NULL, dest, src, sizeof(fstring), -1, STR_TERMINATE);
}
+/****************************************************************************
+copy a string from a utf-8 source to a unix char* destination
+flags can have:
+ STR_TERMINATE means the string in src is null terminated
+if STR_TERMINATE is set then src_len is ignored
+src_len is the length of the source area in bytes
+return the number of bytes occupied by the string in src
+the resulting string in "dest" is always null terminated
+****************************************************************************/
+int pull_utf8(const void *base_ptr, char *dest, const void *src, int dest_len, int src_len, int flags)
+{
+ int ret;
+
+ if (dest_len == -1) {
+ dest_len = sizeof(pstring);
+ }
+
+ if (flags & STR_TERMINATE) src_len = strlen(src)+1;
+
+ ret = convert_string(CH_UTF8, CH_UNIX, src, src_len, dest, dest_len);
+ if (dest_len) dest[MIN(ret, dest_len-1)] = 0;
+
+ return src_len;
+}
+
+int pull_utf8_pstring(char *dest, const void *src)
+{
+ return pull_utf8(NULL, dest, src, sizeof(pstring), -1, STR_TERMINATE);
+}
+
+int pull_utf8_fstring(char *dest, const void *src)
+{
+ return pull_utf8(NULL, dest, src, sizeof(fstring), -1, STR_TERMINATE);
+}
/****************************************************************************
copy a string from a char* src to a unicode or ascii
More information about the samba-technical
mailing list