summary refs log tree commit diff
path: root/Src
diff options
context:
space:
mode:
author Peter Stephenson <pws@users.sourceforge.net> 2007-01-22 14:35:12 +0000
committer Peter Stephenson <pws@users.sourceforge.net> 2007-01-22 14:35:12 +0000
commit e375d5ee8817e7f98d0a2f37cfb7566b8572d0e0 (patch)
tree 4fcda72583f7a2f899be950db94b24017aa3e0ce /Src
parent c53aa4adee9236a08d2d88c6e753588760b88f0e (diff)
download zsh-e375d5ee8817e7f98d0a2f37cfb7566b8572d0e0.tar.gz
zsh-e375d5ee8817e7f98d0a2f37cfb7566b8572d0e0.tar.xz
zsh-e375d5ee8817e7f98d0a2f37cfb7566b8572d0e0.zip
23119: lower case in sorting properly
Diffstat (limited to 'Src')
-rw-r--r-- Src/sort.c 48
1 files changed, 45 insertions, 3 deletions
diff --git a/Src/sort.c b/Src/sort.c
index 2fdb77931..1b8507342 100644
--- a/Src/sort.c
+++ b/Src/sort.c
@@ -248,7 +248,8 @@ strmetasort(char **array, int sortwhat, int *unmetalenp)
 	    || *metaptr == Meta) {
 	    char *s, *t, *src = *arrptr, *dst;
 	    int len;
-	    sortarrptr->cmp = dst = (char *)zhalloc(strlen(src) + 1);
+	    sortarrptr->cmp = dst =
+		(char *)zhalloc(((sortwhat & SORTIT_IGNORING_CASE)?2:1)*strlen(src)+1);
 
 	    if (unmetalenp) {
 		/* Already unmetafied and we have the length. */
@@ -283,8 +284,49 @@ strmetasort(char **array, int sortwhat, int *unmetalenp)
 		len = metaptr - src;
 	    }
 	    if (sortwhat & SORTIT_IGNORING_CASE) {
-		for (s = src, t = dst; s - src != len; )
-		    *t++ = tulower(*s++);
+		char *send = src + len;
+#ifdef MULTIBYTE_SUPPORT
+		if (isset(MULTIBYTE)) {
+		    /*
+		     * Lower the case the hard way.  Convert to a wide
+		     * character, process that, and convert back.  We
+		     * don't assume the characters have the same
+		     * multibyte length.  We can't use casemodify()
+		     * because we have unmetafied data, which may have
+		     * been passed down to us.
+		     */
+		    mbstate_t mbsin, mbsout;
+		    int clen;
+		    wchar_t wc;
+		    memset(&mbsin, 0, sizeof(mbstate_t));
+		    memset(&mbsout, 0, sizeof(mbstate_t));
+
+		    for (s = src, t = dst; s < send; ) {
+			clen = mbrtowc(&wc, s, send-s, &mbsin);
+			if (clen < 0) {
+			    /* invalid or unfinished: treat as single bytes */
+			    while (s < send)
+				*t++ = tulower(*s++);
+			    break;
+			}
+			if (clen == 0) {
+			    /* embedded null */
+			    *t++ = '\0';
+			    s++;
+			    continue;
+			}
+			s += clen;
+			wc = towlower(wc);
+			clen = wcrtomb(t, wc, &mbsout);
+			t += clen;
+			DPUTS(clen < 0, "Bad conversion when lowering case");
+		    }
+		    *t = '\0';
+		    len = t - dst;
+		} else
+#endif
+		    for (s = src, t = dst; s < send; )
+			*t++ = tulower(*s++);
 		src = dst;
 	    }
 	    if (sortwhat & SORTIT_IGNORING_BACKSLASHES) {