about summary refs log tree commit diff
path: root/Src/sort.c
diff options
context:
space:
mode:
authorPeter Stephenson <pws@users.sourceforge.net>2007-01-21 22:47:36 +0000
committerPeter Stephenson <pws@users.sourceforge.net>2007-01-21 22:47:36 +0000
commit553e011320798af097e8de95a1e2a1d2ed6a1a3e (patch)
treed2e7cd070f33afddb0ce84129e61b4f3034bceb4 /Src/sort.c
parentf3bf48149a2e86faf35db529d864edd904b52fb4 (diff)
downloadzsh-553e011320798af097e8de95a1e2a1d2ed6a1a3e.tar.gz
zsh-553e011320798af097e8de95a1e2a1d2ed6a1a3e.tar.xz
zsh-553e011320798af097e8de95a1e2a1d2ed6a1a3e.zip
23118: improve sorting to make it work with locales
Diffstat (limited to 'Src/sort.c')
-rw-r--r--Src/sort.c332
1 files changed, 332 insertions, 0 deletions
diff --git a/Src/sort.c b/Src/sort.c
new file mode 100644
index 000000000..2fdb77931
--- /dev/null
+++ b/Src/sort.c
@@ -0,0 +1,332 @@
+/*
+ * sort.c - comparison and sorting of strings
+ *
+ * This file is part of zsh, the Z shell.
+ *
+ * Copyright (c) 1992-2007 Paul Falstad
+ * All rights reserved.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and to distribute modified versions of this software for any
+ * purpose, provided that the above copyright notice and the following
+ * two paragraphs appear in all copies of this software.
+ *
+ * In no event shall Paul Falstad or the Zsh Development Group be liable
+ * to any party for direct, indirect, special, incidental, or consequential
+ * damages arising out of the use of this software and its documentation,
+ * even if Paul Falstad and the Zsh Development Group have been advised of
+ * the possibility of such damage.
+ *
+ * Paul Falstad and the Zsh Development Group specifically disclaim any
+ * warranties, including, but not limited to, the implied warranties of
+ * merchantability and fitness for a particular purpose.  The software
+ * provided hereunder is on an "as is" basis, and Paul Falstad and the
+ * Zsh Development Group have no obligation to provide maintenance,
+ * support, updates, enhancements, or modifications.
+ *
+ */
+
+#include "zsh.mdh"
+#include "sort.pro"
+
+/* Flag for direction of sort: 1 forwards, -1 reverse */
+static int sortdir;
+
+/* Flag that sort is numeric */
+static int sortnumeric;
+
+/**/
+static int
+eltpcmp(const void *a, const void *b)
+{
+    const SortElt ae = *(const SortElt *)a;
+    const SortElt be = *(const SortElt *)b;
+    const char *as = ae->cmp;
+    const char *bs = be->cmp;
+    int cmp;
+
+    if (ae->len != -1 || be->len != -1) {
+	/*
+	 * Length recorded.  We only do that if there are embedded
+	 * nulls we need to treat as regular characters.
+	 *
+	 * Since we don't know where multibyte characters start,
+	 * but do know that a null character can't occur inside
+	 * one (we are relying on multibyte characters being extensions
+	 * of ASCII), we can compare starting from after the last
+	 * null character that occurred in both strings.
+	 */
+	const char *cmpa, *cmpb;
+	const char *laststarta = as;
+	int len;
+	if (ae->len != -1) {
+	    len = ae->len;
+	    if (be->len != -1 && len > be->len)
+		len = be->len;
+	}
+	else
+	    len = be->len;
+
+	for (cmpa = as, cmpb = bs; *cmpa == *cmpb && len--; cmpa++, cmpb++)
+	    if (!*cmpa)
+		laststarta = cmpa + 1;
+	if (*cmpa == *cmpb && ae->len != be->len) {
+	    /*
+	     * Special case: one of the strings has finished, but
+	     * another one continues after the NULL.  The string
+	     * that's finished sorts below the other.  We need
+	     * to handle this here since stroll() or strcmp()
+	     * will just compare the strings as equal.
+	     */
+	    if (ae->len != -1) {
+		if (be->len != -1) {
+		    /*
+		     * if a is shorter it's earlier, so return -1 and
+		     * vice versa 
+		     */
+		    return (ae->len - be->len) * sortdir;
+		} else {
+		    /*
+		     * a has a length and so continues, hence
+		     * b sorts lower.
+		     */
+		    return sortdir;
+		}
+	    } else {
+		/*
+		 * b must continue because it has a recorded length,
+		 * so a sorts lower.
+		 */
+		return - sortdir;
+	    }
+	}
+
+	bs += (laststarta - as);
+	as += (laststarta - as);
+    }
+#ifdef HAVE_STRCOLL
+    cmp = strcoll(as, bs);
+#endif
+    if (sortnumeric) {
+	for (; *as == *bs && *as; as++, bs++);
+#ifndef HAVE_STRCOLL
+	cmp = (int)STOUC(*as) - (int)STOUC(*bs);
+#endif
+	if (idigit(*as) || idigit(*bs)) {
+	    for (; as > *(char **)a && idigit(as[-1]); as--, bs--);
+	    if (idigit(*as) && idigit(*bs)) {
+		while (*as == '0')
+		    as++;
+		while (*bs == '0')
+		    bs++;
+		for (; idigit(*as) && *as == *bs; as++, bs++);
+		if (idigit(*as) || idigit(*bs)) {
+		    cmp = (int)STOUC(*as) - (int)STOUC(*bs);
+		    while (idigit(*as) && idigit(*bs))
+			as++, bs++;
+		    if (idigit(*as) && !idigit(*bs))
+			return 1;
+		    if (idigit(*bs) && !idigit(*as))
+			return -1;
+		}
+	    }
+	}
+    }
+#ifndef HAVE_STRCOLL
+    else
+	cmp = strcmp(as, bs);
+#endif	
+
+    return sortdir * cmp;
+}
+
+
+/*
+ * Front-end to eltpcmp() to compare strings.
+ * TODO: it would be better to eliminate this altogether by
+ * making the calling function call into the sort code
+ * at a higher level.
+ */
+
+/**/
+mod_export int
+zstrcmp(const char *as, const char *bs, int sortflags)
+{
+    struct sortelt ae, be, *aeptr, *beptr;
+    int oldsortdir = sortdir, oldsortnumeric = sortnumeric, ret;
+
+    ae.cmp = as;
+    be.cmp = bs;
+    ae.len = -1;
+    be.len = -1;
+
+    aeptr = &ae;
+    beptr = &be;
+
+    sortdir = 1;
+    sortnumeric = (sortflags & SORTIT_NUMERICALLY) ? 1 : 0;
+
+    ret = eltpcmp(&aeptr, &beptr);
+
+    /* Paranoia: I don't think we ever need to restore these. */
+    sortnumeric = oldsortnumeric;
+    sortdir = oldsortdir;
+
+    return ret;
+}
+
+
+/*
+ * Sort an array of metafied strings.  Use an "or" of bit flags
+ * to decide how to sort.  See the SORTIT_* flags in zsh.h.
+ *
+ * If unmetalenp is not NULL, the strings in array are already
+ * unmetafied and unmetalenp is an array containing the corresponding
+ * lengths.
+ */
+
+/**/
+void
+strmetasort(char **array, int sortwhat, int *unmetalenp)
+{
+    char **arrptr;
+    /*
+     * Array of elements containing stuff to sort.  Note sortptrarr
+     * is an array of pointers, since that's more efficient
+     * for qsort() to manipulate.  sortarr is the array of
+     * structures.
+     */
+    SortElt *sortptrarr, *sortptrarrptr;
+    SortElt sortarr, sortarrptr;
+    int oldsortdir, oldsortnumeric, nsort;
+
+    nsort = arrlen(array);
+    if (nsort < 2)
+	return;
+
+    pushheap();
+
+    sortptrarr = (SortElt *) zhalloc(nsort * sizeof(SortElt));
+    sortarr = (SortElt) zhalloc(nsort * sizeof(struct sortelt));
+    for (arrptr = array, sortptrarrptr = sortptrarr, sortarrptr = sortarr;
+	 *arrptr; arrptr++, sortptrarrptr++, sortarrptr++) {
+	char *metaptr;
+	int needlen, needalloc;
+	*sortptrarrptr = sortarrptr;
+	sortarrptr->orig = *arrptr;
+
+	if (unmetalenp) {
+	    /*
+	     * Already unmetafied.  We just need to check for
+	     * embededded nulls.
+	     */
+	    int count = unmetalenp[arrptr-array];
+	    /* Remember this length for sorted array */
+	    sortarrptr->origlen = count;
+	    for (metaptr = *arrptr; *metaptr != '\0' && count--; metaptr++)
+		;
+	    /* *metaptr must now be \0, even if we reached the end */
+	    needlen = (count != 0);
+	} else {
+	    /*
+	     * Not yet unmetafied.  See if it needs unmetafying.
+	     * If it doesn't, there can't be any embedded nulls,
+	     * since these are metafied.
+	     */
+	    needlen = 0;
+	    for (metaptr = *arrptr; *metaptr && *metaptr != Meta;
+		 metaptr++);
+	}
+	/*
+	 * See if we need to do some special checking.
+	 * Either we're going to need to copy it to transform it,
+	 * or we need to unmetafy it.
+	 */
+	if ((needalloc = (sortwhat &
+			  (SORTIT_IGNORING_CASE|SORTIT_IGNORING_BACKSLASHES)))
+	    || *metaptr == Meta) {
+	    char *s, *t, *src = *arrptr, *dst;
+	    int len;
+	    sortarrptr->cmp = dst = (char *)zhalloc(strlen(src) + 1);
+
+	    if (unmetalenp) {
+		/* Already unmetafied and we have the length. */
+		len = unmetalenp[arrptr-array];
+	    } else if (*metaptr != '\0') {
+		/*
+		 * Needs unmetafying.  We need to check for
+		 * embedded nulls while we do this.
+		 */
+		char *t = dst + (metaptr - src);
+
+		if (metaptr != src)
+		    memcpy(dst, src, metaptr - src);
+		while ((*t = *metaptr++)) {
+		    if (*t++ == Meta) {
+			if ((t[-1] = *metaptr++ ^ 32) == '\0')
+			    needlen = 1;
+		    }
+		}
+		len = t - dst;
+		src = dst;
+	    } else {
+		/*
+		 * Doesn't need unmetafying.
+		 * This means metaptr is the NULL at the
+		 * end of the string, so we have the length, and
+		 * there are no embedded nulls, so we don't
+		 * need the length later.
+		 * We're only copying the string to transform it
+		 * below.
+		 */
+		len = metaptr - src;
+	    }
+	    if (sortwhat & SORTIT_IGNORING_CASE) {
+		for (s = src, t = dst; s - src != len; )
+		    *t++ = tulower(*s++);
+		src = dst;
+	    }
+	    if (sortwhat & SORTIT_IGNORING_BACKSLASHES) {
+		/* copy null byte, so increment length */
+		for (s = src, t = dst; s - src != len+1; ) {
+		    if (*s == '\\') {
+			s++;
+			len--;
+		    }
+		    *t++ = *s++;
+		}
+	    }
+	    /* Do we need to store the length (embedded null)? */
+	    sortarrptr->len = needlen ? len : -1;
+	} else {
+	    /*
+	     * We can use the string as is, although it's possible
+	     * we still need to take account of an embedded null.
+	     */
+	    sortarrptr->cmp = *arrptr;
+	    sortarrptr->len = needlen ? unmetalenp[arrptr-array] : -1;
+	}
+    }
+    /*
+     * We need to restore sortdir so that calls to
+     * [n]strcmp work when 
+     */
+    oldsortdir = sortdir;
+    oldsortnumeric = sortnumeric;
+
+    sortdir = (sortwhat & SORTIT_BACKWARDS) ? -1 : 1;
+    sortnumeric = (sortwhat & SORTIT_NUMERICALLY) ? 1 : 0;
+
+    qsort(sortptrarr, nsort, sizeof(SortElt *), eltpcmp);
+
+    sortnumeric = oldsortnumeric;
+    sortdir = oldsortdir;
+    for (arrptr = array, sortptrarrptr = sortptrarr; nsort--; ) {
+	if (unmetalenp)
+	    unmetalenp[arrptr-array] = (*sortptrarrptr)->origlen;
+	*arrptr++ = (*sortptrarrptr++)->orig;
+    }
+
+    popheap();
+}