diff options
author | Peter Stephenson <pws@users.sourceforge.net> | 2007-01-21 22:47:36 +0000 |
---|---|---|
committer | Peter Stephenson <pws@users.sourceforge.net> | 2007-01-21 22:47:36 +0000 |
commit | 553e011320798af097e8de95a1e2a1d2ed6a1a3e (patch) | |
tree | d2e7cd070f33afddb0ce84129e61b4f3034bceb4 | |
parent | f3bf48149a2e86faf35db529d864edd904b52fb4 (diff) | |
download | zsh-553e011320798af097e8de95a1e2a1d2ed6a1a3e.tar.gz zsh-553e011320798af097e8de95a1e2a1d2ed6a1a3e.tar.xz zsh-553e011320798af097e8de95a1e2a1d2ed6a1a3e.zip |
23118: improve sorting to make it work with locales
-rw-r--r-- | ChangeLog | 9 | ||||
-rw-r--r-- | Doc/Zsh/expn.yo | 27 | ||||
-rw-r--r-- | Src/Zle/compcore.c | 7 | ||||
-rw-r--r-- | Src/Zle/computil.c | 2 | ||||
-rw-r--r-- | Src/Zle/zle_tricky.c | 20 | ||||
-rw-r--r-- | Src/builtin.c | 35 | ||||
-rw-r--r-- | Src/glob.c | 5 | ||||
-rw-r--r-- | Src/jobs.c | 4 | ||||
-rw-r--r-- | Src/sort.c | 332 | ||||
-rw-r--r-- | Src/subst.c | 185 | ||||
-rw-r--r-- | Src/utils.c | 11 | ||||
-rw-r--r-- | Src/zsh.h | 45 | ||||
-rw-r--r-- | Src/zsh.mdd | 2 | ||||
-rw-r--r-- | Test/B03print.ztst | 17 | ||||
-rw-r--r-- | Test/D04parameter.ztst | 17 |
15 files changed, 497 insertions, 221 deletions
diff --git a/ChangeLog b/ChangeLog index 9733e7e0e..8bcc4a322 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,12 @@ +2007-01-21 Peter Stephenson <p.w.stephenson@ntlworld.com> + + * 23118: Doc/Zsh/expn.yo, Src/builtin.c, Src/glob.c, Src/jobs.c, + Src/sort.c, Src/subst.c, Src/utils.c, Src/zsh.h, Src/zsh.mdd, + Src/Zle/compcore.c, Src/Zle/computil.c, Src/Zle/zle_tricky.c, + Test/B03print.ztst, Test/D04parameter.ztst: improve sorting, + making it work properly with locales and handling embedded + nulls consistently. + 2007-01-21 Clint Adams <clint@zsh.org> * 23117: arno: Completion/Unix/Command/_yafc: diff --git a/Doc/Zsh/expn.yo b/Doc/Zsh/expn.yo index c9c67bebb..c18a297ad 100644 --- a/Doc/Zsh/expn.yo +++ b/Doc/Zsh/expn.yo @@ -723,9 +723,10 @@ example by using `tt(${(AA)=)var(name)tt(=)...tt(})' to activate field splitting, when creating an associative array. ) item(tt(a))( -With tt(o) or tt(O), sort in array index order. Note that `tt(oa)' is -therefore equivalent to the default but `tt(Oa)' is useful for -obtaining an array's elements in reverse order. +Sort in array index order; when combined with `tt(O)' sort in reverse +array index order. Note that `tt(a)' is therefore equivalent to the +default but `tt(Oa)' is useful for obtaining an array's elements in reverse +order. ) item(tt(c))( With tt(${#)var(name)tt(}), count the total number of characters in an array, @@ -750,7 +751,7 @@ Join the words of arrays together using newline as a separator. This is a shorthand for `tt(pj:\n:)'. ) item(tt(i))( -With tt(o) or tt(O), sort case-independently. +Sort case-insensitively. May be combined with `tt(n)' or `tt(O)'. ) item(tt(k))( If var(name) refers to an associative array, substitute the em(keys) @@ -763,13 +764,25 @@ item(tt(L))( Convert all letters in the result to lower case. ) item(tt(n))( -With tt(o) or tt(O), sort numerically. +Sort decimal numbers numerically; if the first differing +characters of two test strings are not digits, sorting +is lexical. Numbers with initial zeroes +are sorted before those without. Hence the array `tt(foo1 foo02 +foo2 foo3 foo20 foo23)' is sorted into the order shown. Trailing +non-digits are not sorted; the order of `tt(2foo)' and `tt(2bar)' +is not defined. May be combined with `tt(i)' or `tt(O)'. ) item(tt(o))( -Sort the resulting words in ascending order. +Sort the resulting words in ascending order; if this appears on its +own the sorting is lexical and case-sensitive (unless the locale +renders it case-insensitive). Sorting in ascending order is the +default for other forms of sorting, so this is ignored if combined +with `tt(a)', `tt(i)' or `tt(n)'. ) item(tt(O))( -Sort the resulting words in descending order. +Sort the resulting words in descending order; `tt(O)' without `tt(a)', +`tt(i)' or `tt(n)' sorts in reverse lexical order. May be combined +with `tt(a)', `tt(i)' or `tt(n)' to reverse the order of sorting. ) item(tt(P))( This forces the value of the parameter var(name) to be interpreted as a diff --git a/Src/Zle/compcore.c b/Src/Zle/compcore.c index 2259db01a..3e4f690f3 100644 --- a/Src/Zle/compcore.c +++ b/Src/Zle/compcore.c @@ -3028,7 +3028,7 @@ matchcmp(Cmatch *a, Cmatch *b) if ((*b)->disp && !((*b)->flags & CMF_MORDER)) return 1; - return strbpcmp(&((*a)->str), &((*b)->str)); + return zstrbcmp((*a)->str, (*b)->str); } /* This tests whether two matches are equal (would produce the same @@ -3077,8 +3077,9 @@ makearray(LinkList l, int type, int flags, int *np, int *nlp, int *llp) char **ap, **bp, **cp; /* Now sort the array (it contains strings). */ - qsort((void *) rp, n, sizeof(char *), - (int (*) _((const void *, const void *)))strbpcmp); + strmetasort((char **)rp, SORTIT_IGNORING_BACKSLASHES | + (isset(NUMERICGLOBSORT) ? SORTIT_NUMERICALLY : 0), + NULL); /* And delete the ones that occur more than once. */ for (ap = cp = (char **) rp; *ap; ap++) { diff --git a/Src/Zle/computil.c b/Src/Zle/computil.c index ce70dee72..1dbefa589 100644 --- a/Src/Zle/computil.c +++ b/Src/Zle/computil.c @@ -213,7 +213,7 @@ cd_calc() static int cd_sort(const void *a, const void *b) { - return strpcmp(&(*((Cdstr *) a))->sortstr, &(*((Cdstr *) b))->sortstr); + return zstrpcmp((*((Cdstr *) a))->sortstr, (*((Cdstr *) b))->sortstr, 0); } static int diff --git a/Src/Zle/zle_tricky.c b/Src/Zle/zle_tricky.c index 1393027f7..845f74bc5 100644 --- a/Src/Zle/zle_tricky.c +++ b/Src/Zle/zle_tricky.c @@ -2100,28 +2100,26 @@ sfxlen(char *s, char *t) } #endif -/* This is strcmp with ignoring backslashes. */ +/* This is zstrcmp with ignoring backslashes. */ /**/ mod_export int -strbpcmp(char **aa, char **bb) +zstrbcmp(const char *a, const char *b) { - char *a = *aa, *b = *bb; + const char *astart = a; while (*a && *b) { if (*a == '\\') a++; if (*b == '\\') b++; - if (*a != *b) + if (*a != *b || !*a) break; - if (*a) - a++; - if (*b) - b++; + a++; + b++; } if (isset(NUMERICGLOBSORT) && (idigit(*a) || idigit(*b))) { - for (; a > *aa && idigit(a[-1]); a--, b--); + for (; a > astart && idigit(a[-1]); a--, b--); if (idigit(*a) && idigit(*b)) { while (*a == '0') a++; @@ -2310,8 +2308,8 @@ listlist(LinkList l) *p = (char *) getdata(node); *p = NULL; - qsort((void *) data, num, sizeof(char *), - (int (*) _((const void *, const void *))) strbpcmp); + strmetasort((char **)data, SORTIT_IGNORING_BACKSLASHES | + (isset(NUMERICGLOBSORT) ? SORTIT_NUMERICALLY : 0), NULL); for (p = data, lenp = lens; *p; p++, lenp++) { len = *lenp = ZMB_nicewidth(*p) + 2; diff --git a/Src/builtin.c b/Src/builtin.c index 260ba603b..36e829f22 100644 --- a/Src/builtin.c +++ b/Src/builtin.c @@ -617,8 +617,7 @@ bin_set(char *nam, char **args, UNUSED(Options ops), UNUSED(int func)) } } if (sort) - qsort(args, arrlen(args), sizeof(char *), - sort > 0 ? strpcmp : invstrpcmp); + strmetasort(args, sort < 0 ? SORTIT_BACKWARDS : 0, NULL); if (array) { /* create an array with the specified elements */ char **a = NULL, **y; @@ -3603,30 +3602,16 @@ bin_print(char *name, char **args, Options ops, int func) } /* -o and -O -- sort the arguments */ - /* - * TODO: this appears to be yet another of the endless - * chunks of code that didn't get fixed up properly - * to reflect the fact that args contains unmetafied - * strings that may contain NULs with the lengths in - * len. - */ - if (OPT_ISSET(ops,'o')) { - if (fmt && !*args) return 0; - if (OPT_ISSET(ops,'i')) - qsort(args, arrlen(args), sizeof(char *), cstrpcmp); - else - qsort(args, arrlen(args), sizeof(char *), strpcmp); - } else if (OPT_ISSET(ops,'O')) { - if (fmt && !*args) return 0; - if (OPT_ISSET(ops,'i')) - qsort(args, arrlen(args), sizeof(char *), invcstrpcmp); - else - qsort(args, arrlen(args), sizeof(char *), invstrpcmp); + if (OPT_ISSET(ops,'o') || OPT_ISSET(ops,'O')) { + int flags; + + if (fmt && !*args) + return 0; + flags = OPT_ISSET(ops,'i') ? SORTIT_IGNORING_CASE : 0; + if (OPT_ISSET(ops,'O')) + flags |= SORTIT_BACKWARDS; + strmetasort(args, flags, len); } - /* after sorting arguments, recalculate lengths */ - if(OPT_ISSET(ops,'o') || OPT_ISSET(ops,'O')) - for(n = 0; n < argc; n++) - len[n] = strlen(args[n]); /* -c -- output in columns */ if (!fmt && (OPT_ISSET(ops,'c') || OPT_ISSET(ops,'C'))) { diff --git a/Src/glob.c b/Src/glob.c index 394e91d01..afd362c08 100644 --- a/Src/glob.c +++ b/Src/glob.c @@ -855,10 +855,7 @@ gmatchcmp(Gmatch a, Gmatch b) for (i = gf_nsorts, s = gf_sortlist; i; i--, s++) { switch (*s & ~GS_DESC) { case GS_NAME: - if (gf_numsort) - r = nstrpcmp(&b->name, &a->name); - else - r = strpcmp(&b->name, &a->name); + r = zstrcmp(b->name, a->name, gf_numsort ? SORTIT_NUMERICALLY : 0); break; case GS_DEPTH: { diff --git a/Src/jobs.c b/Src/jobs.c index 9dbdc8185..6f8bf0e12 100644 --- a/Src/jobs.c +++ b/Src/jobs.c @@ -1967,13 +1967,13 @@ bin_kill(char *nam, char **argv, UNUSED(Options ops), UNUSED(int func)) if (!strncmp(signame, "SIG", 3)) signame += 3; for (sig = 1; sig <= SIGCOUNT; sig++) - if (!cstrpcmp(sigs + sig, &signame)) + if (!strcasecmp(sigs[sig], signame)) break; if (sig > SIGCOUNT) { int i; for (i = 0; alt_sigs[i].name; i++) - if (!cstrpcmp(&alt_sigs[i].name, &signame)) + if (!strcasecmp(alt_sigs[i].name, signame)) { sig = alt_sigs[i].num; break; diff --git a/Src/sort.c b/Src/sort.c new file mode 100644 index 000000000..2fdb77931 --- /dev/null +++ b/Src/sort.c @@ -0,0 +1,332 @@ +/* + * sort.c - comparison and sorting of strings + * + * This file is part of zsh, the Z shell. + * + * Copyright (c) 1992-2007 Paul Falstad + * All rights reserved. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and to distribute modified versions of this software for any + * purpose, provided that the above copyright notice and the following + * two paragraphs appear in all copies of this software. + * + * In no event shall Paul Falstad or the Zsh Development Group be liable + * to any party for direct, indirect, special, incidental, or consequential + * damages arising out of the use of this software and its documentation, + * even if Paul Falstad and the Zsh Development Group have been advised of + * the possibility of such damage. + * + * Paul Falstad and the Zsh Development Group specifically disclaim any + * warranties, including, but not limited to, the implied warranties of + * merchantability and fitness for a particular purpose. The software + * provided hereunder is on an "as is" basis, and Paul Falstad and the + * Zsh Development Group have no obligation to provide maintenance, + * support, updates, enhancements, or modifications. + * + */ + +#include "zsh.mdh" +#include "sort.pro" + +/* Flag for direction of sort: 1 forwards, -1 reverse */ +static int sortdir; + +/* Flag that sort is numeric */ +static int sortnumeric; + +/**/ +static int +eltpcmp(const void *a, const void *b) +{ + const SortElt ae = *(const SortElt *)a; + const SortElt be = *(const SortElt *)b; + const char *as = ae->cmp; + const char *bs = be->cmp; + int cmp; + + if (ae->len != -1 || be->len != -1) { + /* + * Length recorded. We only do that if there are embedded + * nulls we need to treat as regular characters. + * + * Since we don't know where multibyte characters start, + * but do know that a null character can't occur inside + * one (we are relying on multibyte characters being extensions + * of ASCII), we can compare starting from after the last + * null character that occurred in both strings. + */ + const char *cmpa, *cmpb; + const char *laststarta = as; + int len; + if (ae->len != -1) { + len = ae->len; + if (be->len != -1 && len > be->len) + len = be->len; + } + else + len = be->len; + + for (cmpa = as, cmpb = bs; *cmpa == *cmpb && len--; cmpa++, cmpb++) + if (!*cmpa) + laststarta = cmpa + 1; + if (*cmpa == *cmpb && ae->len != be->len) { + /* + * Special case: one of the strings has finished, but + * another one continues after the NULL. The string + * that's finished sorts below the other. We need + * to handle this here since stroll() or strcmp() + * will just compare the strings as equal. + */ + if (ae->len != -1) { + if (be->len != -1) { + /* + * if a is shorter it's earlier, so return -1 and + * vice versa + */ + return (ae->len - be->len) * sortdir; + } else { + /* + * a has a length and so continues, hence + * b sorts lower. + */ + return sortdir; + } + } else { + /* + * b must continue because it has a recorded length, + * so a sorts lower. + */ + return - sortdir; + } + } + + bs += (laststarta - as); + as += (laststarta - as); + } +#ifdef HAVE_STRCOLL + cmp = strcoll(as, bs); +#endif + if (sortnumeric) { + for (; *as == *bs && *as; as++, bs++); +#ifndef HAVE_STRCOLL + cmp = (int)STOUC(*as) - (int)STOUC(*bs); +#endif + if (idigit(*as) || idigit(*bs)) { + for (; as > *(char **)a && idigit(as[-1]); as--, bs--); + if (idigit(*as) && idigit(*bs)) { + while (*as == '0') + as++; + while (*bs == '0') + bs++; + for (; idigit(*as) && *as == *bs; as++, bs++); + if (idigit(*as) || idigit(*bs)) { + cmp = (int)STOUC(*as) - (int)STOUC(*bs); + while (idigit(*as) && idigit(*bs)) + as++, bs++; + if (idigit(*as) && !idigit(*bs)) + return 1; + if (idigit(*bs) && !idigit(*as)) + return -1; + } + } + } + } +#ifndef HAVE_STRCOLL + else + cmp = strcmp(as, bs); +#endif + + return sortdir * cmp; +} + + +/* + * Front-end to eltpcmp() to compare strings. + * TODO: it would be better to eliminate this altogether by + * making the calling function call into the sort code + * at a higher level. + */ + +/**/ +mod_export int +zstrcmp(const char *as, const char *bs, int sortflags) +{ + struct sortelt ae, be, *aeptr, *beptr; + int oldsortdir = sortdir, oldsortnumeric = sortnumeric, ret; + + ae.cmp = as; + be.cmp = bs; + ae.len = -1; + be.len = -1; + + aeptr = &ae; + beptr = &be; + + sortdir = 1; + sortnumeric = (sortflags & SORTIT_NUMERICALLY) ? 1 : 0; + + ret = eltpcmp(&aeptr, &beptr); + + /* Paranoia: I don't think we ever need to restore these. */ + sortnumeric = oldsortnumeric; + sortdir = oldsortdir; + + return ret; +} + + +/* + * Sort an array of metafied strings. Use an "or" of bit flags + * to decide how to sort. See the SORTIT_* flags in zsh.h. + * + * If unmetalenp is not NULL, the strings in array are already + * unmetafied and unmetalenp is an array containing the corresponding + * lengths. + */ + +/**/ +void +strmetasort(char **array, int sortwhat, int *unmetalenp) +{ + char **arrptr; + /* + * Array of elements containing stuff to sort. Note sortptrarr + * is an array of pointers, since that's more efficient + * for qsort() to manipulate. sortarr is the array of + * structures. + */ + SortElt *sortptrarr, *sortptrarrptr; + SortElt sortarr, sortarrptr; + int oldsortdir, oldsortnumeric, nsort; + + nsort = arrlen(array); + if (nsort < 2) + return; + + pushheap(); + + sortptrarr = (SortElt *) zhalloc(nsort * sizeof(SortElt)); + sortarr = (SortElt) zhalloc(nsort * sizeof(struct sortelt)); + for (arrptr = array, sortptrarrptr = sortptrarr, sortarrptr = sortarr; + *arrptr; arrptr++, sortptrarrptr++, sortarrptr++) { + char *metaptr; + int needlen, needalloc; + *sortptrarrptr = sortarrptr; + sortarrptr->orig = *arrptr; + + if (unmetalenp) { + /* + * Already unmetafied. We just need to check for + * embededded nulls. + */ + int count = unmetalenp[arrptr-array]; + /* Remember this length for sorted array */ + sortarrptr->origlen = count; + for (metaptr = *arrptr; *metaptr != '\0' && count--; metaptr++) + ; + /* *metaptr must now be \0, even if we reached the end */ + needlen = (count != 0); + } else { + /* + * Not yet unmetafied. See if it needs unmetafying. + * If it doesn't, there can't be any embedded nulls, + * since these are metafied. + */ + needlen = 0; + for (metaptr = *arrptr; *metaptr && *metaptr != Meta; + metaptr++); + } + /* + * See if we need to do some special checking. + * Either we're going to need to copy it to transform it, + * or we need to unmetafy it. + */ + if ((needalloc = (sortwhat & + (SORTIT_IGNORING_CASE|SORTIT_IGNORING_BACKSLASHES))) + || *metaptr == Meta) { + char *s, *t, *src = *arrptr, *dst; + int len; + sortarrptr->cmp = dst = (char *)zhalloc(strlen(src) + 1); + + if (unmetalenp) { + /* Already unmetafied and we have the length. */ + len = unmetalenp[arrptr-array]; + } else if (*metaptr != '\0') { + /* + * Needs unmetafying. We need to check for + * embedded nulls while we do this. + */ + char *t = dst + (metaptr - src); + + if (metaptr != src) + memcpy(dst, src, metaptr - src); + while ((*t = *metaptr++)) { + if (*t++ == Meta) { + if ((t[-1] = *metaptr++ ^ 32) == '\0') + needlen = 1; + } + } + len = t - dst; + src = dst; + } else { + /* + * Doesn't need unmetafying. + * This means metaptr is the NULL at the + * end of the string, so we have the length, and + * there are no embedded nulls, so we don't + * need the length later. + * We're only copying the string to transform it + * below. + */ + len = metaptr - src; + } + if (sortwhat & SORTIT_IGNORING_CASE) { + for (s = src, t = dst; s - src != len; ) + *t++ = tulower(*s++); + src = dst; + } + if (sortwhat & SORTIT_IGNORING_BACKSLASHES) { + /* copy null byte, so increment length */ + for (s = src, t = dst; s - src != len+1; ) { + if (*s == '\\') { + s++; + len--; + } + *t++ = *s++; + } + } + /* Do we need to store the length (embedded null)? */ + sortarrptr->len = needlen ? len : -1; + } else { + /* + * We can use the string as is, although it's possible + * we still need to take account of an embedded null. + */ + sortarrptr->cmp = *arrptr; + sortarrptr->len = needlen ? unmetalenp[arrptr-array] : -1; + } + } + /* + * We need to restore sortdir so that calls to + * [n]strcmp work when + */ + oldsortdir = sortdir; + oldsortnumeric = sortnumeric; + + sortdir = (sortwhat & SORTIT_BACKWARDS) ? -1 : 1; + sortnumeric = (sortwhat & SORTIT_NUMERICALLY) ? 1 : 0; + + qsort(sortptrarr, nsort, sizeof(SortElt *), eltpcmp); + + sortnumeric = oldsortnumeric; + sortdir = oldsortdir; + for (arrptr = array, sortptrarrptr = sortptrarr; nsort--; ) { + if (unmetalenp) + unmetalenp[arrptr-array] = (*sortptrarrptr)->origlen; + *arrptr++ = (*sortptrarrptr++)->orig; + } + + popheap(); +} diff --git a/Src/subst.c b/Src/subst.c index cee2e6e5c..484b55b4d 100644 --- a/Src/subst.c +++ b/Src/subst.c @@ -618,146 +618,6 @@ strcatsub(char **d, char *pb, char *pe, char *src, int l, char *s, int glbsub, return dest; } -typedef int (*CompareFn) _((const void *, const void *)); - -/**/ -mod_export int -strpcmp(const void *a, const void *b) -{ -#ifdef HAVE_STRCOLL - return strcoll(*(char **)a, *(char **)b); -#else - return strcmp(*(char **)a, *(char **)b); -#endif -} - -/**/ -int -invstrpcmp(const void *a, const void *b) -{ -#ifdef HAVE_STRCOLL - return -strcoll(*(char **)a, *(char **)b); -#else - return -strcmp(*(char **)a, *(char **)b); -#endif -} - -/**/ -int -cstrpcmp(const void *a, const void *b) -{ -#ifdef HAVE_STRCOLL - VARARR(char, c, strlen(*(char **) a) + 1); - VARARR(char, d, strlen(*(char **) b) + 1); - char *s, *t; - int cmp; - - for (s = *(char **) a, t = c; (*t++ = tulower(*s++));); - for (s = *(char **) b, t = d; (*t++ = tulower(*s++));); - - cmp = strcoll(c, d); - - return cmp; -#else - char *c = *(char **)a, *d = *(char **)b; - - for (; *c && tulower(*c) == tulower(*d); c++, d++); - - return (int)STOUC(tulower(*c)) - (int)STOUC(tulower(*d)); -#endif -} - -/**/ -int -invcstrpcmp(const void *a, const void *b) -{ -#ifdef HAVE_STRCOLL - VARARR(char, c, strlen(*(char **) a) + 1); - VARARR(char, d, strlen(*(char **) b) + 1); - char *s, *t; - int cmp; - - for (s = *(char **) a, t = c; (*t++ = tulower(*s++));); - for (s = *(char **) b, t = d; (*t++ = tulower(*s++));); - - cmp = strcoll(c, d); - - return -cmp; -#else - char *c = *(char **)a, *d = *(char **)b; - - for (; *c && tulower(*c) == tulower(*d); c++, d++); - - return (int)STOUC(tulower(*d)) - (int)STOUC(tulower(*c)); -#endif -} - -/**/ -int -nstrpcmp(const void *a, const void *b) -{ - char *c = *(char **)a, *d = *(char **)b; - int cmp; - -#ifdef HAVE_STRCOLL - cmp = strcoll(c, d); -#endif - for (; *c == *d && *c; c++, d++); -#ifndef HAVE_STRCOLL - cmp = (int)STOUC(*c) - (int)STOUC(*d); -#endif - if (idigit(*c) || idigit(*d)) { - for (; c > *(char **)a && idigit(c[-1]); c--, d--); - if (idigit(*c) && idigit(*d)) { - while (*c == '0') - c++; - while (*d == '0') - d++; - for (; idigit(*c) && *c == *d; c++, d++); - if (idigit(*c) || idigit(*d)) { - cmp = (int)STOUC(*c) - (int)STOUC(*d); - while (idigit(*c) && idigit(*d)) - c++, d++; - if (idigit(*c) && !idigit(*d)) - return 1; - if (idigit(*d) && !idigit(*c)) - return -1; - } - } - } - return cmp; -} - -/**/ -int -invnstrpcmp(const void *a, const void *b) -{ - return -nstrpcmp(a, b); -} - -/**/ -int -instrpcmp(const void *a, const void *b) -{ - VARARR(char, c, strlen(*(char **) a) + 1); - VARARR(char, d, strlen(*(char **) b) + 1); - char **e = (char **)&c; - char **f = (char **)&d; - char *s, *t; - - for (s = *(char **) a, t = c; (*t++ = tulower(*s++));); - for (s = *(char **) b, t = d; (*t++ = tulower(*s++));); - - return nstrpcmp(&e, &f); -} - -/**/ -int -invinstrpcmp(const void *a, const void *b) -{ - return -instrpcmp(a, b); -} - /* * Pad the string str, returning a result from the heap (or str itself, * if it didn't need padding). If str is too large, it will be truncated. @@ -1470,13 +1330,11 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub) /* Value from (I) flag, used for ditto. */ int flnum = 0; /* - * sortit is an obscure combination of the settings for (o), (O), - * (i) and (n). casind is (i) and numord is (n); these are - * separate so we can have fun doing the obscure combinatorics later. + * sortit is to be passed to strmetasort(). * indord is the (a) flag, which for consistency doesn't get * combined into sortit. */ - int sortit = 0, casind = 0, numord = 0, indord = 0; + int sortit = SORTIT_ANYOLDHOW, indord = 0; /* (u): straightforward. */ int unique = 0; /* combination of (L), (U) and (C) flags. */ @@ -1693,18 +1551,20 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub) break; case 'o': - sortit = 1; + if (!sortit) + sortit |= SORTIT_SOMEHOW; /* sort, no modifiers */ break; case 'O': - sortit = 2; + sortit |= SORTIT_BACKWARDS; break; case 'i': - casind = 1; + sortit |= SORTIT_IGNORING_CASE; break; case 'n': - numord = 1; + sortit |= SORTIT_NUMERICALLY; break; case 'a': + sortit |= SORTIT_SOMEHOW; indord = 1; break; @@ -1870,15 +1730,6 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub) s++; } } - /* Sort is done by indexing on sortit-1: - * bit 1: ascending (o)/descending (O) - * bit 2: case sensitive/independent (i) - * bit 3: strict order/numeric (n) - * unless indord (a) is set set, in which case only test for - * descending by assuming only (O) is possible (not verified). - */ - if (sortit) - sortit += (casind << 1) + (numord << 2); /* * premul, postmul specify the padding character to be used @@ -3171,11 +3022,11 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub) return n; } /* Handle (o) and (O) and their variants */ - if (sortit) { + if (sortit != SORTIT_ANYOLDHOW) { if (!copied) aval = arrdup(aval); if (indord) { - if (sortit & 2) { + if (sortit & SORTIT_BACKWARDS) { char *copy; char **end = aval + arrlen(aval) - 1, **start = aval; @@ -3187,14 +3038,14 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub) } } } else { - static CompareFn sortfn[] = { - strpcmp, invstrpcmp, cstrpcmp, invcstrpcmp, - nstrpcmp, invnstrpcmp, instrpcmp, invinstrpcmp - }; - - i = arrlen(aval); - if (i && (*aval[i-1] || --i)) - qsort(aval, i, sizeof(char *), sortfn[sortit-1]); + /* + * HERE: we tested if the last element of the array + * was not a NULL string. Why the last element? + * Why didn't we expect NULL strings to work? + * Was it just a clumsy way of testing whether there + * was enough in the array to sort? + */ + strmetasort(aval, sortit, NULL); } } if (plan9) { diff --git a/Src/utils.c b/Src/utils.c index 6faf196a9..4aa5b0799 100644 --- a/Src/utils.c +++ b/Src/utils.c @@ -3625,6 +3625,17 @@ metafy(char *buf, int len, int heap) return buf; } + +/* + * Take a null-terminated, metafied string in s into a literal + * representation by converting in place. The length is in *len + * len is non-NULL; if len is NULL, you don't know the length of + * the final string, but if it's to be supplied to some system + * routine that always uses NULL termination, such as a filename + * interpreter, that doesn't matter. Note the NULL termination + * is always copied for purposes of that kind. + */ + /**/ mod_export char * unmetafy(char *s, int *len) diff --git a/Src/zsh.h b/Src/zsh.h index e2eb2544a..dcda38b89 100644 --- a/Src/zsh.h +++ b/Src/zsh.h @@ -1944,6 +1944,51 @@ struct heap { #define ZSIG_ALIAS (1<<3) /* Trap is stored under an alias */ #define ZSIG_SHIFT 4 +/***********/ +/* Sorting */ +/***********/ + +typedef int (*CompareFn) _((const void *, const void *)); + +enum { + SORTIT_ANYOLDHOW = 0, /* Defaults */ + SORTIT_IGNORING_CASE = 1, + SORTIT_NUMERICALLY = 2, + SORTIT_BACKWARDS = 4, + /* + * Ignore backslashes that quote another character---which may + * be another backslash; the second backslash is active. + */ + SORTIT_IGNORING_BACKSLASHES = 8, + /* + * Ignored by strmetasort(); used by paramsubst() to indicate + * there is some sorting to do. + */ + SORTIT_SOMEHOW = 16, +}; + +/* + * Element of array passed to qsort(). + */ +struct sortelt { + /* The original string. */ + char *orig; + /* The string used for comparison. */ + const char *cmp; + /* + * The length of the string if passed down to the sort algorithm. + * Used to sort the lengths together with the strings. + */ + int origlen; + /* + * The length of the string, if needed, else -1. + * The length is only needed if there are embededded nulls. + */ + int len; +}; + +typedef struct sortelt *SortElt; + /************************/ /* Flags to casemodifiy */ /************************/ diff --git a/Src/zsh.mdd b/Src/zsh.mdd index 41602ed7e..4d81a9e30 100644 --- a/Src/zsh.mdd +++ b/Src/zsh.mdd @@ -12,7 +12,7 @@ alwayslink=1 objects="builtin.o compat.o cond.o exec.o glob.o hashtable.o \ hist.o init.o input.o jobs.o lex.o linklist.o loop.o math.o \ mem.o module.o options.o params.o parse.o pattern.o prompt.o signals.o \ -signames.o string.o subst.o text.o utils.o watch.o" +signames.o sort.o string.o subst.o text.o utils.o watch.o" headers="../config.h system.h zsh.h sigcount.h signals.h \ prototypes.h hashtable.h ztype.h" diff --git a/Test/B03print.ztst b/Test/B03print.ztst index e36e55caa..c3ba42b18 100644 --- a/Test/B03print.ztst +++ b/Test/B03print.ztst @@ -263,3 +263,20 @@ printf '%b\n' '\0123' 0:printf handles \0... octal escapes in replacement text >S + + print -lO $'a' $'a\0' $'a\0b' $'a\0b\0' $'a\0b\0a' $'a\0b\0b' $'a\0c' | + while read -r line; do + for (( i = 1; i <= ${#line}; i++ )); do + foo=$line[i] + printf "%02x" $(( #foo )) + done + print + done +0:sorting with embedded nulls +>610063 +>6100620062 +>6100620061 +>61006200 +>610062 +>6100 +>61 diff --git a/Test/D04parameter.ztst b/Test/D04parameter.ztst index a2613dc1f..1caed4479 100644 --- a/Test/D04parameter.ztst +++ b/Test/D04parameter.ztst @@ -886,3 +886,20 @@ >/one >/ >/ + + nully=($'a\0c' $'a\0b\0b' $'a\0b\0a' $'a\0b\0' $'a\0b' $'a\0' $'a') + for string in ${(o)nully}; do + for (( i = 1; i <= ${#string}; i++ )); do + foo=$string[i] + printf "%02x" $(( #foo )) + done + print + done +0:Sorting arrays with embedded nulls +>61 +>6100 +>610062 +>61006200 +>6100620061 +>6100620062 +>610063 |