From 2eacbef91913fe967335812900d43cf2edfa54d9 Mon Sep 17 00:00:00 2001 From: Peter Stephenson Date: Mon, 7 Aug 2017 18:47:43 +0100 Subject: 41499 (with one further tweak): POSIX_STRINGS behaviour. Ignore a terminating delimiter when splitting, as separators, despite being called separators, are terminators. --- ChangeLog | 6 ++++++ Doc/Zsh/options.yo | 30 +++++++++++++++++++++--------- Src/utils.c | 20 ++++++++++++++++++-- Test/E01options.ztst | 41 +++++++++++++++++++++++++++++++++++++++++ 4 files changed, 86 insertions(+), 11 deletions(-) diff --git a/ChangeLog b/ChangeLog index c224c675f..f885721ff 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +2017-08-07 Peter Stephenson + + * 41499 (with one further tweak): Src/utils.c, + Doc/Zsh/options.yo, Test/E01options.ztst: With POSIX_STRINGS + ignore terminating separator when splitting. + 2017-08-07 Peter Stephenson * Anthony: 41500: Doc/Zsh/mod_zutil.yo: document '-' to end diff --git a/Doc/Zsh/options.yo b/Doc/Zsh/options.yo index 70092d681..36bd939ad 100644 --- a/Doc/Zsh/options.yo +++ b/Doc/Zsh/options.yo @@ -2193,16 +2193,16 @@ cindex(discarding embedded nulls in $'...') cindex(embedded nulls, in $'...') cindex(nulls, embedded in $'...') item(tt(POSIX_STRINGS) )( -This option affects processing of quoted strings. Currently it only -affects the behaviour of null characters, i.e. character 0 in the -portable character set corresponding to US ASCII. +This option affects processing of quoted strings, and also +splitting of strings. -When this option is not set, null characters embedded within strings -of the form tt($')var(...)tt(') are treated as ordinary characters. The -entire string is maintained within the shell and output to files where -necessary, although owing to restrictions of the library interface -the string is truncated at the null character in file names, environment -variables, or in arguments to external programs. 
+When this option is not set, null characters (character 0 in the +portable character set corresponding to US ASCII) that are embedded +within strings of the form tt($')var(...)tt(') are treated as ordinary +characters. The entire string is maintained within the shell and output +to files where necessary, although owing to restrictions of the library +interface the string is truncated at the null character in file names, +environment variables, or in arguments to external programs. When this option is set, the tt($')var(...)tt(') expression is truncated at the null character. Note that remaining parts of the same string @@ -2211,6 +2211,18 @@ beyond the termination of the quotes are not truncated. For example, the command line argument tt(a$'b\0c'd) is treated with the option off as the characters tt(a), tt(b), null, tt(c), tt(d), and with the option on as the characters tt(a), tt(b), tt(d). + +Furthermore, when the option is set, a trailing separator followed by an +empty string does not cause extra fields to be produced when the string +is split. For example, + +example(var="foo bar " +print -l "${=var}") + +outputs a blank line at the end if tt(POSIX_STRINGS) is not set, but +no blank line if the option is set. Note that empty elements would in +any case be removed if quotation marks were not used. If the separator +is not white space, only the final separator is ignored in this fashion. ) pindex(POSIX_TRAPS) pindex(NO_POSIX_TRAPS) diff --git a/Src/utils.c b/Src/utils.c index 5055d69fe..d30a7b47e 100644 --- a/Src/utils.c +++ b/Src/utils.c @@ -3500,12 +3500,12 @@ skipwsep(char **s) mod_export char ** spacesplit(char *s, int allownull, int heap, int quote) { - char *t, **ret, **ptr; + char *t, **ret, **ptr, **eptr; int l = sizeof(*ret) * (wordcount(s, NULL, -!allownull) + 1); char *(*dup)(const char *) = (heap ? dupstring : ztrdup); /* ### TODO: s/calloc/alloc/ */ - ptr = ret = (char **) (heap ? hcalloc(l) : zshcalloc(l)); + eptr = ptr = ret = (char **) (heap ? 
hcalloc(l) : zshcalloc(l)); if (quote) { /* @@ -3537,6 +3537,7 @@ spacesplit(char *s, int allownull, int heap, int quote) if (s > t || allownull) { *ptr = (char *) (heap ? zhalloc((s - t) + 1) : zalloc((s - t) + 1)); + eptr = ptr; ztrncpy(*ptr++, t, s - t); } else *ptr++ = dup(nulstring); @@ -3545,6 +3546,21 @@ spacesplit(char *s, int allownull, int heap, int quote) } if (!allownull && t != s) *ptr++ = dup(""); + if (isset(POSIXSTRINGS) && ptr > eptr + 1) { + /* + * Trailing separators do not generate extra fields in POSIX. + * Note this is only the final separator --- if the + * immediately preceding field was null it is still counted. + * So just back up one. + */ + --ptr; + if (!heap) { + char **ret2 = realloc(ret, sizeof(*ret) * (ptr+1-ret)); + ptr -= ret-ret2; + free(ret); + ret = ret2; + } + } *ptr = NULL; return ret; } diff --git a/Test/E01options.ztst b/Test/E01options.ztst index f01d83567..b394e7cf4 100644 --- a/Test/E01options.ztst +++ b/Test/E01options.ztst @@ -1339,3 +1339,44 @@ ?(anon):4: `break' active at end of function scope ?(anon):4: `break' active at end of function scope ?(anon):4: `break' active at end of function scope + + for opt in POSIX_STRINGS NO_POSIX_STRINGS; do + var="foo bar " + (setopt $opt; print -l X "${=var}" Y) + var="foo2::bar2:" + (setopt $opt; IFS=:; print -l X "${=var}" Y) + var="foo3:bar3::" + (setopt $opt; IFS=:; print -l X "${=var}" Y) + done +0:POSIX_STRINGS effect on final delimiters +>X +>foo +>bar +>Y +>X +>foo2 +> +>bar2 +>Y +>X +>foo3 +>bar3 +> +>Y +>X +>foo +>bar +> +>Y +>X +>foo2 +> +>bar2 +> +>Y +>X +>foo3 +>bar3 +> +> +>Y -- cgit 1.4.1