about summary refs log tree commit diff
diff options
context:
space:
mode:
authorPeter Stephenson <pws@users.sourceforge.net>2010-11-18 10:07:55 +0000
committerPeter Stephenson <pws@users.sourceforge.net>2010-11-18 10:07:55 +0000
commit23bdfc7fd2a012d5205ed22d18eb39e41c8fbc95 (patch)
tree80d7f0e6c868a1283f3275cf88e08b8c86d509c1
parent851b8e151004ec729dad9c8c7867bbf67649df8e (diff)
downloadzsh-23bdfc7fd2a012d5205ed22d18eb39e41c8fbc95.tar.gz
zsh-23bdfc7fd2a012d5205ed22d18eb39e41c8fbc95.tar.xz
zsh-23bdfc7fd2a012d5205ed22d18eb39e41c8fbc95.zip
28418: add ${NAME:OFFSET:LENGTH} substitution
-rw-r--r--ChangeLog8
-rw-r--r--Doc/Zsh/expn.yo39
-rw-r--r--Src/lex.c26
-rw-r--r--Src/params.c4
-rw-r--r--Src/subst.c128
-rw-r--r--Test/D04parameter.ztst46
6 files changed, 241 insertions, 10 deletions
diff --git a/ChangeLog b/ChangeLog
index dd3873860..91b8d964d 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,9 @@
+2010-11-18  Peter Stephenson  <pws@csr.com>
+
+	* 28418: Doc/Zsh/expn.yo, Src/lex.c, Src/params.c, Src/subst.c,
+	Test/D04parameter.ztst: add ${NAME:OFFSET} and
+	${NAME:OFFSET:LENGTH} substitution syntax.
+
 2010-11-17  Peter Stephenson  <pws@csr.com>
 
 	* 28377: Doc/Zsh/grammar.yo: document more alias problems.
@@ -13819,5 +13825,5 @@
 
 *****************************************************
 * This is used by the shell to define $ZSH_PATCHLEVEL
-* $Revision: 1.5121 $
+* $Revision: 1.5122 $
 *****************************************************
diff --git a/Doc/Zsh/expn.yo b/Doc/Zsh/expn.yo
index 64fcd74e3..6f29a8778 100644
--- a/Doc/Zsh/expn.yo
+++ b/Doc/Zsh/expn.yo
@@ -585,6 +585,45 @@ If var(name) is an array
 the matching array elements are removed (use the `tt((M))' flag to
 remove the non-matched elements).
 )
+xitem(tt(${)var(name)tt(:)var(offset)tt(}))
+item(tt(${)var(name)tt(:)var(offset)tt(:)var(length)tt(}))(
+This syntax gives effects similar to parameter subscripting
+in the form tt($)var(name)tt([)var(offset)tt(,)var(end)tt(]) but in
+a form compatible with other shells.
+
+If the variable var(name) is a scalar, substitute the contents
+starting from offset var(offset); if var(name) is an array,
+substitute elements from element var(offset).  If var(length) is
+given, substitute that many characters or elements, otherwise the
+entire rest of the scalar or array.
+
+var(offset) is treated similarly to a parameter subscript:
+the offset of the first character or element in var(name)
+is 0 if the option tt(KSH_ARRAYS) is set, else 1; a negative
+subscript counts backwards so that -1 corresponds to the last
+character or element.
+
+var(length) is always treated directly as a length and hence may not be
+negative.
+
+var(offset) and var(length) undergo the same set of shell substitutions
+as for scalar assignment; in addition, they are then subject to arithmetic
+evaluation.  Hence, for example
+
+example(print ${foo:3}
+print ${foo: 1 + 2}
+print ${foo:$(( 1 + 2))}
+print ${foo:$(echo 1 + 2)})
+
+all have the same effect.
+
+Note that if var(offset) is negative, the tt(-) may not appear immediately
+after the tt(:) as this indicates the
+tt(${)var(name)tt(:-)var(word)tt(}) form of substitution; a space
+may be inserted before the tt(-).  Furthermore, neither var(offset) nor
+var(length) may begin with an alphabetic character or tt(&) as these are
+used to indicate history-style modifiers.
+)
 xitem(tt(${)var(name)tt(/)var(pattern)tt(/)var(repl)tt(}))
 item(tt(${)var(name)tt(//)var(pattern)tt(/)var(repl)tt(}))(
 Replace the longest possible match of var(pattern) in the expansion of
diff --git a/Src/lex.c b/Src/lex.c
index 28899fef0..fdb4b98ac 100644
--- a/Src/lex.c
+++ b/Src/lex.c
@@ -1398,7 +1398,12 @@ gettokstr(int c, int sub)
 }
 
 
-/* Return non-zero for error (character to unget), else zero */
+/*
+ * Parse input as if in double quotes.
+ * endchar is the end character to expect.
+ * sub has got something to do with whether we are doing quoted substitution.
+ * Return non-zero for error (character to unget), else zero
+ */
 
 /**/
 static int
@@ -1591,14 +1596,20 @@ parsestrnoerr(char *s)
     return err;
 }
 
+/*
+ * Parse a subscript in string s.
+ * sub is passed down to dquote_parse().
+ * endchar is the final character.
+ * Return a pointer to the next character, or NULL.
+ */
 /**/
 mod_export char *
-parse_subscript(char *s, int sub)
+parse_subscript(char *s, int sub, int endchar)
 {
     int l = strlen(s), err;
     char *t;
 
-    if (!*s || *s == ']')
+    if (!*s || *s == endchar)
 	return 0;
     lexsave();
     untokenize(t = dupstring(s));
@@ -1607,15 +1618,16 @@ parse_subscript(char *s, int sub)
     len = 0;
     bptr = tokstr = s;
     bsiz = l + 1;
-    err = dquote_parse(']', sub);
+    err = dquote_parse(endchar, sub);
     if (err) {
 	err = *bptr;
-	*bptr = 0;
+	*bptr = '\0';
 	untokenize(s);
 	*bptr = err;
-	s = 0;
-    } else
+	s = NULL;
+    } else {
 	s = bptr;
+    }
     strinend();
     inpop();
     DPUTS(cmdsp, "BUG: parse_subscript: cmdstack not empty.");
diff --git a/Src/params.c b/Src/params.c
index 7ac33b912..92e0e5368 100644
--- a/Src/params.c
+++ b/Src/params.c
@@ -1013,7 +1013,7 @@ isident(char *s)
 	return 0;
 
     /* Require balanced [ ] pairs with something between */
-    if (!(ss = parse_subscript(++ss, 1)))
+    if (!(ss = parse_subscript(++ss, 1, ']')))
 	return 0;
     untokenize(s);
     return !ss[1];
@@ -1628,7 +1628,7 @@ getindex(char **pptr, Value v, int flags)
 
     *s++ = '[';
     /* Error handled after untokenizing */
-    s = parse_subscript(s, flags & SCANPM_DQUOTED);
+    s = parse_subscript(s, flags & SCANPM_DQUOTED, ']');
     /* Now we untokenize everything except inull() markers so we can check *
      * for the '*' and '@' special subscripts.  The inull()s are removed  *
      * in getarg() after we know whether we're doing reverse indexing.    */
diff --git a/Src/subst.c b/Src/subst.c
index 5f65945a5..c0fb38a48 100644
--- a/Src/subst.c
+++ b/Src/subst.c
@@ -1371,6 +1371,43 @@ untok_and_escape(char *s, int escapes, int tok_arg)
     return dst;
 }
 
+/*
+ * See if an argument str looks like a subscript or length following
+ * a colon and parse it.  It must be followed by a ':' or nothing.
+ * If this succeeds, expand and return the evaluated expression,
+ * else return NULL.
+ *
+ * We assume this is what is meant if the first character is not
+ * an alphabetic character or '&', which signify modifiers.
+ *
+ * On success, set *endp to point to the next character following.
+ */
+static char *
+check_colon_subscript(char *str, char **endp)
+{
+    int sav;
+
+    /* Could this be a modifier (or empty)? */
+    if (!*str || ialpha(*str) || *str == '&')
+	return NULL;
+    *endp = parse_subscript(str, 0, ':');
+    if (!*endp) {
+	/* No trailing colon? */
+	*endp = parse_subscript(str, 0, '\0');
+	if (!*endp)
+	    return NULL;
+    }
+    sav = **endp;
+    **endp = '\0';	/* cut str at the end of the expression for now */
+    if (parsestr(str = dupstring(str))) {
+	**endp = sav;	/* don't leave the caller's string truncated */
+	return NULL;
+    }
+    singsub(&str);
+    **endp = sav;
+    return str;
+}
+
 /* parameter substitution */
 
 #define	isstring(c) ((c) == '$' || (char)(c) == String || (char)(c) == Qstring)
@@ -2683,6 +2720,97 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
 	    }
 	    val = dupstring("");
 	}
+	if (colf && inbrace) {
+	    /*
+	     * Look for ${PARAM:OFFSET} or ${PARAM:OFFSET:LENGTH}.
+	     * This must appear before modifiers.  For compatibility
+	     * with bash we perform both standard string substitutions
+	     * and math eval.
+	     */
+	    char *check_offset2;
+	    char *check_offset = check_colon_subscript(s, &check_offset2);
+	    if (check_offset) {
+		zlong offset = mathevali(check_offset);
+		zlong length = (zlong)-1;
+		if (errflag)
+		    return NULL;
+		if ((*check_offset2 && *check_offset2 != ':')) {
+		    zerr("invalid subscript: %s", check_offset);
+		    return NULL;
+		}
+		if (*check_offset2) {
+		    check_offset = check_colon_subscript(check_offset2 + 1,
+							 &check_offset2);
+		    if (*check_offset2 && *check_offset2 != ':') {
+			zerr("invalid length: %s", check_offset);
+			return NULL;
+		    }
+		    length = mathevali(check_offset);
+		    if (errflag)
+			return NULL;
+		    if (length < (zlong)0) {
+			zerr("invalid length: %s", check_offset);
+			return NULL;
+		    }
+		}
+		if (!isset(KSHARRAYS) && offset > 0)
+		    offset--;
+		if (isarr) {
+		    int alen = arrlen(aval), count;
+		    char **srcptr, **dstptr, **newarr;
+
+		    if (offset < 0) {
+			offset += alen;
+			if (offset < 0)
+			    offset = 0;
+		    }
+		    if (length < 0)
+		      length = alen;
+		    if (offset > alen)
+			offset = alen;
+		    if (offset + length > alen)
+			length = alen - offset;
+		    count = length;
+		    srcptr = aval + offset;
+		    newarr = dstptr = (char **)
+			zhalloc((length+1)*sizeof(char *));
+		    while (count--)
+			*dstptr++ = dupstring(*srcptr++);
+		    *dstptr = (char *)NULL;
+		    aval = newarr;
+		} else {
+		    char *sptr, *eptr;
+		    if (offset < 0) {
+			MB_METACHARINIT();
+			for (sptr = val; *sptr; ) {
+			    sptr += MB_METACHARLEN(sptr);
+			    offset++;
+			}
+			if (offset < 0)
+			    offset = 0;
+		    }
+		    MB_METACHARINIT();
+		    for (sptr = val; *sptr && offset; ) {
+			sptr += MB_METACHARLEN(sptr);
+			offset--;
+		    }
+		    if (length >= 0) {
+			for (eptr = sptr; *eptr && length; ) {
+			    eptr += MB_METACHARLEN(eptr);
+			    length--;
+			}
+			val = dupstrpfx(sptr, eptr - sptr);
+		    } else {
+			val = dupstring(sptr);
+		    }
+		}
+		if (!*check_offset2) {
+		    colf = 0;
+		} else {
+		    s = check_offset2 + 1;
+		}
+	    }
+	}
 	if (colf) {
 	    /*
 	     * History style colon modifiers.  May need to apply
diff --git a/Test/D04parameter.ztst b/Test/D04parameter.ztst
index fe978263f..7c6a465af 100644
--- a/Test/D04parameter.ztst
+++ b/Test/D04parameter.ztst
@@ -1256,3 +1256,49 @@
 0:$ZSH_EVAL_CONTEXT and $zsh_eval_context
 >toplevel
 >shfunc cmdsubst
+
+   foo="123456789"
+   print ${foo:3}
+   print ${foo: 1 + 3}
+   print ${foo:$(( 2 + 3))}
+   print ${foo:$(echo 3 + 3)}
+   print ${foo:3:1}
+   print ${foo: 1 + 3:(4-2)/2}
+   print ${foo:$(( 2 + 3)):$(( 7 - 6 ))}
+   print ${foo:$(echo 3 + 3):`echo 4 - 3`}
+   print ${foo: -1}
+   print ${foo: -10}
+0:Bash-style subscripts, scalar
+>3456789
+>456789
+>56789
+>6789
+>3
+>4
+>5
+>6
+>9
+>123456789
+
+   foo=(1 2 3 4 5 6 7 8 9)
+   print ${foo:3}
+   print ${foo: 1 + 3}
+   print ${foo:$(( 2 + 3))}
+   print ${foo:$(echo 3 + 3)}
+   print ${foo:3:1}
+   print ${foo: 1 + 3:(4-2)/2}
+   print ${foo:$(( 2 + 3)):$(( 7 - 6 ))}
+   print ${foo:$(echo 3 + 3):`echo 4 - 3`}
+   print ${foo: -1}
+   print ${foo: -10}
+0:Bash-style subscripts, array
+>3 4 5 6 7 8 9
+>4 5 6 7 8 9
+>5 6 7 8 9
+>6 7 8 9
+>3
+>4
+>5
+>6
+>9
+>1 2 3 4 5 6 7 8 9