about summary refs log tree commit diff
diff options
context:
space:
mode:
authorPeter Stephenson <pws@users.sourceforge.net>2007-12-13 20:52:52 +0000
committerPeter Stephenson <pws@users.sourceforge.net>2007-12-13 20:52:52 +0000
commit5d5d1bef6e32c7c622f123069059e0e9a473b536 (patch)
treeb624a270660999292111006d1b2146d91ec45051
parent64c85ae68ab10392ecc6ef6a7a55c48e11ddb58f (diff)
downloadzsh-5d5d1bef6e32c7c622f123069059e0e9a473b536.tar.gz
zsh-5d5d1bef6e32c7c622f123069059e0e9a473b536.tar.xz
zsh-5d5d1bef6e32c7c622f123069059e0e9a473b536.zip
24234: apply typeset parameter flags consistently
-rw-r--r--ChangeLog7
-rw-r--r--Doc/Zsh/expn.yo31
-rw-r--r--Src/params.c125
-rw-r--r--Src/subst.c144
-rw-r--r--Test/B02typeset.ztst41
5 files changed, 193 insertions, 155 deletions
diff --git a/ChangeLog b/ChangeLog
index 196b6a6cf..1a49428c8 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,10 @@
+2007-12-13  Peter Stephenson  <p.w.stephenson@ntlworld.com>
+
+	* 24234: Doc/Zsh/expn.yo, Src/params.c, Src/subst.c,
+	Test/B02typeset.ztst: apply parameter flags defined with
+	typeset etc. consistently so that substitution code layered
+	above always sees the effects.
+
 2007-12-12  Peter Stephenson  <pws@csr.com>
 
 	* users/12325: Doc/Zsh/builtins.yo, Src/builtin.c:
diff --git a/Doc/Zsh/expn.yo b/Doc/Zsh/expn.yo
index f48616580..2803ae994 100644
--- a/Doc/Zsh/expn.yo
+++ b/Doc/Zsh/expn.yo
@@ -1062,7 +1062,12 @@ which is expanded by filename expansion to a full path; the outer
 substitution then applies the modifier tt(:h) and takes the directory part
 of the path.)
 )
-item(tt(2.) em(Parameter Subscripting))(
+time(tt(2.) em(Internal Parameter Flags))(
+Any parameter flags set by one of the tt(typeset) family of commands,
+in particular the tt(L), tt(R), tt(Z), tt(u) and tt(l) flags for padding
+and capitalization, are applied directly to the parameter value.
+)
+item(tt(3.) em(Parameter Subscripting))(
 If the value is a raw parameter reference with a subscript, such as
 tt(${)var(var)tt([3]}), the effect of subscripting is applied directly to
 the parameter.  Subscripts are evaluated left to right; subsequent
@@ -1072,11 +1077,11 @@ character of the first word, but tt(${var[2,4][2]}) is the entire third
 word (the second word of the range of words two through four of the
 original array).  Any number of subscripts may appear.
 )
-item(tt(3.) em(Parameter Name Replacement))(
+item(tt(4.) em(Parameter Name Replacement))(
 The effect of any tt((P)) flag, which treats the value so far as a
 parameter name and replaces it with the corresponding value, is applied.
 )
-item(tt(4.) em(Double-Quoted Joining))(
+item(tt(5.) em(Double-Quoted Joining))(
 If the value after this process is an array, and the substitution
 appears in double quotes, and no tt((@)) flag is present at the current
 level, the words of the value are joined with the first character of the
@@ -1084,7 +1089,7 @@ parameter tt($IFS), by default a space, between each word (single word
 arrays are not modified).  If the tt((j)) flag is present, that is used for
 joining instead of tt($IFS).
 )
-item(tt(5.) em(Nested Subscripting))(
+item(tt(6.) em(Nested Subscripting))(
 Any remaining subscripts (i.e. of a nested substitution) are evaluated at
 this point, based on whether the value is an array or a scalar.  As with
 tt(2.), multiple subscripts can appear.  Note that tt(${foo[2,4][2]}) is
@@ -1093,13 +1098,13 @@ tt("${${(@)foo[2,4]}[2]}") (the nested substitution returns an array in
 both cases), but not to tt("${${foo[2,4]}[2]}") (the nested substitution
 returns a scalar because of the quotes).
 )
-item(tt(6.) em(Modifiers))(
+item(tt(7.) em(Modifiers))(
 Any modifiers, as specified by a trailing `tt(#)', `tt(%)', `tt(/)'
 (possibly doubled) or by a set of modifiers of the form tt(:...) (see
 noderef(Modifiers) in noderef(History Expansion)), are applied to the words
 of the value at this level.
 )
-item(tt(7.) em(Forced Joining))(
+item(tt(8.) em(Forced Joining))(
 If the `tt((j))' flag is present, or no `tt((j))' flag is present but
 the string is to be split as given by rules tt(8.) or tt(9.), and joining
 did not take place at step tt(4.), any words in the value are joined
@@ -1107,36 +1112,36 @@ together using the given string or the first character of tt($IFS) if none.
 Note that the `tt((F))' flag implicitly supplies a string for joining in this
 manner.
 )
-item(tt(8.) em(Forced Splitting))(
+item(tt(9.) em(Forced Splitting))(
 If one of the `tt((s))', `tt((f))' or `tt((z))' flags are present, or the `tt(=)'
 specifier was present (e.g. tt(${=)var(var)tt(})), the word is split on
 occurrences of the specified string, or (for tt(=) with neither of the two
 flags present) any of the characters in tt($IFS).
 )
-item(tt(9.) em(Shell Word Splitting))(
+item(tt(10.) em(Shell Word Splitting))(
 If no `tt((s))', `tt((f))' or `tt(=)' was given, but the word is not
 quoted and the option tt(SH_WORD_SPLIT) is set, the word is split on
 occurrences of any of the characters in tt($IFS).  Note this step, too,
 takes place at all levels of a nested substitution.
 )
-item(tt(10.) em(Uniqueness))(
+item(tt(11.) em(Uniqueness))(
 If the result is an array and the `tt((u))' flag was present, duplicate
 elements are removed from the array.
 )
-item(tt(11.) em(Ordering))(
+item(tt(12.) em(Ordering))(
 If the result is still an array and one of the `tt((o))' or `tt((O))' flags
 was present, the array is reordered.
 )
-item(tt(12.) em(Re-Evaluation))(
+item(tt(13.) em(Re-Evaluation))(
 Any `tt((e))' flag is applied to the value, forcing it to be re-examined
 for new parameter substitutions, but also for command and arithmetic
 substitutions.
 )
-item(tt(13.) em(Padding))(
+item(tt(14.) em(Padding))(
 Any padding of the value by the `tt(LPAR()l.)var(fill)tt(.RPAR())' or
 `tt(LPAR()r.)var(fill)tt(.RPAR())' flags is applied.
 )
-item(tt(14.) em(Semantic Joining))(
+item(tt(15.) em(Semantic Joining))(
 In contexts where expansion semantics requires a single word to
 result, all words are rejoined with the first character of tt(IFS)
 between.  So in `tt(${LPAR()P)tt(RPAR()${LPAR()f)tt(RPAR()lines}})'
diff --git a/Src/params.c b/Src/params.c
index 0fe1a6dd8..9ebb98dcf 100644
--- a/Src/params.c
+++ b/Src/params.c
@@ -1884,11 +1884,134 @@ getstrvalue(Value v)
 	s = v->pm->gsu.s->getfn(v->pm);
 	break;
     default:
-	s = NULL;
+	s = "";
 	DPUTS(1, "BUG: param node without valid type");
 	break;
     }
 
+    if (v->pm->node.flags & (PM_LEFT|PM_RIGHT_B|PM_RIGHT_Z)) {
+	int fwidth = v->pm->width ? v->pm->width : MB_METASTRLEN(s);
+	switch (v->pm->node.flags & (PM_LEFT | PM_RIGHT_B | PM_RIGHT_Z)) {
+	    char *t, *tend;
+	    unsigned int t0;
+
+	case PM_LEFT:
+	case PM_LEFT | PM_RIGHT_Z:
+	    t = s;
+	    if (v->pm->node.flags & PM_RIGHT_Z)
+		while (*t == '0')
+		    t++;
+	    else
+		while (iblank(*t))
+		    t++;
+	    MB_METACHARINIT();
+	    for (tend = t, t0 = 0; t0 < fwidth && *tend; t0++)
+		tend += MB_METACHARLEN(tend);
+	    /*
+	     * t0 is the number of characters from t used,
+	     * hence (fwidth - t0) is the number of padding
+	     * characters.  fwidth is a misnomer: we use
+	     * character counts, not character widths.
+	     *
+	     * (tend - t) is the number of bytes we need
+	     * to get fwidth characters or the entire string;
+	     * the characters may be multiple bytes.
+	     */
+	    fwidth -= t0; /* padding chars remaining */
+	    t0 = tend - t; /* bytes to copy from string */
+	    s = (char *) hcalloc(t0 + fwidth + 1);
+	    memcpy(s, t, t0);
+	    if (fwidth)
+		memset(s + t0, ' ', fwidth);
+	    s[t0 + fwidth] = '\0';
+	    break;
+	case PM_RIGHT_B:
+	case PM_RIGHT_Z:
+	case PM_RIGHT_Z | PM_RIGHT_B:
+	    {
+		int zero = 1;
+		/* Calculate length in possibly multibyte chars */
+		unsigned int charlen = MB_METASTRLEN(s);
+
+		if (charlen < fwidth) {
+		    char *valprefend = s;
+		    int preflen;
+		    if (v->pm->node.flags & PM_RIGHT_Z) {
+			/*
+			 * This is a documented feature: when deciding
+			 * whether to pad with zeroes, ignore
+			 * leading blanks already in the value;
+			 * only look for numbers after that.
+			 * Not sure how useful this really is.
+			 * It's certainly confusing to code around.
+			 */
+			for (t = s; iblank(*t); t++)
+			    ;
+			/*
+			 * Allow padding after initial minus
+			 * for numeric variables.
+			 */
+			if ((v->pm->node.flags &
+			     (PM_INTEGER|PM_EFLOAT|PM_FFLOAT)) &&
+			    *t == '-')
+			    t++;
+			/*
+			 * Allow padding after initial 0x or
+			 * base# for integer variables.
+			 */
+			if (v->pm->node.flags & PM_INTEGER) {
+			    if (isset(CBASES) &&
+				t[0] == '0' && t[1] == 'x')
+				t += 2;
+			    else if ((valprefend = strchr(t, '#')))
+				t = valprefend + 1;
+			}
+			valprefend = t;
+			if (!*t)
+			    zero = 0;
+			else if (v->pm->node.flags &
+				 (PM_INTEGER|PM_EFLOAT|PM_FFLOAT)) {
+			    /* zero always OK */
+			} else if (!idigit(*t))
+			    zero = 0;
+		    }
+		    /* number of characters needed for padding */
+		    fwidth -= charlen;
+		    /* bytes from original string */
+		    t0 = strlen(s);
+		    t = (char *) hcalloc(fwidth + t0 + 1);
+		    /* prefix guaranteed to be single byte chars */
+		    preflen = valprefend - s;
+		    memset(t + preflen, 
+			   (((v->pm->node.flags & PM_RIGHT_B)
+			     || !zero) ?       ' ' : '0'), fwidth);
+		    /*
+		     * Copy - or 0x or base# before any padding
+		     * zeroes.
+		     */
+		    if (preflen)
+			memcpy(t, s, preflen);
+		    memcpy(t + preflen + fwidth,
+			   valprefend, t0 - preflen);
+		    t[fwidth + t0] = '\0';
+		    s = t;
+		} else {
+		    /* Need to skip (charlen - fwidth) chars */
+		    for (t0 = charlen - fwidth; t0; t0--)
+			s += MB_METACHARLEN(s);
+		}
+	    }
+	    break;
+	}
+    }
+    switch (v->pm->node.flags & (PM_LOWER | PM_UPPER)) {
+    case PM_LOWER:
+	s = casemodify(s, CASMOD_LOWER);
+	break;
+    case PM_UPPER:
+	s = casemodify(s, CASMOD_UPPER);
+	break;
+    }
     if (v->start == 0 && v->end == -1)
 	return s;
 
diff --git a/Src/subst.c b/Src/subst.c
index e586ede9a..e8257163b 100644
--- a/Src/subst.c
+++ b/Src/subst.c
@@ -1320,11 +1320,6 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
     /* Scalar and array value, see isarr above */
     char *val = NULL, **aval = NULL;
     /*
-     * Padding based on setting in parameter rather than substitution
-     * flags.  This is only used locally.
-     */
-    unsigned int fwidth = 0;
-    /*
      * vbuf and v are both used to retrieve parameter values; this
      * is a kludge, we pass down vbuf and it may or may not return v.
      */
@@ -2061,143 +2056,12 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
 	    }
 	    if (!vunset) {
 		/*
-		 * There really is a value.  Apply any necessary
-		 * padding or case transformation.  Note these
-		 * are the per-parameter transformations specified
-		 * with typeset, not the per-substitution ones set
-		 * by flags.  TODO: maybe therefore this would
-		 * be more consistent if moved into getstrvalue()?
-		 * Bet that's easier said than done.
-		 *
-		 * TODO: use string widths.  In fact, shouldn't the
-		 * strlen()s be ztrlen()s anyway?
+		 * There really is a value.  Padding and case
+		 * transformations used to be handled here, but
+		 * are now handled in getstrvalue() for greater
+		 * consistency.
 		 */
 		val = getstrvalue(v);
-		fwidth = v->pm->width ? v->pm->width : (int)strlen(val);
-		switch (v->pm->node.flags & (PM_LEFT | PM_RIGHT_B | PM_RIGHT_Z)) {
-		    char *t, *tend;
-		    unsigned int t0;
-
-		case PM_LEFT:
-		case PM_LEFT | PM_RIGHT_Z:
-		    t = val;
-		    if (v->pm->node.flags & PM_RIGHT_Z)
-			while (*t == '0')
-			    t++;
-		    else
-			while (iblank(*t))
-			    t++;
-		    MB_METACHARINIT();
-		    for (tend = t, t0 = 0; t0 < fwidth && *tend; t0++)
-			tend += MB_METACHARLEN(tend);
-		    /*
-		     * t0 is the number of characters from t used,
-		     * hence (fwidth - t0) is the number of padding
-		     * characters.  fwidth is a misnomer: we use
-		     * character counts, not character widths.
-		     *
-		     * (tend - t) is the number of bytes we need
-		     * to get fwidth characters or the entire string;
-		     * the characters may be multiple bytes.
-		     */
-		    fwidth -= t0; /* padding chars remaining */
-		    t0 = tend - t; /* bytes to copy from string */
-		    val = (char *) hcalloc(t0 + fwidth + 1);
-		    memcpy(val, t, t0);
-		    if (fwidth)
-			memset(val + t0, ' ', fwidth);
-		    val[t0 + fwidth] = '\0';
-		    copied = 1;
-		    break;
-		case PM_RIGHT_B:
-		case PM_RIGHT_Z:
-		case PM_RIGHT_Z | PM_RIGHT_B:
-		    {
-			int zero = 1;
-			/* Calculate length in possibly multibyte chars */
-			unsigned int charlen = MB_METASTRLEN(val);
-
-			if (charlen < fwidth) {
-			    char *valprefend = val;
-			    int preflen;
-			    if (v->pm->node.flags & PM_RIGHT_Z) {
-				/*
-				 * This is a documented feature: when deciding
-				 * whether to pad with zeroes, ignore
-				 * leading blanks already in the value;
-				 * only look for numbers after that.
-				 * Not sure how useful this really is.
-				 * It's certainly confusing to code around.
-				 */
-				for (t = val; iblank(*t); t++)
-				    ;
-				/*
-				 * Allow padding after initial minus
-				 * for numeric variables.
-				 */
-				if ((v->pm->node.flags &
-				     (PM_INTEGER|PM_EFLOAT|PM_FFLOAT)) &&
-				    *t == '-')
-				    t++;
-				/*
-				 * Allow padding after initial 0x or
-				 * base# for integer variables.
-				 */
-				if (v->pm->node.flags & PM_INTEGER) {
-				    if (isset(CBASES) &&
-					t[0] == '0' && t[1] == 'x')
-					t += 2;
-				    else if ((valprefend = strchr(t, '#')))
-					t = valprefend + 1;
-				}
-				valprefend = t;
-				if (!*t)
-				    zero = 0;
-				else if (v->pm->node.flags &
-					 (PM_INTEGER|PM_EFLOAT|PM_FFLOAT)) {
-				    /* zero always OK */
-				} else if (!idigit(*t))
-				    zero = 0;
-			    }
-			    /* number of characters needed for padding */
-			    fwidth -= charlen;
-			    /* bytes from original string */
-			    t0 = strlen(val);
-			    t = (char *) hcalloc(fwidth + t0 + 1);
-			    /* prefix guaranteed to be single byte chars */
-			    preflen = valprefend - val;
-			    memset(t + preflen, 
-				   (((v->pm->node.flags & PM_RIGHT_B)
-				     || !zero) ?       ' ' : '0'), fwidth);
-			    /*
-			     * Copy - or 0x or base# before any padding
-			     * zeroes.
-			     */
-			    if (preflen)
-				memcpy(t, val, preflen);
-			    memcpy(t + preflen + fwidth,
-				   valprefend, t0 - preflen);
-			    t[fwidth + t0] = '\0';
-			    val = t;
-			    copied = 1;
-			} else {
-			    /* Need to skip (charlen - fwidth) chars */
-			    for (t0 = charlen - fwidth; t0; t0--)
-				val += MB_METACHARLEN(val);
-			}
-		    }
-		    break;
-		}
-		switch (v->pm->node.flags & (PM_LOWER | PM_UPPER)) {
-		case PM_LOWER:
-		    val = casemodify(val, CASMOD_LOWER);
-		    copied = 1;
-		    break;
-		case PM_UPPER:
-		    val = casemodify(val, CASMOD_UPPER);
-		    copied = 1;
-		    break;
-		}
 	    }
 	}
 	/*
diff --git a/Test/B02typeset.ztst b/Test/B02typeset.ztst
index 1ec35183c..e9837a1c7 100644
--- a/Test/B02typeset.ztst
+++ b/Test/B02typeset.ztst
@@ -18,7 +18,6 @@
 #  Function tracing (typeset -ft)		E02xtrace
 
 # Not yet tested:
-#  Case conversion (-l, -u)
 #  Assorted illegal flag combinations
 
 %prep
@@ -339,6 +338,28 @@
 >'0x0000002B'
 >'-0x000002B'
 
+ setopt cbases
+ integer -Z 10 -i 16 foozi16c
+ for foozi16c in 0x1234 -0x1234; do
+   for (( i = 1; i <= 5; i++ )); do
+       print "'${foozi16c[i,11-i]}'"
+   done
+   print "'${foozi16c[-2]}'"
+ done
+0:Extracting substrings from padded integers
+>'0x00001234'
+>'x0000123'
+>'000012'
+>'0001'
+>'00'
+>'3'
+>'-0x0001234'
+>'0x000123'
+>'x00012'
+>'0001'
+>'00'
+>'3'
+
  typeset -F 3 -Z 10 foozf
  for foozf in 3.14159 -3.14159 4 -4; do
    print "'$foozf'"
@@ -405,3 +426,21 @@
 >FOOENV=BAR
 >Exec
 >Unset
+
+ local case1=upper
+ typeset -u case1
+ print $case1
+ UPPER="VALUE OF \$UPPER"
+ print ${(P)case1}
+0:Upper case conversion
+>UPPER
+>VALUE OF $UPPER
+
+ local case2=LOWER
+ typeset -l case2
+ print $case2
+ lower="value of \$lower"
+ print ${(P)case2}
+0:Lower case conversion
+>lower
+>value of $lower