From 4fc0fadfaa5dc5a586f231f32cdab8a43906f677 Mon Sep 17 00:00:00 2001
From: Peter Stephenson <pws@users.sourceforge.net>
Date: Mon, 9 May 2005 10:46:08 +0000
Subject: Add [[:IFS:]] etc. tests.

---
 ChangeLog         |  5 ++++
 Doc/Zsh/expn.yo   | 87 +++++++++++++++++++++++++++++++++++++++++++++++--------
 Src/pattern.c     | 32 ++++++++++++++++++--
 Test/D02glob.ztst | 25 ++++++++++++++++
 4 files changed, 135 insertions(+), 14 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index a254f0a32..4b61f4ee5 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,8 @@
+2005-05-09  Peter Stephenson  <pws@csr.com>
+
+	* 21211: Doc/Zsh/expn.yo, Src/pattern.c, Test/D02glob.ztst:
+	Add [[:IDENT:]], [[:IFS:]], [[:IFSSPACE:]], [[:WORD:]] tests.
+
 2005-05-08  Bart Schaefer  <schaefer@zsh.org>
 
 	* 21235, 21236: Completion/Unix/Command/_ssh: fix remote filename
diff --git a/Doc/Zsh/expn.yo b/Doc/Zsh/expn.yo
index a16e252e6..a6235222f 100644
--- a/Doc/Zsh/expn.yo
+++ b/Doc/Zsh/expn.yo
@@ -1224,19 +1224,82 @@ A `tt(-)' or `tt(])' may be matched by including it as the
 first character in the list.
 cindex(character classes)
 There are also several named classes of characters, in the form
-`tt([:)var(name)tt(:])' with the following meanings:  `tt([:alnum:])'
-alphanumeric, `tt([:alpha:])' alphabetic,
-`tt([:ascii:])' 7-bit,
-`tt([:blank:])' space or tab,
-`tt([:cntrl:])' control character, `tt([:digit:])' decimal
-digit, `tt([:graph:])' printable character except whitespace,
-`tt([:lower:])' lowercase letter, `tt([:print:])' printable character,
-`tt([:punct:])' printable character neither alphanumeric nor whitespace,
-`tt([:space:])' whitespace character, `tt([:upper:])' uppercase letter, 
-`tt([:xdigit:])' hexadecimal digit.  These use the macros provided by
+`tt([:)var(name)tt(:])' with the following meanings.
+The first set use the macros provided by
 the operating system to test for the given character combinations,
-including any modifications due to local language settings:  see
-manref(ctype)(3).  Note that the square brackets are additional
+including any modifications due to local language settings; see
+manref(ctype)(3):
+
+startitem()
+item(tt([:alnum:]))(
+The character is alphanumeric
+)
+item(tt([:alpha:]))
+(
+The character is alphabetic
+)
+item(tt([:ascii:]))(
+The character is 7-bit, i.e. is a single-byte character without
+the top bit set.
+)
+item(tt([:blank:]))(
+The character is either space or tab
+)
+item(tt([:cntrl:]))(
+The character is a control character
+)
+item(tt([:digit:]))(
+The character is a decimal digit
+)
+item(tt([:graph:]))(
+The character is a printable character other than whitespace
+)
+item(tt([:lower:]))(
+The character is a lowercase letter
+)
+item(tt([:print:]))(
+The character is printable
+)
+item(tt([:punct:]))(
+The character is printable but neither alphanumeric nor whitespace
+)
+item(tt([:space:]))(
+The character is whitespace
+)
+item(tt([:upper:]))(
+The character is an uppercase letter
+)
+item(tt([:xdigit:]))(
+The character is a hexadecimal digit
+)
+enditem()
+
+Another set of named classes is handled internally by the shell and
+is not sensitive to the locale:
+
+startitem()
+item(tt([:IDENT:]))(
+The character is allowed to form part of a shell identifier, such
+as a parameter name
+)
+item(tt([:IFS:]))(
+The character is used as an input field separator, i.e. is contained in the
+tt(IFS) parameter
+)
+item(tt([:IFSSPACE:]))(
+The character is an IFS white space character; see the documentation
+for tt(IFS) in
+ifzman(the zmanref(zshparams) manual page)\
+ifnzman(noderef(Parameters Used By The Shell))\
+.
+)
+item(tt([:WORD:]))(
+The character is treated as part of a word; this test is sensitive
+to the value of the tt(WORDCHARS) parameter
+)
+enditem()
+
+Note that the square brackets are additional
 to those enclosing the whole set of characters, so to test for a
 single alphanumeric character you need `tt([[:alnum:]])'.  Named
 character sets can be used alongside other types,
diff --git a/Src/pattern.c b/Src/pattern.c
index ed88bb7ce..393d9bf41 100644
--- a/Src/pattern.c
+++ b/Src/pattern.c
@@ -193,8 +193,12 @@ typedef union upat *Upat;
 #define PP_SPACE  11
 #define PP_UPPER  12
 #define PP_XDIGIT 13
-#define PP_UNKWN  14
-#define PP_RANGE  15
+#define PP_IDENT  14
+#define PP_IFS    15
+#define PP_IFSSPACE   16
+#define PP_WORD   17
+#define PP_UNKWN  18
+#define PP_RANGE  19
 
 #define	P_OP(p)		((p)->l & 0xff)
 #define	P_NEXT(p)	((p)->l >> 8)
@@ -1118,6 +1122,14 @@ patcomppiece(int *flagp)
 			    ch = PP_UPPER;
 			else if (!strncmp(patparse, "xdigit", len))
 			    ch = PP_XDIGIT;
+			else if (!strncmp(patparse, "IDENT", len))
+			    ch = PP_IDENT;
+			else if (!strncmp(patparse, "IFS", len))
+			    ch = PP_IFS;
+			else if (!strncmp(patparse, "IFSSPACE", len))
+			    ch = PP_IFSSPACE;
+			else if (!strncmp(patparse, "WORD", len))
+			    ch = PP_WORD;
 			else
 			    ch = PP_UNKWN;
 			patparse = nptr + 2;
@@ -2724,6 +2736,22 @@ patmatchrange(char *range, int ch)
 		if (isxdigit(ch))
 		    return 1;
 		break;
+	    case PP_IDENT:
+		if (iident(ch))
+		    return 1;
+		break;
+	    case PP_IFS:
+		if (isep(ch))
+		    return 1;
+		break;
+	    case PP_IFSSPACE:
+		if (iwsep(ch))
+		    return 1;
+		break;
+	    case PP_WORD:
+		if (iword(ch))
+		    return 1;
+		break;
 	    case PP_RANGE:
 		range++;
 		r1 = STOUC(UNMETA(range));
diff --git a/Test/D02glob.ztst b/Test/D02glob.ztst
index e423176a3..d21aaf8c4 100644
--- a/Test/D02glob.ztst
+++ b/Test/D02glob.ztst
@@ -323,3 +323,28 @@
  print glob.tmp/ra=1.0_et=3.5/???
 0:Bug with intermediate paths with plain strings but tokenized characters
 >glob.tmp/ra=1.0_et=3.5/foo
+
+ doesmatch() {
+   setopt localoptions extendedglob
+   print -n $1 $2\ 
+   if [[ $1 = $~2 ]]; then print yes; else print no; fi;
+ }
+ doesmatch MY_IDENTIFIER '[[:IDENT:]]##'
+ doesmatch YOUR:IDENTIFIER '[[:IDENT:]]##'
+ IFS=$'\n' doesmatch $'\n' '[[:IFS:]]'
+ IFS=' ' doesmatch $'\n' '[[:IFS:]]'
+ IFS=':' doesmatch : '[[:IFSSPACE:]]'
+ IFS=' ' doesmatch ' ' '[[:IFSSPACE:]]'
+ WORDCHARS="" doesmatch / '[[:WORD:]]'
+ WORDCHARS="/" doesmatch / '[[:WORD:]]'
+0:Named character sets handled internally
+>MY_IDENTIFIER [[:IDENT:]]## yes
+>YOUR:IDENTIFIER [[:IDENT:]]## no
+>
+> [[:IFS:]] yes
+>
+> [[:IFS:]] no
+>: [[:IFSSPACE:]] no
+>  [[:IFSSPACE:]] yes
+>/ [[:WORD:]] no
+>/ [[:WORD:]] yes
-- 
cgit 1.4.1