From e86b3cce47e62c263301322ce24b56cf04b8cdb8 Mon Sep 17 00:00:00 2001
From: Peter Stephenson
Date: Thu, 10 Sep 2015 20:05:48 +0100
Subject: 36478: Add [[:INCOMPLETE:]] and [[:INVALID:]] pattern tests.
---
Src/pattern.c | 43 ++++++++++++++++++++++++++++++++++---------
1 file changed, 34 insertions(+), 9 deletions(-)
(limited to 'Src/pattern.c')
diff --git a/Src/pattern.c b/Src/pattern.c
index b4ba33e49..3b55ccf1c 100644
--- a/Src/pattern.c
+++ b/Src/pattern.c
@@ -145,7 +145,7 @@ typedef union upat *Upat;
*
* P_ANY, P_ANYOF: the operand is a null terminated
* string. Normal characters match as expected. Characters
- * in the range Meta+PP_ALPHA..Meta+PP_UNKNWN do the appropriate
+ * in the range Meta+PP_ALPHA..Meta+PP_UNKWN do the appropriate
* Posix range tests. This relies on imeta returning true for these
* characters. We treat unknown POSIX ranges as never matching.
* PP_RANGE means the next two (possibly metafied) characters form
@@ -1119,7 +1119,7 @@ patgetglobflags(char **strp, long *assertp, int *ignore)
static const char *colon_stuffs[] = {
"alpha", "alnum", "ascii", "blank", "cntrl", "digit", "graph",
"lower", "print", "punct", "space", "upper", "xdigit", "IDENT",
- "IFS", "IFSSPACE", "WORD", NULL
+ "IFS", "IFSSPACE", "WORD", "INCOMPLETE", "INVALID", NULL
};
/*
@@ -1870,9 +1870,9 @@ static int globdots; /* Glob initial dots? */
#ifdef MULTIBYTE_SUPPORT
/* Get a character from the start point in a string */
-#define CHARREF(x, y) charref((x), (y))
+#define CHARREF(x, y) charref((x), (y), (int *)NULL)
static wchar_t
-charref(char *x, char *y)
+charref(char *x, char *y, int *zmb_ind)
{
wchar_t wc;
size_t ret;
@@ -1886,9 +1886,13 @@ charref(char *x, char *y)
/* Error. */
/* Reset the shift state for next time. */
memset(&shiftstate, 0, sizeof(shiftstate));
+ if (zmb_ind)
+ *zmb_ind = (ret == MB_INVALID) ? ZMB_INVALID : ZMB_INCOMPLETE;
return WCHAR_INVALID(*x);
}
+ if (zmb_ind)
+ *zmb_ind = ZMB_VALID;
return wc;
}
@@ -2580,10 +2584,11 @@ patmatch(Upat prog)
fail = 1;
else {
#ifdef MULTIBYTE_SUPPORT
- wchar_t cr = CHARREF(patinput, patinend);
+ int zmb_ind;
+ wchar_t cr = charref(patinput, patinend, &zmb_ind);
char *scanop = (char *)P_OPERAND(scan);
if (patglobflags & GF_MULTIBYTE) {
- if (mb_patmatchrange(scanop, cr, NULL, NULL) ^
+ if (mb_patmatchrange(scanop, cr, zmb_ind, NULL, NULL) ^
(P_OP(scan) == P_ANYOF))
fail = 1;
else
@@ -3351,6 +3356,9 @@ patmatch(Upat prog)
* The null-terminated specification is in range; the test
* character is in ch.
*
+ * zmb_ind is one of the enum values defined above charref(), indicating
+ * an incomplete or invalid multibyte character.
+ *
* indptr is used by completion matching, which is why this
* function is exported. If indptr is not NULL we set *indptr
* to the index of the character in the range string, adjusted
@@ -3367,7 +3375,7 @@ patmatch(Upat prog)
/**/
mod_export int
-mb_patmatchrange(char *range, wchar_t ch, wint_t *indptr, int *mtp)
+mb_patmatchrange(char *range, wchar_t ch, int zmb_ind, wint_t *indptr, int *mtp)
{
wchar_t r1, r2;
@@ -3476,6 +3484,14 @@ mb_patmatchrange(char *range, wchar_t ch, wint_t *indptr, int *mtp)
*indptr += r2 - r1;
}
break;
+ case PP_INCOMPLETE:
+ if (zmb_ind == ZMB_INCOMPLETE)
+ return 1;
+ break;
+ case PP_INVALID:
+ if (zmb_ind == ZMB_INVALID)
+ return 1;
+ break;
case PP_UNKWN:
DPUTS(1, "BUG: unknown posix range passed through.\n");
break;
@@ -3545,6 +3561,8 @@ mb_patmatchindex(char *range, wint_t ind, wint_t *chr, int *mtp)
case PP_IFS:
case PP_IFSSPACE:
case PP_WORD:
+ case PP_INCOMPLETE:
+ case PP_INVALID:
if (!ind) {
*mtp = swtype;
return 1;
@@ -3698,6 +3716,10 @@ patmatchrange(char *range, int ch, int *indptr, int *mtp)
if (indptr && r1 < r2)
*indptr += r2 - r1;
break;
+ case PP_INCOMPLETE:
+ case PP_INVALID:
+ /* Never true if not in multibyte mode */
+ break;
case PP_UNKWN:
DPUTS(1, "BUG: unknown posix range passed through.\n");
break;
@@ -3768,6 +3790,8 @@ patmatchindex(char *range, int ind, int *chr, int *mtp)
case PP_IFS:
case PP_IFSSPACE:
case PP_WORD:
+ case PP_INCOMPLETE:
+ case PP_INVALID:
if (!ind) {
*mtp = swtype;
return 1;
@@ -3851,9 +3875,10 @@ static int patrepeat(Upat p, char *charstart)
case P_ANYBUT:
while (scan < patinend) {
#ifdef MULTIBYTE_SUPPORT
- wchar_t cr = CHARREF(scan, patinend);
+ int zmb_ind;
+ wchar_t cr = charref(scan, patinend, &zmb_ind);
if (patglobflags & GF_MULTIBYTE) {
- if (mb_patmatchrange(opnd, cr, NULL, NULL) ^
+ if (mb_patmatchrange(opnd, cr, zmb_ind, NULL, NULL) ^
(P_OP(p) == P_ANYOF))
break;
} else if (patmatchrange(opnd, (int)cr, NULL, NULL) ^
--
cgit 1.4.1