diff options
author | Peter Stephenson <pws@users.sourceforge.net> | 2005-09-09 20:34:42 +0000 |
---|---|---|
committer | Peter Stephenson <pws@users.sourceforge.net> | 2005-09-09 20:34:42 +0000 |
commit | d33c6e502ab4d4398efa797702b6b115e6f5ff41 (patch) | |
tree | d0c5ed4be35dd4a11d99050dbac0138ed241b12b | |
parent | 58b9e731da1f2a4eac68e06e986c9c25adafc36e (diff) | |
download | zsh-d33c6e502ab4d4398efa797702b6b115e6f5ff41.tar.gz zsh-d33c6e502ab4d4398efa797702b6b115e6f5ff41.tar.xz zsh-d33c6e502ab4d4398efa797702b6b115e6f5ff41.zip |
21722: fix multibyte word stuff
-rw-r--r-- | ChangeLog | 7 |
-rw-r--r-- | Src/Zle/zle.h | 3 |
-rw-r--r-- | Src/Zle/zle_misc.c | 4 |
-rw-r--r-- | Src/Zle/zle_word.c | 88 |
-rw-r--r-- | Src/utils.c | 36 |
5 files changed, 93 insertions, 45 deletions
diff --git a/ChangeLog b/ChangeLog index 3eacfc50c..0651910c3 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,10 @@ +2005-09-09 Peter Stephenson <pws@pwstephenson.fsnet.co.uk> + + * 21722: Src/utils.c, Src/Zle/zle.h, Src/Zle/zle_misc.c, + Src/Zle/zle_word.c: fix broken transpose-words and improve + word character detection by assuming non-ASCII characters are word + characters if and only if alphanumeric. + 2005-09-09 Peter Stephenson <pws@csr.com> * 21720: Src/Zle/zle_main.c: getrestchar() from 21709 didn't diff --git a/Src/Zle/zle.h b/Src/Zle/zle.h index 2276f42b3..4b3f3f75a 100644 --- a/Src/Zle/zle.h +++ b/Src/Zle/zle.h @@ -69,11 +69,13 @@ typedef wint_t ZLE_INT_T; /* * TODO: doesn't work on arguments with side effects. * Also YUK. Not even sure this is guaranteed to work. + * Should be easy to do along the lines of wcsiword. */ #define ZC_iident(x) (x < 256 && iident((int)x)) #define ZC_tolower towlower #define ZC_toupper towupper +#define ZC_iword wcsiword #define LASTFULLCHAR lastchar_wide @@ -122,6 +124,7 @@ static inline int ZS_strncmp(ZLE_STRING_T s1, ZLE_STRING_T s2, size_t l) #define ZC_tolower tulower #define ZC_toupper tuupper +#define ZC_iword iword #define LASTFULLCHAR lastchar diff --git a/Src/Zle/zle_misc.c b/Src/Zle/zle_misc.c index 7159113a6..5a97b0a75 100644 --- a/Src/Zle/zle_misc.c +++ b/Src/Zle/zle_misc.c @@ -623,10 +623,10 @@ copyprevword(UNUSED(char **args)) int len, t0; for (t0 = zlecs - 1; t0 >= 0; t0--) - if (iword(zleline[t0])) + if (ZC_iword(zleline[t0])) break; for (; t0 >= 0; t0--) - if (!iword(zleline[t0])) + if (!ZC_iword(zleline[t0])) break; if (t0) t0++; diff --git a/Src/Zle/zle_word.c b/Src/Zle/zle_word.c index bbdd8f451..f432a46ef 100644 --- a/Src/Zle/zle_word.c +++ b/Src/Zle/zle_word.c @@ -30,11 +30,6 @@ #include "zle.mdh" #include "zle_word.pro" -/* - * TODO: use of iword needs completely rethinking for Unicode - * since we can't base it on a table lookup. 
- */ - /**/ int forwardword(char **args) @@ -49,11 +44,11 @@ forwardword(char **args) return ret; } while (n--) { - while (zlecs != zlell && iword(zleline[zlecs])) + while (zlecs != zlell && ZC_iword(zleline[zlecs])) zlecs++; if (wordflag && !n) return 0; - while (zlecs != zlell && !iword(zleline[zlecs])) + while (zlecs != zlell && !ZC_iword(zleline[zlecs])) zlecs++; } return 0; @@ -125,11 +120,11 @@ emacsforwardword(char **args) return ret; } while (n--) { - while (zlecs != zlell && !iword(zleline[zlecs])) + while (zlecs != zlell && !ZC_iword(zleline[zlecs])) zlecs++; if (wordflag && !n) return 0; - while (zlecs != zlell && iword(zleline[zlecs])) + while (zlecs != zlell && ZC_iword(zleline[zlecs])) zlecs++; } return 0; @@ -197,9 +192,9 @@ backwardword(char **args) return ret; } while (n--) { - while (zlecs && !iword(zleline[zlecs - 1])) + while (zlecs && !ZC_iword(zleline[zlecs - 1])) zlecs--; - while (zlecs && iword(zleline[zlecs - 1])) + while (zlecs && ZC_iword(zleline[zlecs - 1])) zlecs--; } return 0; @@ -267,9 +262,9 @@ emacsbackwardword(char **args) return ret; } while (n--) { - while (zlecs && !iword(zleline[zlecs - 1])) + while (zlecs && !ZC_iword(zleline[zlecs - 1])) zlecs--; - while (zlecs && iword(zleline[zlecs - 1])) + while (zlecs && ZC_iword(zleline[zlecs - 1])) zlecs--; } return 0; @@ -289,9 +284,9 @@ backwarddeleteword(char **args) return ret; } while (n--) { - while (x && !iword(zleline[x - 1])) + while (x && !ZC_iword(zleline[x - 1])) x--; - while (x && iword(zleline[x - 1])) + while (x && ZC_iword(zleline[x - 1])) x--; } backdel(zlecs - x); @@ -337,9 +332,9 @@ backwardkillword(char **args) return ret; } while (n--) { - while (x && !iword(zleline[x - 1])) + while (x && !ZC_iword(zleline[x - 1])) x--; - while (x && iword(zleline[x - 1])) + while (x && ZC_iword(zleline[x - 1])) x--; } backkill(zlecs - x, 1); @@ -356,9 +351,9 @@ upcaseword(UNUSED(char **args)) if (neg) n = -n; while (n--) { - while (zlecs != zlell && !iword(zleline[zlecs])) + while 
(zlecs != zlell && !ZC_iword(zleline[zlecs])) zlecs++; - while (zlecs != zlell && iword(zleline[zlecs])) { + while (zlecs != zlell && ZC_iword(zleline[zlecs])) { zleline[zlecs] = ZC_toupper(zleline[zlecs]); zlecs++; } @@ -378,9 +373,9 @@ downcaseword(UNUSED(char **args)) if (neg) n = -n; while (n--) { - while (zlecs != zlell && !iword(zleline[zlecs])) + while (zlecs != zlell && !ZC_iword(zleline[zlecs])) zlecs++; - while (zlecs != zlell && iword(zleline[zlecs])) { + while (zlecs != zlell && ZC_iword(zleline[zlecs])) { zleline[zlecs] = ZC_tolower(zleline[zlecs]); zlecs++; } @@ -401,11 +396,11 @@ capitalizeword(UNUSED(char **args)) n = -n; while (n--) { first = 1; - while (zlecs != zlell && !iword(zleline[zlecs])) + while (zlecs != zlell && !ZC_iword(zleline[zlecs])) zlecs++; - while (zlecs != zlell && iword(zleline[zlecs]) && !isalpha(zleline[zlecs])) + while (zlecs != zlell && ZC_iword(zleline[zlecs]) && !isalpha(zleline[zlecs])) zlecs++; - while (zlecs != zlell && iword(zleline[zlecs])) { + while (zlecs != zlell && ZC_iword(zleline[zlecs])) { zleline[zlecs] = (first) ? 
ZC_toupper(zleline[zlecs]) : ZC_tolower(zleline[zlecs]); first = 0; @@ -432,9 +427,9 @@ deleteword(char **args) return ret; } while (n--) { - while (x != zlell && !iword(zleline[x])) + while (x != zlell && !ZC_iword(zleline[x])) x++; - while (x != zlell && iword(zleline[x])) + while (x != zlell && ZC_iword(zleline[x])) x++; } foredel(x - zlecs); @@ -456,9 +451,9 @@ killword(char **args) return ret; } while (n--) { - while (x != zlell && !iword(zleline[x])) + while (x != zlell && !ZC_iword(zleline[x])) x++; - while (x != zlell && iword(zleline[x])) + while (x != zlell && ZC_iword(zleline[x])) x++; } forekill(x - zlecs, 0); @@ -469,36 +464,43 @@ killword(char **args) int transposewords(UNUSED(char **args)) { - int p1, p2, p3, p4, x = zlecs; - char *temp, *pp; + int p1, p2, p3, p4, len, x = zlecs; + ZLE_STRING_T temp, pp; int n = zmult; int neg = n < 0, ocs = zlecs; if (neg) n = -n; while (n--) { - while (x != zlell && zleline[x] != '\n' && !iword(zleline[x])) + while (x != zlell && zleline[x] != ZWC('\n') && !ZC_iword(zleline[x])) x++; - if (x == zlell || zleline[x] == '\n') { + if (x == zlell || zleline[x] == ZWC('\n')) { x = zlecs; - while (x && zleline[x - 1] != '\n' && !iword(zleline[x])) + while (x && zleline[x - 1] != ZWC('\n') && !ZC_iword(zleline[x])) x--; - if (!x || zleline[x - 1] == '\n') + if (!x || zleline[x - 1] == ZWC('\n')) return 1; } - for (p4 = x; p4 != zlell && iword(zleline[p4]); p4++); - for (p3 = p4; p3 && iword(zleline[p3 - 1]); p3--); + for (p4 = x; p4 != zlell && ZC_iword(zleline[p4]); p4++); + for (p3 = p4; p3 && ZC_iword(zleline[p3 - 1]); p3--); if (!p3) return 1; - for (p2 = p3; p2 && !iword(zleline[p2 - 1]); p2--); + for (p2 = p3; p2 && !ZC_iword(zleline[p2 - 1]); p2--); if (!p2) return 1; - for (p1 = p2; p1 && iword(zleline[p1 - 1]); p1--); - pp = temp = (char *)zhalloc(p4 - p1 + 1); - struncpy(&pp, (char *) zleline + p3, p4 - p3); - struncpy(&pp, (char *) zleline + p2, p3 - p2); - struncpy(&pp, (char *) zleline + p1, p2 - p1); - 
strncpy((char *)zleline + p1, temp, p4 - p1); + for (p1 = p2; p1 && ZC_iword(zleline[p1 - 1]); p1--); + + pp = temp = (ZLE_STRING_T)zhalloc((p4 - p1)*ZLE_CHAR_SIZE); + len = p4 - p3; + ZS_memcpy(pp, zleline + p3, len); + pp += len; + len = p3 - p2; + ZS_memcpy(pp, zleline + p2, len); + pp += len; + ZS_memcpy(pp, zleline + p1, p2 - p1); + + ZS_memcpy(zleline + p1, temp, p4 - p1); + zlecs = p4; } if (neg) diff --git a/Src/utils.c b/Src/utils.c index 8a887fa37..8b1326444 100644 --- a/Src/utils.c +++ b/Src/utils.c @@ -2469,6 +2469,42 @@ inittyptab(void) typtab[bangchar] |= ISPECIAL; } + +#ifdef ZLE_UNICODE_SUPPORT +/* + * iword() macro extended to support wide characters. + */ + +/**/ +mod_export int +wcsiword(wchar_t c) +{ + int len; + VARARR(char, outstr, MB_CUR_MAX); + /* + * Strategy: the shell requires that the multibyte representation + * be an extension of ASCII. So see if converting the character + * produces an ASCII character. If it does, use iword on that. + * If it doesn't, use iswalnum on the original character. This + * is pretty good most of the time. + * + * TODO: extend WORDCHARS to handle multibyte chars by some kind + * of hierarchical list or hash table. + */ + len = wctomb(outstr, c); + + if (len == 0) { + /* NULL is special */ + return iword(0); + } else if (len == 1 && isascii(*outstr)) { + return iword(*outstr); + } else { + return iswalnum(c); + } +} +#endif + + /**/ mod_export char ** arrdup(char **s) |