From 97dcf155566083a4b6a43040184cb477d3ff2baf Mon Sep 17 00:00:00 2001 From: Peter Stephenson Date: Sun, 13 Apr 2008 16:58:42 +0000 Subject: 24808: initial support for combining characters in zle --- ChangeLog | 8 ++ Doc/Zsh/options.yo | 18 +++ Src/Zle/compcore.c | 8 +- Src/Zle/complist.c | 12 +- Src/Zle/compresult.c | 12 +- Src/Zle/deltochar.c | 8 +- Src/Zle/zle.h | 33 ++++- Src/Zle/zle_hist.c | 4 +- Src/Zle/zle_misc.c | 44 +++--- Src/Zle/zle_move.c | 192 ++++++++++++++++++++++---- Src/Zle/zle_refresh.c | 375 +++++++++++++++++++++++++++++++++++++++++--------- Src/Zle/zle_tricky.c | 18 +-- Src/Zle/zle_utils.c | 129 ++++++++++++----- Src/Zle/zle_vi.c | 56 +++++--- Src/Zle/zle_word.c | 102 +++++++++----- Src/options.c | 1 + Src/utils.c | 8 ++ Src/zsh.h | 11 ++ 18 files changed, 801 insertions(+), 238 deletions(-) diff --git a/ChangeLog b/ChangeLog index e4a3e7d77..e1f660e98 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,13 @@ 2008-04-13 Peter Stephenson + * 24808: Doc/Zsh/options.yo, Src/options.c, Src/utils.c, Src/zsh.h, + Src/Zle/compcore.c, Src/Zle/complist.c, Src/Zle/compresult.c, + Src/Zle/deltochar.c, Src/Zle/zle.h, Src/Zle/zle_hist.c, + Src/Zle/zle_misc.c, Src/Zle/zle_move.c, Src/Zle/zle_refresh.c, + Src/Zle/zle_tricky.c, Src/Zle/zle_utils.c, Src/Zle/zle_vi.c, + Src/Zle/zle_word.c: add COMBINING_CHARS option and basic + zle support. + * Miek Gieben: users/12781: Functions/Misc/zmv: too many backslashes in Usage message. diff --git a/Doc/Zsh/options.yo b/Doc/Zsh/options.yo index b0b575a23..5b41de725 100644 --- a/Doc/Zsh/options.yo +++ b/Doc/Zsh/options.yo @@ -1444,6 +1444,24 @@ cindex(enabling the beep) item(tt(BEEP) (tt(PLUS()B)) )( Beep on error in ZLE. ) +pindex(COMBINING_CHARS) +cindex(characters, (Unicode) combining) +cindex(combining characters (Unicode)) +cindex(Unicode combining characters) +item(tt(COMBINING_CHARS))( +Assume that the terminal displays combining characters correctly. 
+Specifically, if a base alphanumeric character is followed by one or more +zero-width punctuation characters, assume that the zero-width characters +will be displayed as modifications to the base character within the +same width. Not all terminals handle this. If this option is not +set, zero-width characters are displayed separately with special +mark-up. + +If this option is set, the pattern test tt([[:WORD:]]) matches a +zero-width punctuation character on the assumption that it will be +used as part of a word in combination with a word character. +Otherwise the base shell does not handle combining characters specially. +) pindex(EMACS) item(tt(EMACS))( If ZLE is loaded, turning on this option has the equivalent effect diff --git a/Src/Zle/compcore.c b/Src/Zle/compcore.c index 32776dc83..e66c1f71d 100644 --- a/Src/Zle/compcore.c +++ b/Src/Zle/compcore.c @@ -349,7 +349,7 @@ do_completion(UNUSED(Hookdef dummy), Compldat dat) if (makecomplist(s, incmd, lst)) { /* Error condition: feeeeeeeeeeeeep(). */ zlemetacs = 0; - foredel(zlemetall); + foredel(zlemetall, CUT_RAW); inststr(origline); zlemetacs = origcs; clearlist = 1; @@ -381,7 +381,7 @@ do_completion(UNUSED(Hookdef dummy), Compldat dat) } else if (!useline && uselist) { /* All this and the guy only wants to see the list, sigh. 
*/ zlemetacs = 0; - foredel(zlemetall); + foredel(zlemetall, CUT_RAW); inststr(origline); zlemetacs = origcs; showinglist = -2; @@ -429,7 +429,7 @@ do_completion(UNUSED(Hookdef dummy), Compldat dat) if (forcelist) clearlist = 1; zlemetacs = 0; - foredel(zlemetall); + foredel(zlemetall, CUT_RAW); inststr(origline); zlemetacs = origcs; } @@ -519,7 +519,7 @@ after_complete(UNUSED(Hookdef dummy), int *dat) if (ret >= 2) { fixsuffix(); zlemetacs = 0; - foredel(zlemetall); + foredel(zlemetall, CUT_RAW); inststr(origline); zlemetacs = origcs; if (ret == 2) { diff --git a/Src/Zle/complist.c b/Src/Zle/complist.c index 698837942..ad5dce221 100644 --- a/Src/Zle/complist.c +++ b/Src/Zle/complist.c @@ -2052,7 +2052,7 @@ setmstatus(char *status, char *sline, int sll, int scs, s[lastend - zlemetacs] = '\0'; } zlemetacs = 0; - foredel(zlemetall); + foredel(zlemetall, CUT_RAW); spaceinline(sll); memcpy(zlemetaline, sline, sll); zlemetacs = scs; @@ -2298,7 +2298,7 @@ domenuselect(Hookdef dummy, Chdata dat) */ mode = MM_INTER; zlemetacs = 0; - foredel(zlemetall); + foredel(zlemetall, CUT_RAW); spaceinline(l); strncpy(zlemetaline, origline, l); zlemetacs = origcs; @@ -2501,7 +2501,7 @@ domenuselect(Hookdef dummy, Chdata dat) */ mode = MM_INTER; zlemetacs = 0; - foredel(zlemetall); + foredel(zlemetall, CUT_RAW); spaceinline(l); strncpy(zlemetaline, origline, l); zlemetacs = origcs; @@ -2560,7 +2560,7 @@ domenuselect(Hookdef dummy, Chdata dat) * characters typed by the user. 
*/ zlemetacs = 0; - foredel(zlemetall); + foredel(zlemetall, CUT_RAW); spaceinline(l); strncpy(zlemetaline, origline, l); zlemetacs = origcs; @@ -2701,7 +2701,7 @@ domenuselect(Hookdef dummy, Chdata dat) handleundo(); zlemetacs = 0; - foredel(zlemetall); + foredel(zlemetall, CUT_RAW); spaceinline(l = strlen(u->line)); strncpy(zlemetaline, u->line, l); zlemetacs = u->cs; @@ -3090,7 +3090,7 @@ domenuselect(Hookdef dummy, Chdata dat) origcs = modecs; origll = modell; zlemetacs = 0; - foredel(zlemetall); + foredel(zlemetall, CUT_RAW); spaceinline(origll); strncpy(zlemetaline, origline, origll); zlemetacs = origcs; diff --git a/Src/Zle/compresult.c b/Src/Zle/compresult.c index 471e7c2c6..61fcceaaa 100644 --- a/Src/Zle/compresult.c +++ b/Src/Zle/compresult.c @@ -469,7 +469,7 @@ cline_str(Cline l, int ins, int *csp, LinkList posl) memcpy(r, zlemetaline + ocs, i); r[i] = '\0'; zlemetacs = ocs; - foredel(i); + foredel(i, CUT_RAW); if (csp) *csp = ncs - ocs; @@ -707,7 +707,7 @@ hasbrpsfx(Cmatch m, char *pre, char *suf) instmatch(m, NULL); zlemetacs = 0; - foredel(zlemetall); + foredel(zlemetall, CUT_RAW); spaceinline(oll); memcpy(zlemetaline, oline, oll); zlemetacs = ocs; @@ -778,7 +778,7 @@ do_ambiguous(void) tcs = zlemetacs; zlemetacs = wb; memcpy(old, zlemetaline + wb, we - wb); - foredel(we - wb); + foredel(we - wb, CUT_RAW); /* Now get the unambiguous string and insert it into the line. 
*/ cline_str(ainfo->line, 1, NULL, NULL); @@ -789,7 +789,7 @@ do_ambiguous(void) if (lastend < we && !lenchanged && !hasunmatched) { zlemetacs = wb; - foredel(lastend - wb); + foredel(lastend - wb, CUT_RAW); inststrlen(old, 0, we - wb); lastend = we; zlemetacs = tcs; @@ -978,7 +978,7 @@ do_single(Cmatch m) minfo.insc = 0; zlemetacs = minfo.pos; - foredel(l); + foredel(l, CUT_RAW); if (m->flags & CMF_ALL) { do_allmatches(0); @@ -1316,7 +1316,7 @@ accept_last(void) l = zlemetacs; zlemetacs = minfo.pos + minfo.len + minfo.insc - (*(minfo.cur))->qisl; if (zlemetacs < l) - foredel(l - zlemetacs); + foredel(l - zlemetacs, CUT_RAW); else if (zlemetacs > zlemetall) zlemetacs = zlemetall; inststrlen(" ", 1, 1); diff --git a/Src/Zle/deltochar.c b/Src/Zle/deltochar.c index d56798687..941898387 100644 --- a/Src/Zle/deltochar.c +++ b/Src/Zle/deltochar.c @@ -46,10 +46,11 @@ deltochar(UNUSED(char **args)) while (dest != zlell && (ZLE_INT_T)zleline[dest] != c) dest++; if (dest != zlell) { + /* HERE adjust dest for trailing combining chars */ if (!zap || n > 0) - dest++; + INCCS(); if (!n) { - forekill(dest - zlecs, 0); + forekill(dest - zlecs, CUT_RAW); ok++; } } @@ -63,7 +64,8 @@ deltochar(UNUSED(char **args)) dest--; if ((ZLE_INT_T)zleline[dest] == c) { if (!n) { - backkill(zlecs - dest - zap, 1); + /* HERE adjust zap for trailing combining chars */ + backkill(zlecs - dest - zap, CUT_RAW|CUT_FRONT); ok++; } if (dest) diff --git a/Src/Zle/zle.h b/Src/Zle/zle.h index 44450ab16..40230e784 100644 --- a/Src/Zle/zle.h +++ b/Src/Zle/zle.h @@ -74,6 +74,16 @@ typedef wint_t ZLE_INT_T; #define LASTFULLCHAR lastchar_wide #define LASTFULLCHAR_T ZLE_INT_T +/* We may need to handle combining character alignment */ +#define CCLEFT() alignmultiwordleft(1) +#define CCRIGHT() alignmultiwordright(1) +/* + * Increment or decrement the cursor position, skipping over + * combining characters. 
+ */ +#define INCCS() inccs() +#define DECCS() deccs() + #else /* Not MULTIBYTE_SUPPORT: old single-byte code */ typedef char ZLE_CHAR_T; @@ -133,6 +143,15 @@ static inline int ZS_strncmp(ZLE_STRING_T s1, ZLE_STRING_T s2, size_t l) #define LASTFULLCHAR lastchar #define LASTFULLCHAR_T int +/* Combining character alignment: none in this mode */ +#define CCLEFT() +#define CCRIGHT() +/* + * Increment or decrement the cursor position: simple in this case. + */ +#define INCCS() ((void)(zlecs++)) +#define DECCS() ((void)(zlecs--)) + #endif @@ -202,6 +221,12 @@ struct modifier { #define zmult (zmod.mult) +/* flags to cut() and cuttext() and other front-ends */ + +#define CUT_FRONT (1<<0) /* Text goes in front of cut buffer */ +#define CUT_REPLACE (1<<1) /* Text replaces cut buffer */ +#define CUT_RAW (1<<2) /* Raw character counts (not used in cut itself) */ + /* undo system */ struct change { @@ -340,7 +365,11 @@ typedef char REFRESH_CHAR; * Description of one screen cell in zle_refresh.c */ typedef struct { - /* The (possibly wide) character */ + /* + * The (possibly wide) character. + * If atr contains TXT_MULTIWORD_MASK, an index into the set of multiword + * symbols (only if MULTIBYTE_SUPPORT is present). + */ REFRESH_CHAR chr; /* * Its attributes. 'On' attributes (TXT_ATTR_ON_MASK) are @@ -349,7 +378,7 @@ typedef struct { * need the effect; 'off' attributes are only present for the * last character in the sequence. */ - REFRESH_CHAR atr; + int atr; } REFRESH_ELEMENT; /* A string of screen cells */ diff --git a/Src/Zle/zle_hist.c b/Src/Zle/zle_hist.c index 2fdda1696..ddd17b760 100644 --- a/Src/Zle/zle_hist.c +++ b/Src/Zle/zle_hist.c @@ -640,7 +640,7 @@ insertlastword(char **args) if (deleteword) { int pos = zlemetacs; zlemetacs = lastpos; - foredel(pos - zlemetacs); + foredel(pos - zlemetacs, CUT_RAW); /* * Mark that this has been deleted. 
* For consistency with history lines, we really ought to @@ -701,7 +701,7 @@ insertlastword(char **args) if (deleteword > 0) { int pos = zlemetacs; zlemetacs = lastpos; - foredel(pos - zlemetacs); + foredel(pos - zlemetacs, CUT_RAW); } if (lastinsert) { zfree(lastinsert, lastlen); diff --git a/Src/Zle/zle_misc.c b/Src/Zle/zle_misc.c index 6a42f157e..fb8070e0a 100644 --- a/Src/Zle/zle_misc.c +++ b/Src/Zle/zle_misc.c @@ -105,6 +105,7 @@ selfinsertunmeta(char **args) int deletechar(char **args) { + int n; if (zmult < 0) { int ret; zmult = -zmult; @@ -112,12 +113,15 @@ deletechar(char **args) zmult = -zmult; return ret; } - if (zlecs + zmult <= zlell) { - zlecs += zmult; - backdel(zmult); - return 0; + + n = zmult; + while (n--) { + if (zlecs == zlell) + return 1; + INCCS(); } - return 1; + backdel(zmult, 0); + return 0; } /**/ @@ -131,7 +135,7 @@ backwarddeletechar(char **args) zmult = -zmult; return ret; } - backdel(zmult > zlecs ? zlecs : zmult); + backdel(zmult > zlecs ? zlecs : zmult, 0); return 0; } @@ -149,7 +153,7 @@ killwholeline(UNUSED(char **args)) while (zlecs && zleline[zlecs - 1] != '\n') zlecs--; for (i = zlecs; i != zlell && zleline[i] != '\n'; i++); - forekill(i - zlecs + (i != zlell), fg); + forekill(i - zlecs + (i != zlell), fg ? 
(CUT_FRONT|CUT_RAW) : CUT_RAW); } clearlist = 1; return 0; @@ -160,7 +164,7 @@ int killbuffer(UNUSED(char **args)) { zlecs = 0; - forekill(zlell, 0); + forekill(zlell, CUT_RAW); clearlist = 1; return 0; } @@ -185,7 +189,7 @@ backwardkillline(char **args) while (zlecs && zleline[zlecs - 1] != '\n') zlecs--, i++; } - forekill(i, 1); + forekill(i, CUT_FRONT|CUT_RAW); clearlist = 1; return 0; } @@ -267,13 +271,13 @@ poundinsert(UNUSED(char **args)) zlecs = findeol(); } } else { - foredel(1); + foredel(1, 0); zlecs = findeol(); while(zlecs != zlell) { zlecs++; vifirstnonblank(zlenoargs); if(zleline[zlecs] == '#') - foredel(1); + foredel(1, 0); zlecs = findeol(); } } @@ -319,7 +323,7 @@ killline(char **args) while (zlecs != zlell && zleline[zlecs] != ZWC('\n')) zlecs++, i++; } - backkill(i, 0); + backkill(i, CUT_RAW); clearlist = 1; return 0; } @@ -331,9 +335,9 @@ killregion(UNUSED(char **args)) if (mark > zlell) mark = zlell; if (mark > zlecs) - forekill(mark - zlecs, 0); + forekill(mark - zlecs, CUT_RAW); else - backkill(zlecs - mark, 1); + backkill(zlecs - mark, CUT_FRONT|CUT_RAW); return 0; } @@ -344,7 +348,7 @@ copyregionaskill(char **args) if (*args) { int len; ZLE_STRING_T line = stringaszleline(*args, 0, &len, NULL, NULL); - cuttext(line, len, -1); + cuttext(line, len, CUT_REPLACE); free(line); } else { if (mark > zlell) @@ -352,7 +356,7 @@ copyregionaskill(char **args) if (mark > zlecs) cut(zlecs, mark - zlecs, 0); else - cut(mark, zlecs - mark, 1); + cut(mark, zlecs - mark, CUT_FRONT); } return 0; } @@ -441,7 +445,7 @@ yankpop(UNUSED(char **args)) } while (!buf->buf || *buf->buf == ZWC('\0')); zlecs = yankb; - foredel(yanke - yankb); + foredel(yanke - yankb, CUT_RAW); cc = buf->len; spaceinline(cc); ZS_memcpy(zleline + zlecs, buf->buf, cc); @@ -766,7 +770,7 @@ quoteregion(UNUSED(char **args)) } str = (ZLE_STRING_T)hcalloc((len = mark - zlecs) * ZLE_CHAR_SIZE); ZS_memcpy(str, zleline + zlecs, len); - foredel(len); + foredel(len, CUT_RAW); str = makequote(str, 
&len); spaceinline(len); ZS_memcpy(zleline + zlecs, str, len); @@ -1321,10 +1325,10 @@ iremovesuffix(ZLE_INT_T c, int keep) /* must be shifting wide character lengths */ if (zlemetaline != NULL) { unmetafy_line(); - backdel(sl); + backdel(sl, CUT_RAW); metafy_line(); } else - backdel(sl); + backdel(sl, CUT_RAW); if (!keep) invalidatelist(); } diff --git a/Src/Zle/zle_move.c b/Src/Zle/zle_move.c index 7f2748da6..eef009390 100644 --- a/Src/Zle/zle_move.c +++ b/Src/Zle/zle_move.c @@ -32,6 +32,112 @@ static int vimarkcs[27], vimarkline[27]; +#ifdef MULTIBYTE_SUPPORT +/* + * Take account of combining characters when moving left. If + * we are on a zero-width printable wide character and are + * treating these as part of the base character for display purposes, + * move left until we reach a non-zero-width printable character + * (the base character). If we reach something else first, stay where we + * were. + * + * If setpos is non-zero, update zlecs on success. + * Return 1 if we were on a combining char and could move, else 0. + */ +/**/ +int +alignmultiwordleft(int setpos) +{ + int loccs; + + /* generic nothing to do test */ + if (!isset(COMBININGCHARS) || zlecs == zlell || zlecs == 0) + return 0; + + /* need to be on zero-width punctuation character */ + if (!iswpunct(zleline[zlecs]) || wcwidth(zleline[zlecs]) != 0) + return 0; + + /* yes, go left */ + loccs = zlecs - 1; + + for (;;) { + /* second test here is paranoia */ + if (iswalnum(zleline[loccs]) && wcwidth(zleline[loccs]) > 0) { + /* found start position */ + if (setpos) + zlecs = loccs; + return 1; + } else if (!iswpunct(zleline[loccs]) || + wcwidth(zleline[loccs]) != 0) { + /* no go */ + return 0; + } + /* combining char, keep going */ + if (loccs-- == 0) + return 0; + } +} + + +/* + * Same principle when moving right. We need to check if + * alignmultiwordleft() would be successful in order to decide + * if we're on a combining character, and if so we move right to + * anything that isn't one. 
+ */ +/**/ +int +alignmultiwordright(int setpos) +{ + int loccs; + + /* + * Are we on a suitable character? + */ + if (!alignmultiwordleft(0)) + return 0; + + /* yes, go right */ + loccs = zlecs + 1; + + while (loccs < zlell) { + /* Anything other than a combining char will do here */ + if (!iswpunct(zleline[loccs]) || wcwidth(zleline[loccs]) != 0) { + if (setpos) + zlecs = loccs; + return 1; + } + loccs++; + } + + zlecs = zlell; + return 1; +} + + +/* Move cursor right, checking for combining characters */ + +/**/ +mod_export void +inccs(void) +{ + zlecs++; + alignmultiwordright(1); +} + + +/* Move cursor left, checking for combining characters */ + +/**/ +mod_export void +deccs(void) +{ + zlecs--; + alignmultiwordleft(1); +} +#endif + /**/ int beginningofline(char **args) @@ -157,25 +263,43 @@ endoflinehist(char **args) /**/ int -forwardchar(UNUSED(char **args)) +forwardchar(char **args) { - zlecs += zmult; - if (zlecs > zlell) - zlecs = zlell; - if (zlecs < 0) - zlecs = 0; + int n = zmult; + + if (n < 0) { + int ret; + zmult = -n; + ret = backwardchar(args); + zmult = n; + return ret; + } + + /* + * If handling combining characters with the base character, + * we skip over the whole set in one go, so need to check. 
+ */ + while (zlecs < zlell && n--) + INCCS(); return 0; } /**/ int -backwardchar(UNUSED(char **args)) +backwardchar(char **args) { - zlecs -= zmult; - if (zlecs > zlell) - zlecs = zlell; - if (zlecs < 0) - zlecs = 0; + int n = zmult; + + if (n < 0) { + int ret; + zmult = -n; + ret = forwardchar(args); + zmult = n; + return ret; + } + + while (zlecs > 0 && n--) + DECCS(); return 0; } @@ -216,17 +340,21 @@ exchangepointandmark(UNUSED(char **args)) int vigotocolumn(UNUSED(char **args)) { - int x, y; + int x, y, n = zmult; findline(&x, &y); - if (zmult >= 0) - zlecs = x + zmult - (zmult > 0); - else - zlecs = y + zmult; - if (zlecs > y) - zlecs = y; - if (zlecs < x) + if (n >= 0) { + if (n) + n--; zlecs = x; + while (zlecs < y && n--) + INCCS(); + } else { + zlecs = y; + n = -n; + while (zlecs > x && n--) + DECCS(); + } return 0; } @@ -286,7 +414,7 @@ vimatchbracket(UNUSED(char **args)) zlecs = ocs; return 1; } else if(dir > 0 && virangeflag) - zlecs++; + INCCS(); return 0; } @@ -307,7 +435,7 @@ viforwardchar(char **args) if (zlecs >= lim) return 1; while (n-- && zlecs < lim) - zlecs++; + INCCS(); return 0; } @@ -326,9 +454,9 @@ vibackwardchar(char **args) } if (zlecs == findbol()) return 1; - while (n--) { - zlecs--; - if (zlecs < 0 || zleline[zlecs] == '\n') { + while (n-- && zlecs > 0) { + DECCS(); + if (zleline[zlecs] == '\n') { zlecs++; break; } @@ -432,7 +560,10 @@ virepeatfind(char **args) } while (n--) { do { - zlecs += vfinddir; + if (vfinddir > 0) + INCCS(); + else + DECCS(); } while (zlecs >= 0 && zlecs < zlell && (ZLE_INT_T)zleline[zlecs] != vfindchar && zleline[zlecs] != ZWC('\n')); @@ -441,9 +572,12 @@ virepeatfind(char **args) return 1; } } - zlecs += tailadd; + if (tailadd > 0) + INCCS(); + else if (tailadd < 0) + DECCS(); if (vfinddir == 1 && virangeflag) - zlecs++; + INCCS(); return 0; } @@ -471,7 +605,7 @@ vifirstnonblank(UNUSED(char **args)) { zlecs = findbol(); while (zlecs != zlell && ZC_iblank(zleline[zlecs])) - zlecs++; + INCCS(); return 0; } 
diff --git a/Src/Zle/zle_refresh.c b/Src/Zle/zle_refresh.c index b1a5bc83d..670707d76 100644 --- a/Src/Zle/zle_refresh.c +++ b/Src/Zle/zle_refresh.c @@ -29,7 +29,58 @@ #include "zle.mdh" +#ifdef MULTIBYTE_SUPPORT +/* + * Handling for glyphs that contain more than one wide character, + * if ZLE_COMBINING_CHARS is set. Each glyph is one character with + * non-zero width followed by an arbitrary (but typically small) + * number of characters that have zero width (combining characters). + * + * The allocated size for each array is given by ?mw_size; nmw_ind + * is the next free element, i.e. nmwbuf[nmw_ind] will be the next + * element to be written (we never insert into omwbuf). We initialise + * nmw_ind to 1 to avoid the index stored in the character looking like a + * NULL. This wastes a word but it's safer than messing with pointers. + * + * The layout of the buffer is as a string of entries that consist of multiple + * elements of the allocated array with no boundary (the code keeps track of + * where each entry starts). Note distinction between (logical) entries and + * (array) elements. Each entry consists of an element giving the total + * number of wide characters for the entry (there are N+1 wide characters, + * where N >= 1 is the number of trailing zero width characters), followed by + * those characters. + */ +static REFRESH_CHAR + *omwbuf = NULL, /* old multiword glyph buffer */ + *nmwbuf = NULL; /* new multiword glyph buffer */ +#endif + +/* + * Compare if two characters are equal. + */ +#ifdef MULTIBYTE_SUPPORT +/* + * We may need to compare values in multiword arrays. As the arrays are + * different for the old and new video arrays, it is vital that the comparison + * always be done in the correct order: an element of the old video array, + * followed by an element of the new one. 
In this case, having ascertained + * that both elements are multiword (because they have the same attributes), + * we do the character comparison in two stages: first we check that the + * lengths are the same, then we check that the characters stored are the + * same. This ensures we can't read past the end of either array. If either + * character is a constant, then TXT_MULTIWORD_MASK is guaranteed not to be + * set and this doesn't matter. + */ +#define ZR_equal(oldzr, newzr) \ + ((oldzr).atr == (newzr).atr && \ + (((oldzr).atr & TXT_MULTIWORD_MASK) ? \ + (omwbuf[(oldzr).chr] == nmwbuf[(newzr).chr] && \ + !memcmp(omwbuf + (oldzr).chr + 1, nmwbuf + (newzr).chr + 1, \ + omwbuf[(oldzr).chr] * sizeof(*omwbuf))) : \ + (oldzr).chr == (newzr).chr)) +#else #define ZR_equal(zr1, zr2) ((zr1).chr == (zr2).chr && (zr1).atr == (zr2).atr) +#endif static void ZR_memset(REFRESH_ELEMENT *dst, REFRESH_ELEMENT rc, int len) @@ -61,17 +112,22 @@ ZR_strlen(const REFRESH_ELEMENT *wstr) /* * Simplified strcmp: we don't need the sign, just whether * the strings and their attributes are equal. + * + * In the multibyte case, the two elements must be in the order + * element from old video array, element from new video array. */ static int -ZR_strncmp(const REFRESH_ELEMENT *wstr1, const REFRESH_ELEMENT *wstr2, int len) +ZR_strncmp(const REFRESH_ELEMENT *oldwstr, const REFRESH_ELEMENT *newwstr, + int len) { while (len--) { - if (!wstr1->chr || !wstr2->chr) - return !ZR_equal(*wstr1, *wstr2); - if (!ZR_equal(*wstr1, *wstr2)) + if ((!(oldwstr->atr & TXT_MULTIWORD_MASK) && !oldwstr->chr) || + (!(newwstr->atr & TXT_MULTIWORD_MASK) && !newwstr->chr)) + return !ZR_equal(*oldwstr, *newwstr); + if (!ZR_equal(*oldwstr, *newwstr)) return 1; - wstr1++; - wstr2++; + oldwstr++; + newwstr++; } return 0; @@ -502,9 +558,19 @@ unset_region_highlight(Param pm, int exp) } +/* + * Output the character. This must come from the new video + * buffer, nbuf, since we access the multiword buffer nmwbuf + * directly. 
+ * + * curatrp may be NULL, otherwise points to an integer specifying + * what attributes were turned on for a character output immediately + * before, in order to optimise output of attribute changes. + */ + /**/ void -zwcputc(const REFRESH_ELEMENT *c, REFRESH_CHAR *curatrp) +zwcputc(const REFRESH_ELEMENT *c, int *curatrp) { /* * Safety: turn attributes off if last heard of turned on. @@ -536,7 +602,17 @@ zwcputc(const REFRESH_ELEMENT *c, REFRESH_CHAR *curatrp) } #ifdef MULTIBYTE_SUPPORT - if (c->chr != WEOF) { + if (c->atr & TXT_MULTIWORD_MASK) { + /* Multiword glyph stored in nmwbuf */ + int nchars = nmwbuf[c->chr]; + REFRESH_CHAR *wcptr = nmwbuf + c->chr + 1; + + memset(&mbstate, 0, sizeof(mbstate_t)); + while (nchars--) { + if ((i = wcrtomb(mbtmp, (wchar_t)*wcptr++, &mbstate)) > 0) + fwrite(mbtmp, i, 1, shout); + } + } else if (c->chr != WEOF) { memset(&mbstate, 0, sizeof(mbstate_t)); if ((i = wcrtomb(mbtmp, (wchar_t)c->chr, &mbstate)) > 0) fwrite(mbtmp, i, 1, shout); @@ -545,6 +621,10 @@ zwcputc(const REFRESH_ELEMENT *c, REFRESH_CHAR *curatrp) fputc(c->chr, shout); #endif + /* + * Always output "off" attributes since we only turn off at + * the end of a chunk of highlighted text. + */ if (c->atr & TXT_ATTR_OFF_MASK) { settextattributes(c->atr & TXT_ATTR_OFF_MASK); lastatr &= ~((c->atr & TXT_ATTR_OFF_MASK) >> TXT_ATTR_OFF_ON_SHIFT); @@ -563,7 +643,7 @@ static int zwcwrite(const REFRESH_STRING s, size_t i) { size_t j; - REFRESH_CHAR curatr = 0; + int curatr = 0; for (j = 0; j < i; j++) zwcputc(s + j, &curatr); @@ -593,6 +673,17 @@ static int more_start, /* more text before start of screen? 
*/ winprompt, /* singlelinezle: part of lprompt showing */ winw_alloc = -1, /* allocated window width */ winh_alloc = -1; /* allocates window height */ +#ifdef MULTIBYTE_SUPPORT +static int + omw_size, /* allocated size of omwbuf */ + nmw_size, /* allocated size of nmwbuf */ + nmw_ind; /* next insert point in nmw_ind */ +#endif + +/* + * Number of words to allocate in one go for the multiword buffers. + */ +#define DEF_MWBUF_ALLOC (32) static void freevideo(void) @@ -605,6 +696,12 @@ freevideo(void) } free(nbuf); free(obuf); +#ifdef MULTIBYTE_SUPPORT + zfree(nmwbuf, nmw_size * sizeof(*nmwbuf)); + zfree(omwbuf, omw_size * sizeof(*omwbuf)); + omw_size = nmw_size = 0; + nmw_ind = 1; +#endif nbuf = NULL; obuf = NULL; } @@ -631,6 +728,15 @@ resetvideo(void) nbuf[0] = (REFRESH_STRING)zalloc((winw + 2) * sizeof(**nbuf)); obuf[0] = (REFRESH_STRING)zalloc((winw + 2) * sizeof(**obuf)); +#ifdef MULTIBYTE_SUPPORT + nmw_size = DEF_MWBUF_ALLOC; + nmw_ind = 1; + nmwbuf = (REFRESH_CHAR *)zalloc(nmw_size * sizeof(*nmwbuf)); + + omw_size = DEF_MWBUF_ALLOC; + omwbuf = (REFRESH_CHAR *)zalloc(omw_size * sizeof(*omwbuf)); +#endif + winw_alloc = winw; winh_alloc = winh; } @@ -803,6 +909,72 @@ settextattributes(int atr) tsetcap(TCUNDERLINEBEG, 0); } +#ifdef MULTIBYTE_SUPPORT +/* + * Add a multiword glyph at the screen location base. + * tptr points to the source and there are ichars characters. + */ +static void +addmultiword(REFRESH_ELEMENT *base, ZLE_STRING_T tptr, int ichars) +{ + /* Number of characters needed in buffer incl. count */ + int iadd = ichars + 1, icnt; + REFRESH_CHAR *nmwptr; + base->atr |= TXT_MULTIWORD_MASK; + /* check allocation */ + if (nmw_ind + iadd > nmw_size) { + /* need more space in buffer */ + int mw_more = (iadd > DEF_MWBUF_ALLOC) ? 
iadd : + DEF_MWBUF_ALLOC; + nmwbuf = (REFRESH_CHAR *) + zrealloc(nmwbuf, (nmw_size += mw_more) * + sizeof(*nmwbuf)); + } + /* make buffer entry: count, then characters */ + nmwptr = nmwbuf + nmw_ind; + *nmwptr++ = ichars; + for (icnt = 0; icnt < ichars; icnt++) + *nmwptr++ = tptr[icnt]; + /* save index and update */ + base->chr = (wint_t)nmw_ind; + nmw_ind += iadd; +} +#endif + + +/* + * Swap the old and new video buffers, plus any associated multiword + * buffers. The new buffer becomes the old one; the new new buffer + * will be filled with the command line next time. + */ +static void +bufswap(void) +{ + REFRESH_STRING *qbuf; +#ifdef MULTIBYTE_SUPPORT + REFRESH_CHAR *qmwbuf; + int itmp; +#endif + + qbuf = nbuf; + nbuf = obuf; + obuf = qbuf; + +#ifdef MULTIBYTE_SUPPORT +/* likewise multiword buffers */ + qmwbuf = nmwbuf; + nmwbuf = omwbuf; + omwbuf = qmwbuf; + + itmp = nmw_size; + nmw_size = omw_size; + omw_size = itmp; + + nmw_ind = 1; +#endif +} + + /**/ mod_export void zrefresh(void) @@ -814,7 +986,6 @@ zrefresh(void) t, /* pointer into the real buffer */ scs, /* pointer to cursor position in real buffer */ u; /* pointer for status line stuff */ - REFRESH_STRING *qbuf; /* tmp */ int tmpcs, tmpll; /* ditto cursor position and line length */ int tmppos; /* t - tmpline */ int tmpalloced; /* flag to free tmpline when finished */ @@ -867,6 +1038,7 @@ zrefresh(void) /* this will create region_highlights if it's still NULL */ zle_set_highlight(); + /* check for region between point ($CURSOR) and mark ($MARK) */ if (region_active) { if (zlecs <= mark) { region_highlights->start = zlecs; @@ -1009,9 +1181,6 @@ zrefresh(void) struct region_highlight *rhp; /* * Calculate attribute based on region. - * HERE: we may need to be smarter about turning - * attributes off if bailing out before the end of the - * region. 
*/ for (ireg = 0, rhp = region_highlights; ireg < n_region_highlights; @@ -1054,13 +1223,13 @@ zrefresh(void) } #ifdef MULTIBYTE_SUPPORT else if (iswprint(*t) && (width = wcwidth(*t)) > 0) { + int ichars; if (width > rpms.sen - rpms.s) { int started = 0; /* * Too wide to fit. Insert spaces to end of current line. */ do { - /* HERE highlight */ rpms.s->chr = ZWC(' '); if (!started) started = 1; @@ -1076,12 +1245,21 @@ zrefresh(void) rpms.nvcs = rpms.s - nbuf[rpms.nvln = rpms.ln]; } } + if (isset(COMBININGCHARS) && iswalnum(*t)) { + /* + * Look for combining characters: trailing punctuation + * characters with printing width zero. + */ + for (ichars = 1; tmppos + ichars < tmpll; ichars++) { + if (!iswpunct(t[ichars]) || wcwidth(t[ichars]) != 0) + break; + } + } else + ichars = 1; if (width > rpms.sen - rpms.s || width == 0) { /* * The screen width is too small to fit even one * occurrence. - * - * HERE highlight */ rpms.s->chr = ZWC('?'); rpms.s->atr = special_atr_on | special_atr_off | @@ -1089,12 +1267,22 @@ zrefresh(void) rpms.s++; } else { /* We can fit it without reaching the end of the line. */ - rpms.s->chr = *t; /* * As we don't actually output the WEOF, we attach * any off attributes to the character itself. */ rpms.s->atr = base_atr_on | base_atr_off; + if (ichars > 1) { + /* + * Glyph includes combining characters. + * Write these into the multiword buffer and put + * the index into the value at the screen location. 
+ */ + addmultiword(rpms.s, t, ichars); + } else { + /* Single wide character */ + rpms.s->chr = *t; + } rpms.s++; while (--width > 0) { rpms.s->chr = WEOF; @@ -1103,6 +1291,11 @@ zrefresh(void) rpms.s++; } } + if (ichars > 1) { + /* allow for normal increment */ + tmppos += ichars - 1; + t += ichars - 1; + } } #endif else if (ZC_icntrl(*t) @@ -1110,7 +1303,6 @@ zrefresh(void) && (unsigned)*t <= 0xffU #endif ) { /* other control character */ - /* HERE highlight */ rpms.s->chr = ZWC('^'); rpms.s->atr = special_atr_on | base_atr_on; rpms.s++; @@ -1131,8 +1323,6 @@ zrefresh(void) /* * Not printable or zero width. * Resort to hackery. - * - * HERE: highlight */ char dispchars[11]; char *dispptr = dispchars; @@ -1214,7 +1404,6 @@ zrefresh(void) snextline(&rpms); } if (width > rpms.sen - rpms.s) { - /* HERE: highlight */ rpms.s->chr = ZWC('?'); rpms.s->atr = special_atr_on | special_atr_off; rpms.s++; @@ -1232,7 +1421,6 @@ zrefresh(void) else #endif if (ZC_icntrl(*u)) { /* simplified processing in the status line */ - /* HERE: highlight */ rpms.s->chr = ZWC('^'); rpms.s->atr = special_atr_on; rpms.s++; @@ -1375,10 +1563,10 @@ zrefresh(void) if (!clearf && iln > 0 && iln < olnct - 1 && !(hasam && vcs == winw) && nbuf[iln] && obuf[iln] && - ZR_strncmp(nbuf[iln], obuf[iln], 16)) { + ZR_strncmp(obuf[iln], nbuf[iln], 16)) { if (tccan(TCDELLINE) && obuf[iln + 1] && obuf[iln + 1][0].chr && nbuf[iln] && - !ZR_strncmp(nbuf[iln], obuf[iln + 1], 16)) { + !ZR_strncmp(obuf[iln + 1], nbuf[iln], 16)) { moveto(iln, 0); tcout(TCDELLINE); zfree(obuf[iln], (winw + 2) * sizeof(**obuf)); @@ -1391,8 +1579,7 @@ zrefresh(void) go off the end of the screen. 
*/ else if (tccan(TCINSLINE) && olnct < vmaxln && nbuf[iln + 1] && - obuf[iln] && !ZR_strncmp(nbuf[iln + 1], - obuf[iln], 16)) { + obuf[iln] && !ZR_strncmp(obuf[iln], nbuf[iln + 1], 16)) { moveto(iln, 0); tcout(TCINSLINE); for (t0 = olnct; t0 != iln; t0--) @@ -1454,9 +1641,8 @@ individually */ moveto(rpms.nvln, rpms.nvcs); /* swap old and new buffers - better than freeing/allocating every time */ - qbuf = nbuf; - nbuf = obuf; - obuf = qbuf; + bufswap(); + /* store current values so we can use them next time */ ovln = rpms.nvln; olnct = nlnct; @@ -1496,13 +1682,17 @@ singlelineout: #define tc_upcurs(X) (void) tcmultout(TCUP, TCMULTUP, (X)) #define tc_leftcurs(X) (void) tcmultout(TCLEFT, TCMULTLEFT, (X)) +/* + * Once again, in the multibyte case the arguments must be in the + * order: element of old video array, element of new video array. + */ static int -wpfxlen(const REFRESH_ELEMENT *s, const REFRESH_ELEMENT *t) +wpfxlen(const REFRESH_ELEMENT *olds, const REFRESH_ELEMENT *news) { int i = 0; - while (s->chr && ZR_equal(*s, *t)) - s++, t++, i++; + while (olds->chr && ZR_equal(*olds, *news)) + olds++, news++, i++; return i; } @@ -1579,13 +1769,12 @@ refreshline(int ln) else { col_cleareol = -1; if (tccan(TCCLEAREOL) && (nllen == winw || put_rpmpt != oput_rpmpt)) { - /* HERE: watch for change of attributes */ - for (i = nllen; i && ZR_equal(nl[i - 1], zr_sp); i--) + for (i = nllen; i && ZR_equal(zr_sp, nl[i - 1]); i--) ; for (j = ollen; j && ZR_equal(ol[j - 1], zr_sp); j--) ; if ((j > i + tclen[TCCLEAREOL]) /* new buf has enough spaces */ - || (nllen == winw && ZR_equal(nl[winw - 1], zr_sp))) + || (nllen == winw && ZR_equal(zr_sp, nl[winw - 1]))) col_cleareol = i; } } @@ -1641,13 +1830,13 @@ refreshline(int ln) #ifdef MULTIBYTE_SUPPORT if ((!nl->chr || nl->chr != WEOF) && (!ol->chr || ol->chr != WEOF)) { #endif - if (nl->chr && ol->chr && ZR_equal(nl[1], ol[1])) { + if (nl->chr && ol->chr && ZR_equal(ol[1], nl[1])) { /* skip only if second chars match */ #ifdef 
MULTIBYTE_SUPPORT int ccs_was = ccs; #endif /* skip past all matching characters */ - for (; nl->chr && ZR_equal(*nl, *ol); nl++, ol++, ccs++) + for (; nl->chr && ZR_equal(*ol, *nl); nl++, ol++, ccs++) ; #ifdef MULTIBYTE_SUPPORT /* Make sure ol and nl are pointing to real characters */ @@ -1723,7 +1912,7 @@ refreshline(int ln) #ifdef MULTIBYTE_SUPPORT && ol->chr != WEOF && nl->chr != WEOF #endif - && nl[1].chr && ol[1].chr && !ZR_equal(nl[1], ol[1])) { + && nl[1].chr && ol[1].chr && !ZR_equal(ol[1], nl[1])) { /* deleting characters - see if we can find a match series that makes it cheaper to delete intermediate characters @@ -1762,7 +1951,7 @@ refreshline(int ln) if (tccan(TCINS) && (vln != lines - 1)) { /* not on last line */ for (i = 1; nl[i].chr; i++) - if (tcinscost(i) < wpfxlen(nl + i, ol)) { + if (tcinscost(i) < wpfxlen(ol, nl + i)) { tc_inschars(i); zwrite(nl, i); nl += i; @@ -2055,25 +2244,45 @@ static void singlerefresh(ZLE_STRING_T tmpline, int tmpll, int tmpcs) { REFRESH_STRING vbuf, vp, /* video buffer and pointer */ - *qbuf, /* tmp */ refreshop; /* pointer to old video buffer */ int t0, /* tmp */ vsiz, /* size of new video buffer */ nvcs = 0, /* new video cursor column */ owinpos = winpos, /* previous window position */ owinprompt = winprompt; /* previous winprompt */ +#ifdef MULTIBYTE_SUPPORT + int width; /* width of multibyte character */ +#endif nlnct = 1; /* generate the new line buffer completely */ - for (vsiz = 1 + lpromptw, t0 = 0; t0 != tmpll; t0++, vsiz++) + for (vsiz = 1 + lpromptw, t0 = 0; t0 != tmpll; t0++) { if (tmpline[t0] == ZWC('\t')) - vsiz = (vsiz | 7) + 1; + vsiz = (vsiz | 7) + 2; +#ifdef MULTIBYTE_SUPPORT + else if (iswprint(tmpline[t0]) && (width = wcwidth(tmpline[t0]) > 0)) { + vsiz += width; + if (isset(COMBININGCHARS) && iswalnum(tmpline[t0])) { + while (t0 < tmpll-1 && iswpunct(tmpline[t0+1]) && + wcwidth(tmpline[t0+1]) == 0) + t0++; + } + } +#endif + else if (ZC_icntrl(tmpline[t0]) #ifdef MULTIBYTE_SUPPORT - else if 
(iswprint(tmpline[t0])) - vsiz += wcwidth(tmpline[t0]); + && (unsigned)tmpline[t0] <= 0xffU #endif - else if (ZC_icntrl(tmpline[t0])) + ) + vsiz += 2; +#ifdef MULTIBYTE_SUPPORT + else + vsiz += 10; +#else + else vsiz++; +#endif + } vbuf = (REFRESH_STRING)zalloc(vsiz * sizeof(*vbuf)); if (tmpcs < 0) { @@ -2094,9 +2303,6 @@ singlerefresh(ZLE_STRING_T tmpline, int tmpll, int tmpcs) struct region_highlight *rhp; /* * Calculate attribute based on region. - * HERE: we may need to be smarter about turning - * attributes off if bailing out before the end of the - * region. */ for (ireg = 0, rhp = region_highlights; ireg < n_region_highlights; @@ -2122,7 +2328,6 @@ singlerefresh(ZLE_STRING_T tmpline, int tmpll, int tmpcs) *vp++ = zr_sp; vp[-1].atr |= base_atr_off; } else if (tmpline[t0] == ZWC('\n')) { - /* HERE highlight */ vp->chr = ZWC('\\'); vp->atr = special_atr_on | base_atr_on; vp++; @@ -2131,21 +2336,39 @@ singlerefresh(ZLE_STRING_T tmpline, int tmpll, int tmpcs) base_atr_on | base_atr_off; vp++; #ifdef MULTIBYTE_SUPPORT - } else if (iswprint(tmpline[t0])) { - int width; - vp->chr = tmpline[t0]; - vp->atr = base_atr_on; + } else if (iswprint(tmpline[t0]) && + (width = wcwidth(tmpline[t0])) > 0) { + int ichars; + if (isset(COMBININGCHARS) && iswalnum(tmpline[t0])) { + /* + * Look for combining characters: trailing printable + * characters with printing width zero. 
+ */ + for (ichars = 1; t0 + ichars < tmpll; ichars++) { + if (!iswpunct(tmpline[t0+ichars]) || + wcwidth(tmpline[t0+ichars]) != 0) + break; + } + } else + ichars = 1; + vp->atr = base_atr_on | base_atr_off; + if (ichars > 1) + addmultiword(vp, tmpline+t0, ichars); + else + vp->chr = tmpline[t0]; vp++; - width = wcwidth(tmpline[t0]); while (--width > 0) { vp->chr = WEOF; - vp->atr = base_atr_on; + vp->atr = base_atr_on | base_atr_off; vp++; } - vp[-1].atr |= base_atr_off; + t0 += ichars - 1; #endif - } else if (ZC_icntrl(tmpline[t0])) { - /* HERE: highlight */ + } else if (ZC_icntrl(tmpline[t0]) +#ifdef MULTIBYTE_SUPPORT + && (unsigned)tmpline[t0] <= 0xffU +#endif + ) { ZLE_INT_T t = tmpline[++t0]; vp->chr = ZWC('^'); @@ -2156,11 +2379,39 @@ singlerefresh(ZLE_STRING_T tmpline, int tmpll, int tmpcs) vp->atr = special_atr_on | special_atr_off | base_atr_on | base_atr_off; vp++; - } else { + } +#ifdef MULTIBYTE_SUPPORT + else { + char dispchars[11]; + char *dispptr = dispchars; + wchar_t wc; + int started = 0; + + if ((unsigned)tmpline[t0] > 0xffffU) { + sprintf(dispchars, "<%.08x>", (unsigned)tmpline[t0]); + } else { + sprintf(dispchars, "<%.04x>", (unsigned)tmpline[t0]); + } + while (*dispptr) { + if (mbtowc(&wc, dispptr, 1) == 1 /* paranoia */) { + vp->chr = wc; + if (!started) + started = 1; + vp->atr = special_atr_on | base_atr_on; + vp++; + } + dispptr++; + } + if (started) + vp[-1].atr |= special_atr_off | base_atr_off; + } +#else + else { vp->chr = tmpline[t0]; vp->atr = base_atr_on | base_atr_off; vp++; } +#endif if (t0 == tmpcs) nvcs = vp - vbuf - 1; } @@ -2252,7 +2503,7 @@ singlerefresh(ZLE_STRING_T tmpline, int tmpll, int tmpcs) * nastiness may be around. 
*/ if (vp - *nbuf >= owinprompt) - for (; vp->chr && ZR_equal(*vp, *refreshop); + for (; vp->chr && ZR_equal(*refreshop, *vp); t0++, vp++, refreshop++) ; @@ -2282,9 +2533,7 @@ singlerefresh(ZLE_STRING_T tmpline, int tmpll, int tmpcs) /* move to the new cursor position */ singmoveto(nvcs); - qbuf = nbuf; - nbuf = obuf; - obuf = qbuf; + bufswap(); } /**/ diff --git a/Src/Zle/zle_tricky.c b/Src/Zle/zle_tricky.c index 2266b56d7..e8e14cf14 100644 --- a/Src/Zle/zle_tricky.c +++ b/Src/Zle/zle_tricky.c @@ -685,7 +685,7 @@ docomplete(int lst) } ocs = zlemetacs; zlemetacs = 0; - foredel(chl); + foredel(chl, CUT_RAW); zlemetacs = ocs; } freeheap(); @@ -796,7 +796,7 @@ docomplete(int lst) if (inull(*q)) *q = Nularg; zlemetacs = wb; - foredel(we - wb); + foredel(we - wb, CUT_RAW); untokenize(x = ox = dupstring(w)); if (*w == Tilde || *w == Equals || *w == String) @@ -848,7 +848,7 @@ docomplete(int lst) * parts of the code re-install them, but for expansion * we have to do it here. */ zlemetacs = 0; - foredel(zlemetall); + foredel(zlemetall, CUT_RAW); spaceinline(origll); memcpy(zlemetaline, origline, origll); zlemetacs = origcs; @@ -1669,7 +1669,7 @@ get_comp_string(void) if (i < ocs) offs -= skipchars; /* Move the tail of the line up */ - foredel(skipchars); + foredel(skipchars, CUT_RAW); /* * Update the offset into the command line to the * cursor position if that's after the current position. @@ -1724,7 +1724,7 @@ get_comp_string(void) } else { ocs = zlemetacs; zlemetacs = i; - foredel(skipchars); + foredel(skipchars, CUT_RAW); if ((zlemetacs = ocs) > (i -= skipchars)) zlemetacs -= skipchars; we -= skipchars; @@ -1732,7 +1732,7 @@ get_comp_string(void) } else { ocs = zlemetacs; zlemetacs = we; - backdel(skipchars); + backdel(skipchars, CUT_RAW); if (ocs == we) zlemetacs = we - skipchars; else @@ -2085,7 +2085,7 @@ doexpansion(char *s, int lst, int olst, int explincmd) /* Only the list of expansions was requested. Restore the * command line. 
*/ zlemetacs = 0; - foredel(zlemetall); + foredel(zlemetall, CUT_RAW); spaceinline(origll); memcpy(zlemetaline, origline, origll); zlemetacs = origcs; @@ -2095,7 +2095,7 @@ doexpansion(char *s, int lst, int olst, int explincmd) } /* Remove the current word and put the expansions there. */ zlemetacs = wb; - foredel(we - wb); + foredel(we - wb, CUT_RAW); while ((ss = (char *)ugetnode(vl))) { ret = 0; ss = quotename(ss, NULL); @@ -2811,7 +2811,7 @@ expandcmdpath(UNUSED(char **args)) if (!str) return 1; zlecs = cmdwb; - foredel(cmdwe - cmdwb); + foredel(cmdwe - cmdwb, CUT_RAW); zlestr = stringaszleline(str, 0, &strll, NULL, NULL); spaceinline(strll); ZS_strncpy(zleline + zlecs, zlestr, strll); diff --git a/Src/Zle/zle_utils.c b/Src/Zle/zle_utils.c index cacf6b483..cd8e2b26c 100644 --- a/Src/Zle/zle_utils.c +++ b/Src/Zle/zle_utils.c @@ -357,6 +357,17 @@ zlegetline(int *ll, int *cs) } +/* + * Basic utility functions for adding to line or removing from line. + * At this level the counts supplied are raw character counts, so + * the calling code must be aware of combining characters where + * necessary, e.g. if we want to delete a + combining grave forward + * from the cursor, then shiftchars() gets the count 2 (not 1). + * + * This is necessary because these utility functions don't know about + * zlecs, and we need to count combined characters from there. 
+ */ + /* insert space for ct chars at cursor position */ /**/ @@ -412,27 +423,6 @@ shiftchars(int to, int cnt) region_active = 0; } -/**/ -mod_export void -backkill(int ct, int dir) -{ - int i = (zlecs -= ct); - - cut(i, ct, dir); - shiftchars(i, ct); -} - -/**/ -mod_export void -forekill(int ct, int dir) -{ - int i = zlecs; - - cut(i, ct, dir); - shiftchars(i, ct); -} - - /* * Put the ct characters starting at zleline + i into the * cutbuffer, circling the kill ring if necessary (it's @@ -448,9 +438,9 @@ forekill(int ct, int dir) /**/ void -cut(int i, int ct, int dir) +cut(int i, int ct, int flags) { - cuttext(zleline + i, ct, dir); + cuttext(zleline + i, ct, flags); } /* @@ -459,7 +449,7 @@ cut(int i, int ct, int dir) /**/ void -cuttext(ZLE_STRING_T line, int ct, int dir) +cuttext(ZLE_STRING_T line, int ct, int flags) { if (!ct) return; @@ -504,7 +494,7 @@ cuttext(ZLE_STRING_T line, int ct, int dir) cutbuf.buf = (ZLE_STRING_T)zalloc(ZLE_CHAR_SIZE); cutbuf.buf[0] = ZWC('\0'); cutbuf.len = cutbuf.flags = 0; - } else if (!(lastcmd & ZLE_KILL) || dir < 0) { + } else if (!(lastcmd & ZLE_KILL) || (flags & CUT_RAW)) { Cutbuffer kptr; if (!kring) { kringsize = KRINGCTDEF; @@ -519,7 +509,7 @@ cuttext(ZLE_STRING_T line, int ct, int dir) cutbuf.buf[0] = ZWC('\0'); cutbuf.len = cutbuf.flags = 0; } - if (dir) { + if (flags & (CUT_FRONT|CUT_REPLACE)) { ZLE_STRING_T s = (ZLE_STRING_T)zalloc((cutbuf.len + ct)*ZLE_CHAR_SIZE); ZS_memcpy(s, line, ct); @@ -539,24 +529,87 @@ cuttext(ZLE_STRING_T line, int ct, int dir) cutbuf.flags &= ~CUTBUFFER_LINE; } +/* + * Now we're back in the world of zlecs where we need to keep + * track of whether we're on a combining character. 
+ */ + /**/ mod_export void -backdel(int ct) +backkill(int ct, int flags) { - if (zlemetaline != NULL) - shiftchars(zlemetacs -= ct, ct); - else - shiftchars(zlecs -= ct, ct); + int i; + + if (flags & CUT_RAW) { + i = (zlecs -= ct); + } else { + int n = ct; + while (n--) + DECCS(); + i = zlecs; + } + + cut(i, ct, flags); + shiftchars(i, ct); } /**/ mod_export void -foredel(int ct) +forekill(int ct, int flags) { - if (zlemetaline != NULL) - shiftchars(zlemetacs, ct); - else + int i = zlecs; + + if (!(flags & CUT_RAW)) { + int n = ct; + while (n--) + INCCS(); + ct = zlecs - i; + zlecs = i; + } + + cut(i, ct, flags); + shiftchars(i, ct); +} + +/**/ +mod_export void +backdel(int ct, int flags) +{ + if (flags & CUT_RAW) { + if (zlemetaline != NULL) { + shiftchars(zlemetacs -= ct, ct); + } else { + shiftchars(zlecs -= ct, ct); + } + } else { + int n = ct, origcs = zlecs; + DPUTS(zlemetaline != NULL, "backdel needs CUT_RAW when metafied"); + while (n--) + DECCS(); + shiftchars(zlecs, origcs - zlecs); + } +} + +/**/ +mod_export void +foredel(int ct, int flags) +{ + if (flags & CUT_RAW) { + if (zlemetaline != NULL) { + shiftchars(zlemetacs, ct); + } else if (flags & CUT_RAW) { + shiftchars(zlecs, ct); + } + } else { + int origcs = zlecs; + int n = ct; + DPUTS(zlemetaline != NULL, "backdel needs CUT_RAW when metafied"); + while (n--) + INCCS(); + ct = zlecs - origcs; + zlecs = origcs; shiftchars(zlecs, ct); + } } /**/ @@ -578,7 +631,7 @@ setline(char *s, int flags) zleline = stringaszleline(scp, 0, &zlell, &linesz, NULL); if ((flags & ZSL_TOEND) && (zlecs = zlell) && invicmdmode()) - zlecs--; + DECCS(); else if (zlecs > zlell) zlecs = zlell; @@ -1035,7 +1088,7 @@ unapplychange(struct change *ch) } zlecs = ch->off; if(ch->ins) - foredel(ch->insl); + foredel(ch->insl, CUT_RAW); if(ch->del) { spaceinline(ch->dell); ZS_memcpy(zleline + zlecs, ch->del, ch->dell); @@ -1075,7 +1128,7 @@ applychange(struct change *ch) } zlecs = ch->off; if(ch->del) - foredel(ch->dell); + 
foredel(ch->dell, CUT_RAW); if(ch->ins) { spaceinline(ch->insl); ZS_memcpy(zleline + zlecs, ch->ins, ch->insl); diff --git a/Src/Zle/zle_vi.c b/Src/Zle/zle_vi.c index 3e8e5ccec..068274a8e 100644 --- a/Src/Zle/zle_vi.c +++ b/Src/Zle/zle_vi.c @@ -210,7 +210,13 @@ getvirange(int wf) * moving to the opening bracket, meaning that we need to * * change the *starting* position. */ if(virangeflag == -1) - pos++; + { + int origcs = zlecs; + zlecs = pos; + INCCS(); + pos = zlecs; + zlecs = origcs; + } /* Get the range the right way round. zlecs is placed at the * * start of the range, and pos (the return value of this * @@ -314,12 +320,12 @@ videlete(UNUSED(char **args)) startvichange(1); if ((c2 = getvirange(0)) != -1) { - forekill(c2 - zlecs, 0); + forekill(c2 - zlecs, CUT_RAW); ret = 0; if (vilinerange && zlell) { if (zlecs == zlell) - zlecs--; - foredel(1); + DECCS(); + foredel(1, 0); vifirstnonblank(zlenoargs); } } @@ -350,7 +356,7 @@ videletechar(char **args) if (n > findeol() - zlecs) n = findeol() - zlecs; /* do the deletion */ - forekill(n, 0); + forekill(n, CUT_RAW); return 0; } @@ -363,7 +369,7 @@ vichange(UNUSED(char **args)) startvichange(1); if ((c2 = getvirange(1)) != -1) { ret = 0; - forekill(c2 - zlecs, 0); + forekill(c2 - zlecs, CUT_RAW); selectkeymap("main", 1); viinsbegin = zlecs; undoing = 0; @@ -388,7 +394,7 @@ visubstitute(UNUSED(char **args)) if (n > findeol() - zlecs) n = findeol() - zlecs; /* do the substitution */ - forekill(n, 0); + forekill(n, CUT_RAW); startvitext(1); return 0; } @@ -397,7 +403,7 @@ visubstitute(UNUSED(char **args)) int vichangeeol(UNUSED(char **args)) { - forekill(findeol() - zlecs, 0); + forekill(findeol() - zlecs, CUT_RAW); startvitext(1); return 0; } @@ -488,11 +494,22 @@ int vireplacechars(UNUSED(char **args)) { ZLE_INT_T ch; - int n = zmult; + int n = zmult, origcs = zlecs, fail = 0; + if (n > 0) { + while (n > 0) { + if (zlecs == zlell || zleline[zlell] == ZWC('\n')) { + fail = 1; + break; + } + INCCS(); + } + n = zlecs - 
origcs; + zlecs = origcs; + } startvichange(1); /* check argument range */ - if (n < 1 || n + zlecs > findeol()) { + if (n < 1 || fail) { if(vichgrepeat) vigetkey(); if(vichgflag) { @@ -511,9 +528,10 @@ vireplacechars(UNUSED(char **args)) if (ch == ZWC('\r') || ch == ZWC('\n')) { /* handled specially */ zlecs += n - 1; - backkill(n - 1, 0); + backkill(n - 1, CUT_RAW); zleline[zlecs++] = '\n'; } else { + /* HERE: we shouldn't replace combining chars, we should delete them */ while (n--) zleline[zlecs++] = ch; zlecs--; @@ -531,7 +549,7 @@ vicmdmode(UNUSED(char **args)) undoing = 1; vichgflag = 0; if (zlecs != findbol()) - zlecs--; + DECCS(); return 0; } @@ -575,7 +593,7 @@ vioperswapcase(UNUSED(char **args)) zleline[zlecs] = ZC_toupper(zleline[zlecs]); else if (ZC_iupper(zleline[zlecs])) zleline[zlecs] = ZC_tolower(zleline[zlecs]); - zlecs++; + INCCS(); } /* go back to the first line of the range */ zlecs = oldcs; @@ -664,7 +682,7 @@ viunindent(UNUSED(char **args)) /* remove a tab from the beginning of each line within range */ while (zlecs < c2) { if (zleline[zlecs] == '\t') - foredel(1); + foredel(1, 0); zlecs = findeol() + 1; } /* go back to the first line of the range */ @@ -699,7 +717,7 @@ vibackwarddeletechar(char **args) if (n > zlecs - findbol()) n = zlecs - findbol(); /* do the deletion */ - backkill(n, 1); + backkill(n, CUT_FRONT|CUT_RAW); return 0; } @@ -709,7 +727,7 @@ vikillline(UNUSED(char **args)) { if (viinsbegin > zlecs) return 1; - backdel(zlecs - viinsbegin); + backdel(zlecs - viinsbegin, CUT_RAW); return 0; } @@ -790,7 +808,7 @@ vijoin(UNUSED(char **args)) return 1; zlecs = x + 1; for (x = 1; zlecs != zlell && ZC_iblank(zleline[zlecs]); zlecs++, x++); - backdel(x); + backdel(x, CUT_RAW); if (zlecs && ZC_iblank(zleline[zlecs-1])) zlecs--; else { @@ -893,7 +911,7 @@ vipoundinsert(UNUSED(char **args)) viinsbegin++; zlecs = oldcs + (zlecs <= oldcs); } else { - foredel(1); + foredel(1, 0); if (zlecs < viinsbegin) viinsbegin--; zlecs = oldcs - (zlecs < 
oldcs); @@ -921,7 +939,7 @@ viquotedinsert(char **args) #ifndef HAS_TIO zsetterm(); #endif - foredel(1); + foredel(1, 0); if(LASTFULLCHAR == ZLEEOF) return 1; else diff --git a/Src/Zle/zle_word.c b/Src/Zle/zle_word.c index d24a0f7f7..c1fff7213 100644 --- a/Src/Zle/zle_word.c +++ b/Src/Zle/zle_word.c @@ -30,6 +30,12 @@ #include "zle.mdh" #include "zle_word.pro" +/* + * HERE: our handling of combining characters may be wrong. We + * should make sure we only consider a combining character part of + * a word if the base character is. + */ + /**/ int forwardword(char **args) @@ -45,11 +51,11 @@ forwardword(char **args) } while (n--) { while (zlecs != zlell && ZC_iword(zleline[zlecs])) - zlecs++; + INCCS(); if (wordflag && !n) return 0; while (zlecs != zlell && !ZC_iword(zleline[zlecs])) - zlecs++; + INCCS(); } return 0; } @@ -72,14 +78,14 @@ viforwardword(char **args) while (n--) { if (Z_vialnum(zleline[zlecs])) while (zlecs != zlell && Z_vialnum(zleline[zlecs])) - zlecs++; + INCCS(); else while (zlecs != zlell && !Z_vialnum(zleline[zlecs]) && !ZC_iblank(zleline[zlecs])) - zlecs++; + INCCS(); if (wordflag && !n) return 0; while (zlecs != zlell && ZC_inblank(zleline[zlecs])) - zlecs++; + INCCS(); } return 0; } @@ -99,11 +105,11 @@ viforwardblankword(char **args) } while (n--) { while (zlecs != zlell && !ZC_iblank(zleline[zlecs])) - zlecs++; + INCCS(); if (wordflag && !n) return 0; while (zlecs != zlell && ZC_iblank(zleline[zlecs])) - zlecs++; + INCCS(); } return 0; } @@ -123,11 +129,11 @@ emacsforwardword(char **args) } while (n--) { while (zlecs != zlell && !ZC_iword(zleline[zlecs])) - zlecs++; + INCCS(); if (wordflag && !n) return 0; while (zlecs != zlell && ZC_iword(zleline[zlecs])) - zlecs++; + INCCS(); } return 0; } @@ -141,13 +147,14 @@ viforwardblankwordend(UNUSED(char **args)) if (n < 0) return 1; while (n--) { + /* HERE: the zlecs + 1 here is suspect */ while (zlecs != zlell && ZC_iblank(zleline[zlecs + 1])) - zlecs++; + INCCS(); while (zlecs != zlell && 
!ZC_iblank(zleline[zlecs + 1])) - zlecs++; + INCCS(); } if (zlecs != zlell && virangeflag) - zlecs++; + INCCS(); return 0; } @@ -165,18 +172,19 @@ viforwardwordend(char **args) return ret; } while (n--) { + /* HERE: the zlecs + 1 here is suspect */ if (ZC_iblank(zleline[zlecs + 1])) while (zlecs != zlell && ZC_iblank(zleline[zlecs + 1])) - zlecs++; + INCCS(); if (Z_vialnum(zleline[zlecs + 1])) while (zlecs != zlell && Z_vialnum(zleline[zlecs + 1])) - zlecs++; + INCCS(); else while (zlecs != zlell && !Z_vialnum(zleline[zlecs + 1]) && !ZC_iblank(zleline[zlecs + 1])) - zlecs++; + INCCS(); } if (zlecs != zlell && virangeflag) - zlecs++; + INCCS(); return 0; } @@ -194,10 +202,11 @@ backwardword(char **args) return ret; } while (n--) { + /* HERE: the zlecs - 1 here is suspect */ while (zlecs && !ZC_iword(zleline[zlecs - 1])) - zlecs--; + DECCS(); while (zlecs && ZC_iword(zleline[zlecs - 1])) - zlecs--; + DECCS(); } return 0; } @@ -216,14 +225,15 @@ vibackwardword(char **args) return ret; } while (n--) { + /* HERE: the zlecs - 1 here is suspect */ while (zlecs && ZC_iblank(zleline[zlecs - 1])) - zlecs--; + DECCS(); if (Z_vialnum(zleline[zlecs - 1])) while (zlecs && Z_vialnum(zleline[zlecs - 1])) - zlecs--; + DECCS(); else while (zlecs && !Z_vialnum(zleline[zlecs - 1]) && !ZC_iblank(zleline[zlecs - 1])) - zlecs--; + DECCS(); } return 0; } @@ -243,9 +253,9 @@ vibackwardblankword(char **args) } while (n--) { while (zlecs && ZC_iblank(zleline[zlecs - 1])) - zlecs--; + DECCS(); while (zlecs && !ZC_iblank(zleline[zlecs - 1])) - zlecs--; + DECCS(); } return 0; } @@ -265,9 +275,9 @@ emacsbackwardword(char **args) } while (n--) { while (zlecs && !ZC_iword(zleline[zlecs - 1])) - zlecs--; + DECCS(); while (zlecs && ZC_iword(zleline[zlecs - 1])) - zlecs--; + DECCS(); } return 0; } @@ -286,12 +296,16 @@ backwarddeleteword(char **args) return ret; } while (n--) { + /* + * HERE: the zlecs - 1 here is suspect, and we should + * do the DECCS() thing. 
+ */ while (x && !ZC_iword(zleline[x - 1])) x--; while (x && ZC_iword(zleline[x - 1])) x--; } - backdel(zlecs - x); + backdel(zlecs - x, CUT_RAW); return 0; } @@ -306,6 +320,10 @@ vibackwardkillword(UNUSED(char **args)) return 1; /* this taken from "vibackwardword" */ while (n--) { + /* + * HERE: the zlecs - 1 here is suspect, and we should + * do the DECCS() thing. + */ while ((x > lim) && ZC_iblank(zleline[x - 1])) x--; if (Z_vialnum(zleline[x - 1])) @@ -315,7 +333,7 @@ vibackwardkillword(UNUSED(char **args)) while ((x > lim) && !Z_vialnum(zleline[x - 1]) && !ZC_iblank(zleline[x - 1])) x--; } - backkill(zlecs - x, 1); + backkill(zlecs - x, CUT_FRONT); return 0; } @@ -334,12 +352,16 @@ backwardkillword(char **args) return ret; } while (n--) { + /* + * HERE: the zlecs - 1 here is suspect, and we should + * do the DECCS() thing. + */ while (x && !ZC_iword(zleline[x - 1])) x--; while (x && ZC_iword(zleline[x - 1])) x--; } - backkill(zlecs - x, 1); + backkill(zlecs - x, CUT_FRONT); return 0; } @@ -354,10 +376,10 @@ upcaseword(UNUSED(char **args)) n = -n; while (n--) { while (zlecs != zlell && !ZC_iword(zleline[zlecs])) - zlecs++; + INCCS(); while (zlecs != zlell && ZC_iword(zleline[zlecs])) { zleline[zlecs] = ZC_toupper(zleline[zlecs]); - zlecs++; + INCCS(); } } if (neg) @@ -376,10 +398,10 @@ downcaseword(UNUSED(char **args)) n = -n; while (n--) { while (zlecs != zlell && !ZC_iword(zleline[zlecs])) - zlecs++; + INCCS(); while (zlecs != zlell && ZC_iword(zleline[zlecs])) { zleline[zlecs] = ZC_tolower(zleline[zlecs]); - zlecs++; + INCCS(); } } if (neg) @@ -399,14 +421,14 @@ capitalizeword(UNUSED(char **args)) while (n--) { first = 1; while (zlecs != zlell && !ZC_iword(zleline[zlecs])) - zlecs++; + INCCS(); while (zlecs != zlell && ZC_iword(zleline[zlecs]) && !ZC_ialpha(zleline[zlecs])) - zlecs++; + INCCS(); while (zlecs != zlell && ZC_iword(zleline[zlecs])) { zleline[zlecs] = (first) ? 
ZC_toupper(zleline[zlecs]) : ZC_tolower(zleline[zlecs]); first = 0; - zlecs++; + INCCS(); } } if (neg) @@ -429,12 +451,13 @@ deleteword(char **args) return ret; } while (n--) { + /* HERE: we should do the INCCS() thing */ while (x != zlell && !ZC_iword(zleline[x])) x++; while (x != zlell && ZC_iword(zleline[x])) x++; } - foredel(x - zlecs); + foredel(x - zlecs, CUT_RAW); return 0; } @@ -453,12 +476,13 @@ killword(char **args) return ret; } while (n--) { + /* HERE: we should do the INCCS() thing */ while (x != zlell && !ZC_iword(zleline[x])) x++; while (x != zlell && ZC_iword(zleline[x])) x++; } - forekill(x - zlecs, 0); + forekill(x - zlecs, CUT_RAW); return 0; } @@ -474,6 +498,10 @@ transposewords(UNUSED(char **args)) if (neg) n = -n; while (n--) { + /* + * HERE: we should do the INCCS() thing. + * A great deal of the following needs rewriting. + */ while (x != zlell && zleline[x] != ZWC('\n') && !ZC_iword(zleline[x])) x++; if (x == zlell || zleline[x] == ZWC('\n')) { diff --git a/Src/options.c b/Src/options.c index 753a9cb36..a206b2910 100644 --- a/Src/options.c +++ b/Src/options.c @@ -101,6 +101,7 @@ static struct optname optns[] = { {{NULL, "chaselinks", OPT_EMULATE}, CHASELINKS}, {{NULL, "checkjobs", OPT_EMULATE|OPT_ZSH}, CHECKJOBS}, {{NULL, "clobber", OPT_EMULATE|OPT_ALL}, CLOBBER}, +{{NULL, "combiningchars", 0}, COMBININGCHARS}, {{NULL, "completealiases", 0}, COMPLETEALIASES}, {{NULL, "completeinword", 0}, COMPLETEINWORD}, {{NULL, "correct", 0}, CORRECT}, diff --git a/Src/utils.c b/Src/utils.c index 1d0b5dc67..b4770befe 100644 --- a/Src/utils.c +++ b/Src/utils.c @@ -3081,6 +3081,14 @@ wcsitype(wchar_t c, int itype) case IWORD: if (iswalnum(c)) return 1; + /* + * If we are handling combining characters, anything + * printable with zero width needs to be considered + * part of a word. 
+ */ + if (isset(COMBININGCHARS) && + iswprint(c) && wcwidth(c) == 0) + return 1; return !!wmemchr(wordchars_wide.chars, c, wordchars_wide.len); case ISEP: diff --git a/Src/zsh.h b/Src/zsh.h index 08dd140fd..2b8646cb1 100644 --- a/Src/zsh.h +++ b/Src/zsh.h @@ -1715,6 +1715,7 @@ enum { CHASELINKS, CHECKJOBS, CLOBBER, + COMBININGCHARS, COMPLETEALIASES, COMPLETEINWORD, CORRECT, @@ -1936,6 +1937,10 @@ struct ttyinfo { #define tccan(X) (tclen[X]) +/* + * Text attributes for displaying in ZLE + */ + #define TXTBOLDFACE 0x01 #define TXTSTANDOUT 0x02 #define TXTUNDERLINE 0x04 @@ -1955,6 +1960,12 @@ struct ttyinfo { /* Bits to shift off right to get on */ #define TXT_ATTR_OFF_ON_SHIFT (4) +/* + * Indicates to zle_refresh.c that the character entry is an + * index into the list of multiword symbols. + */ +#define TXT_MULTIWORD_MASK 0x100 + #define txtchangeisset(T,X) ((T) & (X)) #define txtchangeset(X, Y) (txtchange |= (X), txtchange &= ~(Y)) -- cgit 1.4.1