From 7df83c6a1c5304506c2fd7d0444ad567493da719 Mon Sep 17 00:00:00 2001 From: Peter Stephenson Date: Mon, 22 Aug 2005 09:27:57 +0000 Subject: 21676: insert-unicode-char now insert-composed-char. Also uses RFC 1345. --- ChangeLog | 7 + Doc/Zsh/contrib.yo | 236 ++++++++++++--------- Doc/zmacros.yo | 1 + Functions/Zle/.distfiles | 33 +-- Functions/Zle/insert-composed-char | 407 +++++++++++++++++++++++++++++++++++++ Functions/Zle/insert-unicode-char | 214 ------------------- 6 files changed, 573 insertions(+), 325 deletions(-) create mode 100644 Functions/Zle/insert-composed-char delete mode 100644 Functions/Zle/insert-unicode-char diff --git a/ChangeLog b/ChangeLog index 1aec3a17b..587c3f23b 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,10 @@ +2005-08-22 Peter Stephenson + + * 21676: Doc/zmacros.yo, Doc/Zsh/contrib.yo, + Functions/Zle/.distfiles, Functions/Zle/insert-composed-char, + Functions/Zle/insert-unicode-char: insert-unicode-char is now + insert-composed-char and uses RFC 1345 for composing characters. + 2005-08-22 Geoff Wing * unposted: Doc/Zsh/metafaq.yo, Etc/FAQ.yo: update some diff --git a/Doc/Zsh/contrib.yo b/Doc/Zsh/contrib.yo index 4fce28412..a2884c4c9 100644 --- a/Doc/Zsh/contrib.yo +++ b/Doc/Zsh/contrib.yo @@ -660,25 +660,23 @@ This works only with the new function based completion system. example(bindkey '^Xi' incremental-complete-word) ) -tindex(insert-files) -item(tt(insert-files))( -This function allows you type a file pattern, and see the results of the -expansion at each step. When you hit return, all expansions are inserted -into the command line. - -example(bindkey '^Xf' insert-files) -) -tindex(insert-unicode-char) -item(tt(insert-unicode-char))( -This function allows you to compose Unicode characters to be inserted -into the command line. The command is followed by two keys (there is -no prompt), of which the first indicates the type of accent or special -character, and the second indicates the base character. 
Both input -characters are always from the ASCII character set. For best results -zsh should have been built with support for multibyte characters -(configured with tt(--enable-multibyte)). - -The character is converted from Unicode into the local representation and +tindex(insert-composed-char) +item(tt(insert-composed-char))( +This function allows you to compose characters that don't appear on the +keyboard to be inserted into the command line. The command is followed by +two keys corresponding to ASCII characters (there is no prompt). For +accented characters, the two keys are a base character followed by a code +for the accent, while for other special characters the two characters +together form a mnemonic for the character to be inserted. The +two-character codes are a subset of those given by RFC 1345 (see for +example tt(http://www.faqs.org/rfcs/rfc1345.html)). + +For best results zsh should have been built with support for multibyte +characters (configured with tt(--enable-multibyte)); however, the function +works for the limited range of characters available in single-byte +character sets such as ISO-8859-1. + +The character is converted into the local representation and inserted into the command line at the cursor position. (The conversion is done within the shell, using whatever facilities the C library provides.) With a numeric argument, the character and its @@ -688,91 +686,139 @@ The function may be run outside zle in which case it prints the character (together with a newline) to standard output. Input is still read from keystrokes. -The set of accented characters is reasonably complete up to U+0180, the -set of special characters less so. However, it mostly gives up at that -point. Adding new Unicode characters is easy, however. Please send any -additions to tt(zsh-workers@sunsite.dk). +The set of accented characters is reasonably complete up to Unicode +character U+0180, the set of special characters less so. 
However, it +mostly gives up at that point. Adding new characters is easy, however. +Please send any additions to tt(zsh-workers@sunsite.dk). -The codes for the first character are as follows: +The codes for the second character when used to accent the first are as +follows. Note that not every character can take every accent. startsitem() -sitem(tt(`))( -Grave accent. -) -sitem(tt('))( -Acute accent. -) -sitem(tt(d))( -Double acute accent (only supported on a few letters). -) -sitem(tt(^))( -Circumflex. -) -sitem(tt(~))( -Tilde. -) -sitem(tt("))( -Diaeresis (Umlaut). -) -sitem(tt(o))( -Circle over the base character. -) -sitem(tt(e))( -Ligatures ending in e or E: tt(e A) gives AE, tt(e o) gives oe, etc. -) -sitem(tt(j))( -Ligatures ending in j or J: ij or IJ. -) -sitem(tt(c))( -Cedilla. +sitem(tt(!))(Grave.) +sitem(tt(RQUOTE()))(Acute.) +sitem(tt(>))(Circumflex.) +sitem(tt(?))(Tilde. (This is not tt(~) as RFC 1345 does not assume that +character is present on the keyboard.)) +sitem(tt(-))(Macron. (A horizontal bar over the base character.)) +sitem(tt(LPAR()))(Breve. (A shallow dish shape over the base character.)) +sitem(tt(.))(Dot above the base character, or in the case of tt(i) no dot, +or in the case of tt(L) and tt(l) a centered dot.) +sitem(tt(:))(Diaeresis (Umlaut).) +sitem(tt(,))(Cedilla.) +sitem(tt(_))(Underline, however there are currently no underlined characters.) +sitem(tt(/))(Stroke through the base character.) +sitem(tt("))(Double acute (only supported on a few letters).) +sitem(tt(;))(Ogonek. (A little forward facing hook at the bottom right +of the character. The code is tt(;); the tt(g) used by the older +tt(insert-unicode-char) function is no longer recognised.)) +sitem(tt(<))(Caron. (A little v over the letter.)) +sitem(tt(0))(Circle over the base character.) +sitem(tt(2))(Hook over the base character.) +sitem(tt(9))(Horn over the base character.) +endsitem() + +The following other two-character sequences are understood.
+ +startitem() +item(ASCII characters)( +These are already present on most keyboards: +startsitem() +sitem(tt(<LPAR()))(Left square bracket) +sitem(tt(//))(Backslash (solidus)) +sitem(tt(RPAR()>))(Right square bracket) +sitem(tt(LPAR()!))(Left brace (curly bracket)) +sitem(tt(!!))(Vertical bar (pipe symbol)) +sitem(tt(!RPAR()))(Right brace (curly bracket)) +sitem(tt(RQUOTE()?))(Tilde) +endsitem() ) -sitem(tt(/))( -Stroke through the base character. +item(Special letters)( +Characters found in various variants of the Latin alphabet: +startsitem() +sitem(tt(ss))(Eszett (scharfes S)) +sitem(tt(D-), tt(d-))(Eth) +sitem(tt(TH), tt(th))(Thorn) +sitem(tt(kk))(Kra) +sitem(tt(RQUOTE()n))(RQUOTE()n) +sitem(tt(NG), tt(ng))(Ng) +sitem(tt(OI), tt(oi))(Oi) +sitem(tt(yr))(yr) +sitem(tt(ED))(ezh) +endsitem() ) -sitem(tt(-))( -Macron. (A horizonal bar over the base character.) +item(Currency symbols)( +startsitem() +sitem(tt(Ct))(Cent) +sitem(tt(Pd))(Pound sterling (also lira and others)) +sitem(tt(Cu))(Currency) +sitem(tt(Ye))(Yen) +sitem(tt(Eu))(Euro (N.B. not in RFC 1345)) +endsitem() ) -sitem(tt(u))( -Breve. (A shallow dish shape over the base character.) +item(Punctuation characters)( +References to "right" quotes indicate the shape (like a 9 rather than 6) +rather than their grammatical use. (For example, a "right" low double +quote is used to open quotations in German.)
+startsitem() +sitem(tt(!I))(Inverted exclamation mark) +sitem(tt(BB))(Broken vertical bar) +sitem(tt(SE))(Section) +sitem(tt(Co))(Copyright) +sitem(tt(-a))(Spanish feminine ordinal indicator) +sitem(tt(<<))(Left guillemet) +sitem(tt(--))(Soft hyphen) +sitem(tt(Rg))(Registered trade mark) +sitem(tt(PI))(Pilcrow (paragraph)) +sitem(tt(-o))(Spanish masculine ordinal indicator) +sitem(tt(>>))(Right guillemet) +sitem(tt(?I))(Inverted question mark) +sitem(tt(RQUOTE()6))(Left single quote) +sitem(tt(RQUOTE()9))(Right single quote) +sitem(tt(.9))("Right" low quote) +sitem(tt(9+RQUOTE()))(Reversed "right" quote) +sitem(tt("6))(Left double quote) +sitem(tt("9))(Right double quote) +sitem(tt(:9))("Right" low double quote) +sitem(tt(9"))(Reversed "right" double quote) +sitem(tt(/-))(Dagger) +sitem(tt(/=))(Double dagger) +endsitem() ) -sitem(tt(.))( -Dot above the base character +item(Mathematical symbols)( +startsitem() +sitem(tt(DG))(Degree) +sitem(tt(+-))(+/- sign) +sitem(tt(2S))(Superscript 2) +sitem(tt(3S))(Superscript 3) +sitem(tt(1S))(Superscript 1) +sitem(tt(My))(Micro) +sitem(tt(.M))(Middle dot) +sitem(tt(14))(Quarter) +sitem(tt(12))(Half) +sitem(tt(34))(Three quarters) +sitem(tt(*X))(Multiplication) +sitem(tt(-:))(Division) +endsitem() ) -sitem(tt(:))( -A dot in the middle plane of the base character +item(Accents on their own)( +startsitem() +sitem(tt(RQUOTE()>))(Circumflex (same as caret, tt(^))) +sitem(tt(RQUOTE()!))(Grave (same as backtick, tt(`))) +sitem(tt(RQUOTE(),))(Cedilla) +sitem(tt(RQUOTE():))(Diaeresis (Umlaut)) +sitem(tt(RQUOTE()m))(Macron) +sitem(tt(RQUOTE()RQUOTE()))(Acute) +endsitem() ) -sitem(tt(g))( -Ogonek. (A little forward facing hook at the bottom right -of the character. The "g" stands for "Ogonek" but another -mnemonic is that g has a squiggle below the line.) -) -sitem(tt(v))( -Caron. (A little v over the letter.) -) -sitem(tt(s))( -Used only as tt(s s), a german Eszett or "scharfes S" ligature. 
-) -sitem(tt(h))( -Icelandic (or Runic) edh (tt(h d)) or thorn (tt(h t)). -) -sitem(tt(m))( -Various mathematical characters: not (tt(m \)), multiply (tt(m *)), divide -(tt(m /)), degree (tt(m o)), +/- (tt(m +)), superscripts 1, 2, 3 (tt(m 1), -etc.), micro (tt(m u)), quarter (tt(m q)), half (tt(m h)), three quarters -(tt(m t)). -) -sitem(tt(p))( -Various punctuation and currency characters (any non-mathematical symbol -that is not part of a word): soft space (tt(p _)), inverted ! (tt(p !)), -cent (tt(p %)), pound sign (tt(p l)) (think lira, librum), currency (tt(p -$)), yen (tt(p y)), broken bar (tt(p |)), section sign (tt(p s)), lonely -diaeresis (tt(p ")), copyright sign (tt(p C)), Spanish feminine marker -(tt(p f)), left guillemet (tt(p <)), soft hyphen (tt(p h)), registered -trade mark (tt(p R)), lonely macron (tt(p -)), lonely acute (tt(p ')), -Pilcrow (paragraph) sign (tt(p p)), middle dot (tt(p :)), -lonely cedilla (tt(p c)), Spanish masculine marker (tt(p m)), right -guillemet (tt(p >)), inverted ? (tt(p ?)), Euro sign (tt(p e)). +enditem() ) -endsitem() +tindex(insert-files) +item(tt(insert-files))( +This function allows you type a file pattern, and see the results of the +expansion at each step. When you hit return, all expansions are inserted +into the command line. 
+ +example(bindkey '^Xf' insert-files) ) tindex(narrow-to-region) tindex(narrow-to-region-invisible) diff --git a/Doc/zmacros.yo b/Doc/zmacros.yo index 882abf41a..1c3319059 100644 --- a/Doc/zmacros.yo +++ b/Doc/zmacros.yo @@ -17,6 +17,7 @@ DEFINEMACRO(redef)(3)(\ DEFINEMACRO(ARG1)(ARG2)(ARG3)\ ) +DEFINEMACRO(RQUOTE)(0)(CHAR(39)) DEFINEMACRO(LPAR)(0)(CHAR(40)) DEFINEMACRO(RPAR)(0)(CHAR(41)) DEFINEMACRO(PLUS)(0)(CHAR(43)) diff --git a/Functions/Zle/.distfiles b/Functions/Zle/.distfiles index 499f33b6f..c6852732f 100644 --- a/Functions/Zle/.distfiles +++ b/Functions/Zle/.distfiles @@ -1,19 +1,20 @@ DISTFILES_SRC=' .distfiles -cycle-completion-positions incarg predict-on -edit-command-line incremental-complete-word smart-insert-last-word -history-search-end insert-files -copy-earlier-word -down-line-or-beginning-search -up-line-or-beginning-search -narrow-to-region narrow-to-region-invisible -read-from-minibuffer replace-string -backward-kill-word-match backward-word-match capitalize-word-match -down-case-word-match forward-word-match kill-word-match -match-words-by-style select-word-style transpose-words-match -up-case-word-match -delete-whole-word-match quote-and-complete-word url-quote-magic -zed-set-file-name history-pattern-search -keeper -which-command +backward-kill-word-match backward-word-match +capitalize-word-match copy-earlier-word +cycle-completion-positions delete-whole-word-match +down-case-word-match down-line-or-beginning-search +edit-command-line forward-word-match +history-pattern-search history-search-end +incarg incremental-complete-word +insert-composed-char insert-files +keeper kill-word-match +match-words-by-style narrow-to-region +narrow-to-region-invisible predict-on +quote-and-complete-word read-from-minibuffer +replace-string select-word-style +smart-insert-last-word transpose-words-match +up-case-word-match up-line-or-beginning-search +url-quote-magic which-command +zed-set-file-name ' diff --git a/Functions/Zle/insert-composed-char 
b/Functions/Zle/insert-composed-char
new file mode 100644
index 000000000..60a42e089
--- /dev/null
+++ b/Functions/Zle/insert-composed-char
@@ -0,0 +1,407 @@
+# Accented characters. Inputs two keys. There are two types: those
+# with a base character followed by an accent (see below for codes for
+# accents), and those with a two-character mnemonic for the composed
+# character. These are (with the exception of the Euro) the codes
+# given by RFC 1345. Note that some codes in RFC 1345 require three
+# characters to be input; none of these are handled.
+#
+# For best results zsh should have been built with support for
+# multibyte characters (--enable-multibyte), but single-byte character
+# sets also work.
+#
+# Outputs the character converted from Unicode into the local representation.
+# (The conversion is done within the shell, using whatever facilities
+# the C library provides.)
+#
+# When used as a zle widget, the character is inserted at the cursor
+# position. With a numeric argument, preview in status line; outside zle,
+# print character (and newline) to standard output.
+#
+# The set of accented characters is reasonably complete up to U+0180, the
+# set of special characters less so. However, it mostly gives up at that
+# point. Adding new Unicode characters is easy, however. Please send any
+# additions to zsh-workers@sunsite.dk .
+#
+# Some of the accent codes are a little more obscure than others.
+#   !   Grave
+#   '   Acute
+#   >   Circumflex
+#   ?   Tilde
+#   -   Macron. (A horizontal bar over the letter.)
+#   (   Breve. (A shallow dish shape over the letter.)
+#   .   Dot above, or no dot with lower case i, or dot in the middle of L or l.
+#   :   Diaeresis (Umlaut)
+#   ,   Cedilla
+#   _   Underline (none of these currently)
+#   /   Stroke through character
+#   "   Double acute
+#   ;   Ogonek. (A little forward facing hook at the bottom right
+#       of the character.)
+#   <   Caron. (A little v over the letter.)
+#   0   Circle
+#   2   Hook
+#   9   Horn
+# Hence A! is upper case A with a grave, c, is lower case c with cedilla.
+#
+# Some other composed characters:
+# Various ligatures:
+#   AE ae OE oe IJ ij
+#
+# ASCII characters not on all keyboards:
+#   <(  [
+#   //  \
+#   )>  ]
+#   (!  {
+#   !!  |
+#   !)  }
+#   '?  ~
+#
+# Special letters:
+#   ss  Eszett (scharfes S)
+#   D- d- TH th  Eth and thorn
+#   kk  kra
+#   'n  'n
+#   NG ng  ng
+#   OI oi  OI
+#   yr  yr
+#   ED  ezh
+#
+# Currency symbols:
+#   Ct  Cent
+#   Pd  Pound sterling
+#   Cu  Currency
+#   Ye  Yen
+#   Eu  Euro (not in RFC 1345 but logical)
+#
+# Punctuation
+#   !I  Inverted !
+#   BB  Broken vertical bar
+#   SE  Section
+#   Co  Copyright
+#   -a  Spanish feminine ordinal indicator
+#   <<  Left guillemet
+#   --  Soft hyphen
+#   Rg  Registered trade mark
+#   PI  Pilcrow (paragraph)
+#   -o  Spanish masculine ordinal indicator
+#   >>  Right guillemet
+#   ?I  Inverted question mark
+#   '6  Left single quote
+#   '9  Right single quote
+#   .9  "Right" low quote
+#   9'  Reversed "right" quote
+#   "6  Left double quote
+#   "9  Right double quote
+#   :9  "Right" low double quote
+#   9"  Reversed "right" double quote
+#   /-  Dagger
+#   /=  Double dagger
+#
+# Mathematical
+#   DG  Degree
+#   +-  +/-
+#   2S  Superscript 2
+#   3S  Superscript 3
+#   My  Micro
+#   .M  Middle dot
+#   1S  Superscript 1
+#   14  Quarter
+#   12  Half
+#   34  Three quarters
+#   *X  Multiplication
+#   -:  Division
+#
+# Accents with no base character
+#   '>  Circumflex (caret)
+#   '!  Grave (backtick)
+#   ',  Cedilla
+#   ':  Diaeresis (Umlaut)
+#   'm  Macron
+#   ''  Acute
+
+emulate -LR zsh
+setopt cbases extendedglob printeightbit
+
+local accent basechar ochar error
+
+if [[ -n $WIDGET ]]; then
+  error=(zle -M)
+else
+  error=print
+fi
+
+if (( ${+zsh_accented_chars} == 0 )); then
+  # The associative array zsh_accented_chars is indexed by the
+  # accent. The values are sets of character / Unicode pairs for
+  # the character with the given accent. The Unicode value is
+  # a hex index with no base discriminator; essentially a UCS-4 index
+  # with the leading zeroes suppressed.
+  typeset -gA zsh_accented_chars
+
+  # grave
+  accent=\!
+  zsh_accented_chars[$accent]="\
+A C0 E C8 I CC O D2 U D9 a E0 e E8 i EC o F2 u F9 N 1F8 n 1F9 \
+"
+  # acute
+  accent=\'
+  zsh_accented_chars[$accent]="\
+A C1 E C9 I CD O D3 U DA Y DD a E1 e E9 i ED o F3 u FA y FD C 106 c 107 \
+L 139 l 13A N 143 n 144 R 154 r 155 S 15A s 15B Z 179 z 17A \
+"
+  # circumflex
+  accent=\>
+  zsh_accented_chars[$accent]="\
+A C2 E CA I CE O D4 U DB a E2 e EA i EE o F4 u FB C 108 c 109 G 11C g 11d \
+H 124 h 125 J 134 j 135 S 15C s 15D W 174 w 175 Y 176 y 177 \
+"
+  # tilde
+  accent=\?
+  zsh_accented_chars[$accent]="\
+A C3 E 1EBC N D1 O D5 a E3 e 1EBD n F1 o F5 I 128 i 129 U 168 u 169 \
+"
+  # macron (d-, D- give eth)
+  accent=-
+  zsh_accented_chars[$accent]="\
+A 100 a 101 d F0 D D0 E 112 e 113 I 12a i 12b O 14C o 14D U 16A u 16B \
+"
+  # breve
+  accent=\(
+  zsh_accented_chars[$accent]="\
+A 102 a 103 E 114 e 115 G 11E g 11F I 12C i 12D O 14E o 14F U 16C u 16D \
+"
+  # dot above, small i with no dot, or l with middle dot
+  accent=.
+  zsh_accented_chars[$accent]="\
+C 10A c 10b E 116 e 117 G 120 g 121 I 130 i 131 L 13F l 140 Z 17B z 17C \
+"
+  # diaeresis / Umlaut
+  accent=:
+  zsh_accented_chars[$accent]="\
+A C4 E CB I CF O D6 U DC a E4 e EB i EF o F6 u FC y FF Y 178 \
+"
+  # cedilla
+  accent=,
+  zsh_accented_chars[$accent]="\
+C C7 c E7 G 122 g 123 K 136 k 137 L 13B l 13C N 145 n 146 R 156 r 157 \
+S 15E s 15F T 162 t 163 \
+"
+  # underline (_) would go here
+  # stroke through
+  accent=/
+  zsh_accented_chars[$accent]="\
+O D8 o F8 D 110 d 111 H 126 h 127 L 141 l 142 T 166 t 167 b 180 \
+"
+  # double acute
+  accent=\"
+  zsh_accented_chars[$accent]="\
+O 150 o 151 U 170 u 171\
+"
+  # ogonek
+  accent=\;
+  zsh_accented_chars[$accent]="\
+A 104 a 105 E 118 e 119 I 12E i 12F U 172 u 173 \
+"
+  # caron
+  accent=\<
+  zsh_accented_chars[$accent]="\
+C 10C c 10D D 10E d 10F E 11A e 11B L 13D l 13E N 147 n 148 R 158 r 159 \
+S 160 s 161 T 164 t 165 Z 17D z 17E \
+"
+  # ring above
+  accent=0
+  zsh_accented_chars[$accent]="\
+A C5 a E5 U 16E u 16F \
+"
+  # hook above
+  accent=2
+  zsh_accented_chars[$accent]="\
+A 1EA2 a 1EA3 E 1EBA e 1EBB \
+"
+  # horn, also right quotation marks
+  accent=9
+  zsh_accented_chars[$accent]="\
+O 1A0 o 1A1 U 1Af u 1b0 ' 2019 . 201A \" 201D : 201E \
+"
+  # left quotation marks
+  accent=6
+  zsh_accented_chars[$accent]="\
+' 2018 \" 201C \
+"
+  # reversed quotation marks for convenience
+  accent=\'
+  zsh_accented_chars[$accent]+=" \
+9 201B \
+"
+  accent=\"
+  zsh_accented_chars[$accent]+=" \
+9 201F \
+"
+
+  # ligature with E (key must be upper case E, distinct from the e entry below)
+  accent=E
+  zsh_accented_chars[$accent]="\
+A C6 O 152 \
+"
+  # ligature with e
+  accent=e
+  zsh_accented_chars[$accent]="\
+a E6 o 153 \
+"
+  # ligature with J
+  accent=J
+  zsh_accented_chars[$accent]="\
+I 132 \
+"
+  # ligature with j
+  accent=j
+  zsh_accented_chars[$accent]="\
+i 133 \
+"
+  # eszett
+  accent=s
+  zsh_accented_chars[$accent]="\
+s DF \
+"
+  # upper case thorn
+  accent=H
+  zsh_accented_chars[$accent]="\
+T DE \
+"
+  # lower case thorn
+  accent=h
+  zsh_accented_chars[$accent]="\
+t FE \
+"
+
+  # Remaining characters are handled as separate pairs.
+  # We need to remember that the assoc array is keyed by the second character.
+  # Left square bracket
+  accent=\(
+  zsh_accented_chars[$accent]+=" < 5B"
+  # Reverse solidus (backslash to you and me).
+  accent=/
+  zsh_accented_chars[$accent]+=" / 5C"
+  # Right square bracket, circumflex
+  accent=\>
+  zsh_accented_chars[$accent]+=" ) 5D ' 5E"
+  # Grave accent
+  accent=\!
+  zsh_accented_chars[$accent]+=" ' 60"
+  # digraphs for (usually) standard characters {, |, }, ~
+  accent=\!
+  zsh_accented_chars[$accent]+=" ( 7B"
+  zsh_accented_chars[$accent]+=" ! 7C"
+  accent=\)
+  zsh_accented_chars[$accent]+=" ! 7D"
+  accent=\?
+  zsh_accented_chars[$accent]+=" ' 7E"
+  # non-breaking space
+  zsh_accented_chars[S]+=" N A0"
+  # inverted exclamation mark
+  zsh_accented_chars[I]+=" ! A1"
+  # cent
+  zsh_accented_chars[t]+=" C A2"
+  # pound sterling
+  zsh_accented_chars[d]+=" P A3"
+  # currency
+  zsh_accented_chars[u]+=" C A4"
+  # yen
+  zsh_accented_chars[e]+=" Y A5"
+  # broken bar
+  zsh_accented_chars[B]+=" B A6"
+  # section
+  zsh_accented_chars[E]+=" S A7"
+  # lonely diaeresis
+  zsh_accented_chars[:]+=" ' A8"
+  # copyright
+  zsh_accented_chars[o]+=" C A9"
+  # spanish feminine ordinal
+  zsh_accented_chars[a]+=" - AA"
+  # left guillemet
+  accent=\<
+  zsh_accented_chars[$accent]+=" < AB"
+  zsh_accented_chars[O]+=" N AC"
+  # soft hyphen
+  zsh_accented_chars[-]+=" - AD"
+  # registered
+  zsh_accented_chars[g]+=" R AE"
+  # lonely macron
+  zsh_accented_chars[m]+=" ' AF"
+  # degree
+  zsh_accented_chars[G]+=" D B0"
+  # +/-
+  zsh_accented_chars[-]+=" + B1"
+  # superscripts
+  zsh_accented_chars[S]+=" 2 B2 3 B3"
+  # lonely acute
+  accent=\'
+  zsh_accented_chars[$accent]+=" ' B4"
+  # micro
+  zsh_accented_chars[y]+=" M B5"
+  # pilcrow (paragraph)
+  zsh_accented_chars[I]+=" P B6"
+  # Middle dot
+  zsh_accented_chars[M]+=" . B7"
+  # Lonely cedilla
+  zsh_accented_chars[,]+=" ' B8"
+  # Superscript one
+  zsh_accented_chars[S]+=" 1 B9"
+  # spanish masculine ordinal
+  zsh_accented_chars[o]+=" - BA"
+  # right guillemet
+  accent=\>
+  zsh_accented_chars[$accent]+=" > BB"
+  # fractions
+  zsh_accented_chars[4]+=" 1 BC 3 BE"
+  zsh_accented_chars[2]+=" 1 BD"
+  # inverted question mark
+  zsh_accented_chars[I]+=" ? BF"
+  # multiplication
+  zsh_accented_chars[X]+=" * D7"
+  # division
+  zsh_accented_chars[:]+=" - F7"
+  # kra
+  zsh_accented_chars[k]+=" k 138"
+  # apostrophe n
+  zsh_accented_chars[n]+=" ' 149"
+  # Lappish ng
+  zsh_accented_chars[G]+=" N 14A"
+  zsh_accented_chars[g]+=" n 14B"
+  # OI
+  zsh_accented_chars[I]+=" O 1A2"
+  zsh_accented_chars[i]+=" o 1A3"
+  # yr
+  zsh_accented_chars[r]+=" y 1A6"
+  # ezh
+  zsh_accented_chars[D]+=" E 1B7"
+  # euro (I invented this but it's logical)
+  zsh_accented_chars[u]+=" E 20AC"
+  # dagger and double dagger
+  zsh_accented_chars[-]+=" / 2020"
+  zsh_accented_chars[=]+=" / 2021"
+fi
+
+read -k basechar || return 1
+read -k accent || return 1
+
+local -A charmap
+charmap=(${=zsh_accented_chars[$accent]})
+
+if [[ ${#charmap} -eq 0 || -z $charmap[$basechar] ]]; then
+  $error "Combination ${basechar}${accent} is not available."
+  return 1
+fi
+
+if [[ -z $WIDGET ]]; then
+  [[ -t 1 ]] && print
+  print "\U${(l.8..0.)charmap[$basechar]}"
+else
+  ochar="$(print -n "\U${(l.8..0.)charmap[$basechar]}")"
+
+  if (( ${+NUMERIC} )); then
+    $error "Character ${(l.8..0.)charmap[$basechar]}: $ochar"
+  else
+    LBUFFER+=$ochar
+  fi
+fi
diff --git a/Functions/Zle/insert-unicode-char b/Functions/Zle/insert-unicode-char
deleted file mode 100644
index f8767fdd2..000000000
--- a/Functions/Zle/insert-unicode-char
+++ /dev/null
@@ -1,214 +0,0 @@
-# Accented characters. Inputs two keys: first the code for the accent, then
-# the base character being accented. Note that all input characters are
-# ASCII. For best results zsh should have been built with support for
-# multibyte characters (--enable-multibyte).
-#
-# Outputs the character converted from Unicode into the local representation.
-# (The conversion is done within the shell, using whatever facilities
-# the C library provides.)
-#
-# When used as a zle widget, the character is inserted at the cursor
-# position.
With a numeric argument, preview in status line; outside zle, -# print character (and newline) to standard output. -# -# The set of accented characters is reasonably complete up to U+0180, the -# set of special characters less so. However, it mostly gives up at that -# point. Adding new Unicode characters is easy, however. Please send any -# additions to zsh-workers@sunsite.dk . -# -# Some of the accent codes are a little more obscure than others. -# Only the base character changes for upper case: A with circle is "o A". -# ` Grave -# ' Acute -# d Double acute -# ^ Circumflex -# ~ Tilde -# " Diaeresis (Umlaut) -# o Circle -# e Ligatures ending in e or E: e A gives AE, e o gives oe, etc. -# j Ligatures ending in j or J: ij or IJ -# c Cedilla -# / Stroke through character -# - Macron. (A horizonal bar over the letter.) -# u Breve. (A shallow dish shape over the letter.) -# . Dot above -# : Middle dot -# g Ogonek. (A little forward facing hook at the bottom right -# of the character. The "g" stands for "Ogonek" but another -# mnemonic is that g has a squiggle below the line.) -# v Caron. (A little v over the letter.) -# s s s = Eszett (lower case only) -# h Icelandic (or Runic) edh (h d) or thorn (h t) -# m Mathematical: not (m \), multiply (m *), divide (m /), degree (m o), -# +/- (m +), superscripts 1, 2, 3 (m 1 etc.), micro (m u), quarter (m q), -# half (m h), three quarters (m t) -# p Punctuation (and currency etc.): soft space (p _), inverted ! (p !), -# cent (p C), pound sign (p l) (think lira, librum), currency (p $), -# yen (p y), broken bar (p |), section (p s), lonely diaeresis (p "), -# copyright (p C), Spanish feminine marker (p f), left guillemet (p -# <), soft hyphen (p h), registered trade mark (p R), lonely macron (p -# -), lonely acute (p '), Pilcrow (paragraph) (p p), middle dot (p :), -# lonely cedilla (p c), Spanish masculine marker (p m), right -# guillemet (p >), inverted ? (p ?), Euro sign (p e). 
-# - -emulate -LR zsh -setopt cbases extendedglob printeightbit - -local accent basechar ochar error - -if [[ -n $WIDGET ]]; then - error=(zle -M) -else - error=print -fi - -if (( ${+zsh_accented_chars} == 0 )); then - # The associative array zsh_accent_chars is indexed by the - # accent. The values are sets of character / Unicode pairs for - # the character with the given accent. The Unicode value is - # a hex index with no base discriminator; essentially a UCS-4 index - # with the leading zeroes suppressed. - typeset -gA zsh_accented_chars - - # grave - accent=\` - zsh_accented_chars[$accent]="\ -A C0 E C8 I CC O D2 U D9 a E0 e E8 i EC o F2 u F9 N 1F8 n 1F9 \ -" - # acute - accent=\' - zsh_accented_chars[$accent]="\ -A C1 E C9 I CD O D3 U DA Y DD a E1 e E9 i EC o F3 u FA y FD C 106 c 107 \ -L 139 l 13A N 143 n 144 R 154 r 155 S 15A s 15B Z 179 z 17A \ -" - # double acute - accent=d - zsh_accented_chars[$accent]="\ -O 150 o 151 U 170 u 171\ -" - # circumflex - accent=\^ - zsh_accented_chars[$accent]="\ -A C2 E CA I CE O D4 U DB a E2 e EA i EE o F4 u FB C 108 c 109 G 11C g 11d \ -H 124 h 125 J 134 j 135 S 15C s 15D W 174 w 175 Y 176 y 177 \ -" - # tilde - accent=\~ - zsh_accented_chars[$accent]="\ -A C3 E CB N D1 O D5 a E3 n F1 o F5 I 128 i 129 U 168 u 169 \ -" - # diaeresis / Umlaut - accent=\" - zsh_accented_chars[$accent]="\ -A C4 I CF O D6 U DC a E4 e EB i EF o F6 u FC y FF Y 178 \ -" - # ring above - accent=o - zsh_accented_chars[$accent]="\ -A C5 a E5 U 16E u 16F \ -" - # ligature with e or E - accent=e - zsh_accented_chars[$accent]="\ -A C6 a E6 O 152 o 153 \ -" - # ligature with j or J - accent=j - zsh_accented_chars[$accent]="\ -I 132 i 133\ -" - # cedilla - accent=c - zsh_accented_chars[$accent]="\ -C C7 c E7 G 122 g 123 K 136 k 137 L 13B l 13C N 145 n 146 R 156 r 157 \ -S 15E s 15F T 162 t 163 \ -" - # stroke through - accent=/ - zsh_accented_chars[$accent]="\ -O D8 o F8 D 110 d 111 H 126 h 127 L 141 l 142 T 166 t 167 b 180 \ -" - # macron - accent=- - 
zsh_accented_chars[$accent]="\ -A 100 a 101 E 112 e 113 I 12a i 12b O 14C o 14D U 16A u 16B \ -" - # breve - accent=u - zsh_accented_chars[$accent]="\ -A 102 a 103 E 114 e 115 G 11E g 11F I 12C i 12D O 14E o 14F U 16C u 16D \ -" - # dot above - accent=. - zsh_accented_chars[$accent]="\ -C 10A c 10b E 116 e 117 G 120 g 121 I 130 i 131 Z 17B z 17C \ -" - # middle dot - accent=: - zsh_accented_chars[$accent]="\ -L 13F l 140 \ -" - # ogonek - accent=g - zsh_accented_chars[$accent]="\ -A 104 a 105 E 118 e 119 I 12E i 12F U 172 u 173 \ -" - # caron - accent=v - zsh_accented_chars[$accent]="\ -C 10C c 10D D 10E d 10F E 11A e 11B L 13D l 13E N 147 n 148 R 158 r 159 \ -S 160 s 161 T 164 t 165 Z 17D z 17E \ -" - # eszett - accent=s - zsh_accented_chars[$accent]="\ -s DF \ -" - # edh or thorn - accent=h - zsh_accented_chars[$accent]="\ -D D0 d F0 t FE \ -" - # mathematical - accent=m - zsh_accented_chars[$accent]="\ -\\ AC o B0 * D7 / F7 + B1 2 B2 3 B3 u B5 1 B9 q BC h BD t BE\ -" - # punctuation and currency - accent=p - zsh_accented_chars[$accent]="\ -_ A0 ! A1 C A2 l A3 $ A4 y A5 | A6 s A7 \" A8 C A9 f AA < AB \ -h AD R AE - AF ' B4 p B6 : B7 c B8 m BA > BB ? BF e 20AC \ -" -fi - -read -k accent || return 1 - -if [[ -z $zsh_accented_chars[$accent] ]]; then - $error "No accented characters with accent: $accent" - return 1 -fi - -local -A charmap -charmap=(${=zsh_accented_chars[$accent]}) - -read -k basechar - -if [[ -z $charmap[$basechar] ]]; then - $error "Accent $accent not available with character $basechar" - return 1 -fi - -if [[ -z $WIDGET ]]; then - [[ -t 1 ]] && print - print "\U${(l.8..0.)charmap[$basechar]}" -else - ochar="$(print -n "\U${(l.8..0.)charmap[$basechar]}")" - - if (( ${+NUMERIC} )); then - $error "Character ${(l.8..0.)charmap[$basechar]}: $ochar" - else - LBUFFER+=$ochar - fi -fi -- cgit 1.4.1