about summary refs log tree commit diff
path: root/iconvdata/utf-7.c
diff options
context:
space:
mode:
author Max Gautier <mg@max.gautier.name> 2022-03-21 09:25:05 -0300
committer Adhemerval Zanella <adhemerval.zanella@linaro.org> 2022-03-21 13:23:57 -0300
commit 9df157b4ed52108495de9f8bc77ef922034e3b66 (patch)
tree 5854a8896b112f8cc138357380ae98b2b295203c /iconvdata/utf-7.c
parent ef7b963280f0b110e589cbe29cdf31d88fb171f0 (diff)
downloadglibc-9df157b4ed52108495de9f8bc77ef922034e3b66.tar.gz
glibc-9df157b4ed52108495de9f8bc77ef922034e3b66.tar.xz
glibc-9df157b4ed52108495de9f8bc77ef922034e3b66.zip
iconv: Add UTF-7-IMAP variant in utf-7.c
UTF-7-IMAP differs from UTF-7 in the following ways (see RFC 3501[1]
for reference):

- The shift character is '&' instead of '+'
- There are no "optional direct characters", and the "direct characters"
  set is different
- There is no implicit shift back to US-ASCII from BASE64; all BASE64
  sequences MUST be terminated with '-'

[1]: https://datatracker.ietf.org/doc/html/rfc3501#section-5.1.3

Signed-off-by: Max Gautier <mg@max.gautier.name>
Reviewed-by: Adhemerval Zanella  <adhemerval.zanella@linaro.org>
Diffstat (limited to 'iconvdata/utf-7.c')
-rw-r--r-- iconvdata/utf-7.c | 30
1 files changed, 24 insertions, 6 deletions
diff --git a/iconvdata/utf-7.c b/iconvdata/utf-7.c
index 393fd3583f..1b1f68e1f2 100644
--- a/iconvdata/utf-7.c
+++ b/iconvdata/utf-7.c
@@ -33,11 +33,13 @@
 enum variant
 {
   UTF7,
+  UTF_7_IMAP
 };
 
 /* Must be in the same order as enum variant above.  */
 static const char names[] =
   "UTF-7//\0"
+  "UTF-7-IMAP//\0"
   "\0";
 
 static uint32_t
@@ -45,6 +47,8 @@ shift_character (enum variant const var)
 {
   if (var == UTF7)
     return '+';
+  else if (var == UTF_7_IMAP)
+    return '&';
   else
     abort ();
 }
@@ -58,6 +62,9 @@ between (uint32_t const ch,
 
 /* The set of "direct characters":
    A-Z a-z 0-9 ' ( ) , - . / : ? space tab lf cr
+   FOR UTF-7-IMAP
+   A-Z a-z 0-9 ' ( ) , - . / : ? space
+   ! " # $ % + * ; < = > @ [ \ ] ^ _ ` { | } ~
 */
 
 static bool
@@ -71,6 +78,8 @@ isdirect (uint32_t ch, enum variant var)
 	    || between (ch, ',', '/')
 	    || ch == ':' || ch == '?'
 	    || ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r');
+  else if (var == UTF_7_IMAP)
+    return (ch != '&' && between (ch, ' ', '~'));
   abort ();
 }
 
@@ -124,6 +133,8 @@ base64 (unsigned int i, enum variant var)
     return '+';
   else if (i == 63 && var == UTF7)
     return '/';
+  else if (i == 63 && var == UTF_7_IMAP)
+    return ',';
   else
     abort ();
 }
@@ -308,7 +319,8 @@ gconv_end (struct __gconv_step *data)
 	  i = ch - '0' + 52;						      \
 	else if (ch == '+')						      \
 	  i = 62;							      \
-	else if (ch == '/')						      \
+	else if ((var == UTF7 && ch == '/')                                   \
+		  || (var == UTF_7_IMAP && ch == ','))			      \
 	  i = 63;							      \
 	else								      \
 	  {								      \
@@ -316,8 +328,10 @@ gconv_end (struct __gconv_step *data)
 									      \
 	    /* If accumulated data is nonzero, the input is invalid.  */      \
 	    /* Also, partial UTF-16 characters are invalid.  */		      \
-	    if (__builtin_expect (statep->__value.__wch != 0, 0)	      \
-		|| __builtin_expect ((statep->__count >> 3) <= 26, 0))	      \
+	    /* In IMAP variant, must be terminated by '-'.  */		      \
+	    if (__glibc_unlikely (statep->__value.__wch != 0)		      \
+		|| __glibc_unlikely ((statep->__count >> 3) <= 26)	      \
+		|| __glibc_unlikely (var == UTF_7_IMAP && ch != '-'))	      \
 	      {								      \
 		STANDARD_FROM_LOOP_ERR_HANDLER ((statep->__count = 0, 1));    \
 	      }								      \
@@ -474,13 +488,15 @@ gconv_end (struct __gconv_step *data)
     else								      \
       {									      \
 	/* base64 encoding active */					      \
-	if (isdirect (ch, var))						      \
+	if ((var == UTF_7_IMAP && ch == '&') || isdirect (ch, var))	      \
 	  {								      \
 	    /* deactivate base64 encoding */				      \
 	    size_t count;						      \
 									      \
 	    count = ((statep->__count & 0x18) >= 0x10)			      \
-	      + needs_explicit_shift (ch) + 1;				      \
+	      + (var == UTF_7_IMAP || needs_explicit_shift (ch))	      \
+	      + (var == UTF_7_IMAP && ch == '&')			      \
+	      + 1;							      \
 	    if (__glibc_unlikely (outptr + count > outend))		      \
 	      {								      \
 		result = __GCONV_FULL_OUTPUT;				      \
@@ -489,9 +505,11 @@ gconv_end (struct __gconv_step *data)
 									      \
 	    if ((statep->__count & 0x18) >= 0x10)			      \
 	      *outptr++ = base64 ((statep->__count >> 3) & ~3, var);	      \
-	    if (needs_explicit_shift (ch))				      \
+	    if (var == UTF_7_IMAP || needs_explicit_shift (ch))		      \
 	      *outptr++ = '-';						      \
 	    *outptr++ = (unsigned char) ch;				      \
+	    if (var == UTF_7_IMAP && ch == '&')				      \
+	      *outptr++ = '-';						      \
 	    statep->__count = 0;					      \
 	  }								      \
 	else								      \