about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r--catgets/gencat.c65
1 files changed, 52 insertions, 13 deletions
diff --git a/catgets/gencat.c b/catgets/gencat.c
index 0200ca44ef..49b63363ba 100644
--- a/catgets/gencat.c
+++ b/catgets/gencat.c
@@ -152,10 +152,11 @@ static void write_out (struct catalog *result, const char *output_name,
 		       const char *header_name);
 static struct set_list *find_set (struct catalog *current, int number);
 static void normalize_line (const char *fname, size_t line, iconv_t cd,
-			    wchar_t *string, wchar_t quote_char);
+			    wchar_t *string, wchar_t quote_char,
+			    wchar_t escape_char);
 static void read_old (struct catalog *catalog, const char *file_name);
 static int open_conversion (const char *codesetp, iconv_t *cd_towcp,
-			    iconv_t *cd_tombp);
+			    iconv_t *cd_tombp, wchar_t *escape_charp);
 
 
 int
@@ -272,6 +273,7 @@ read_input_file (struct catalog *current, const char *fname)
   size_t wbufsize;
   iconv_t cd_towc = (iconv_t) -1;
   iconv_t cd_tomb = (iconv_t) -1;
+  wchar_t escape_char = L'\\';
   char *codeset = NULL;
 
   if (strcmp (fname, "-") == 0 || strcmp (fname, "/dev/stdin") == 0)
@@ -522,7 +524,8 @@ this is the first definition"));
 
 	      /* We need the conversion.  */
 	      if (cd_towc == (iconv_t) -1
-		  && open_conversion (codeset, &cd_towc, &cd_tomb) != 0)
+		  && open_conversion (codeset, &cd_towc, &cd_tomb,
+				      &escape_char) != 0)
 		/* Something is wrong.  */
 		goto out;
 
@@ -648,7 +651,8 @@ duplicated message identifier"));
 
 	      /* We need the conversion.  */
 	      if (cd_towc == (iconv_t) -1
-		  && open_conversion (codeset, &cd_towc, &cd_tomb) != 0)
+		  && open_conversion (codeset, &cd_towc, &cd_tomb,
+				      &escape_char) != 0)
 		/* Something is wrong.  */
 		goto out;
 
@@ -694,7 +698,7 @@ invalid character: message ignored"));
 	      /* Strip quote characters, change escape sequences into
 		 correct characters etc.  */
 	      normalize_line (fname, start_line, cd_towc, wbuf,
-			      current->quote_char);
+			      current->quote_char, escape_char);
 
 	      /* Now the string is free of escape sequences.  Convert it
 		 back into a multibyte character string.  First free the
@@ -1053,7 +1057,7 @@ find_set (struct catalog *current, int number)
    and quote characters.  */
 static void
 normalize_line (const char *fname, size_t line, iconv_t cd, wchar_t *string,
-		wchar_t quote_char)
+		wchar_t quote_char, wchar_t escape_char)
 {
   int is_quoted;
   wchar_t *rp = string;
@@ -1072,7 +1076,7 @@ normalize_line (const char *fname, size_t line, iconv_t cd, wchar_t *string,
       /* We simply end the string when we find the first time an
 	 not-escaped quote character.  */
 	break;
-    else if (*rp == L'\\')
+    else if (*rp == escape_char)
       {
 	++rp;
 	if (quote_char != L'\0' && *rp == quote_char)
@@ -1106,10 +1110,6 @@ normalize_line (const char *fname, size_t line, iconv_t cd, wchar_t *string,
 	      *wp++ = L'\f';
 	      ++rp;
 	      break;
-	    case L'\\':
-	      *wp++ = L'\\';
-	      ++rp;
-	      break;
 	    case L'0' ... L'7':
 	      {
 		int number;
@@ -1147,7 +1147,13 @@ normalize_line (const char *fname, size_t line, iconv_t cd, wchar_t *string,
 	      }
 	      break;
 	    default:
-	      /* Simply ignore the backslash character.  */
+	      if (*rp == escape_char)
+		{
+		  *wp++ = escape_char;
+		  ++rp;
+		}
+	      else
+		/* Simply ignore the backslash character.  */;
 	      break;
 	    }
       }
@@ -1255,8 +1261,16 @@ read_old (struct catalog *catalog, const char *file_name)
 
 
 static int
-open_conversion (const char *codeset, iconv_t *cd_towcp, iconv_t *cd_tombp)
+open_conversion (const char *codeset, iconv_t *cd_towcp, iconv_t *cd_tombp,
+		 wchar_t *escape_charp)
 {
+  char buf[2];
+  char *bufptr;
+  size_t bufsize;
+  wchar_t wbuf[2];
+  char *wbufptr;
+  size_t wbufsize;
+
   /* If the input file does not specify the codeset use the locale's.  */
   if (codeset == NULL)
     {
@@ -1277,5 +1291,30 @@ open_conversion (const char *codeset, iconv_t *cd_towcp, iconv_t *cd_tombp)
       return 1;
     }
 
+  /* One special case for historical reasons is the backslash
+     character.  In some codesets the byte value 0x5c is not mapped to
+     U005c in Unicode.  These charsets then don't have a backslash
+     character at all.  Therefore we have to live with whatever the
+     codeset provides and recognize, instead of the U005c, the character
+     the byte value 0x5c is mapped to.  */
+  buf[0] = '\\';
+  buf[1] = '\0';
+  bufptr = buf;
+  bufsize = 2;
+
+  wbufptr = (char *) wbuf;
+  wbufsize = sizeof (wbuf);
+
+  iconv (*cd_towcp, &bufptr, &bufsize, &wbufptr, &wbufsize);
+  if (bufsize != 0 || wbufsize != 0)
+    {
+      /* Something went wrong, we couldn't convert the byte 0x5c.  Go
+	 on with using U005c.  */
+      error (0, 0, gettext ("cannot determine escape character"));
+      *escape_charp = L'\\';
+    }
+  else
+    *escape_charp = wbuf[0];
+
   return 0;
 }