about summary refs log tree commit diff
path: root/locale/programs/linereader.c
diff options
context:
space:
mode:
Diffstat (limited to 'locale/programs/linereader.c')
-rw-r--r--locale/programs/linereader.c579
1 files changed, 579 insertions, 0 deletions
diff --git a/locale/programs/linereader.c b/locale/programs/linereader.c
new file mode 100644
index 0000000000..e4a1305712
--- /dev/null
+++ b/locale/programs/linereader.c
@@ -0,0 +1,579 @@
+/* Copyright (C) 1996 Free Software Foundation, Inc.
+This file is part of the GNU C Library.
+Contributed by Ulrich Drepper, <drepper@gnu.ai.mit.edu>.
+
+The GNU C Library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Library General Public License as
+published by the Free Software Foundation; either version 2 of the
+License, or (at your option) any later version.
+
+The GNU C Library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Library General Public License for more details.
+
+You should have received a copy of the GNU Library General Public
+License along with the GNU C Library; see the file COPYING.LIB.  If
+not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+Boston, MA 02111-1307, USA.  */
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+#include <ctype.h>
+#include <errno.h>
+#include <libintl.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "error.h"
+#include "linereader.h"
+#include "charset.h"
+#include "stringtrans.h"
+
+
+/* Allocation helpers defined outside this file.  Every caller in this
+   file uses the returned pointer without checking it for NULL, so the
+   helpers are presumably expected never to return NULL -- confirm
+   against their definitions.  */
+void *xmalloc (size_t __n);
+void *xrealloc (void *__p, size_t __n);
+char *xstrdup (const char *__str);
+
+
+/* Helpers of lr_token, one per token class that needs more than a
+   single character.  Each of them stores its result in lr->token and
+   returns a pointer to it.  */
+static struct token *get_toplvl_escape (struct linereader *lr);
+static struct token *get_symname (struct linereader *lr);
+static struct token *get_ident (struct linereader *lr);
+static struct token *get_string (struct linereader *lr,
+				 const struct charset_t *charset);
+
+
+/* Open FNAME for tokenizing and read its first line into the buffer.
+   A NULL FNAME, "-" or "/dev/stdin" selects standard input.  HF is
+   stored as the keyword lookup function used by the token readers.
+
+   Returns the new reader, or NULL if the file cannot be opened or if
+   the first line cannot be read.  In the latter case everything
+   acquired so far is released and errno is restored to the value it
+   had right after the failed read.  */
+struct linereader *
+lr_open (const char *fname, kw_hash_fct_t hf)
+{
+  FILE *fp;
+  struct linereader *result;
+  int n;
+
+  if (fname == NULL || strcmp (fname, "-") == 0
+      || strcmp (fname, "/dev/stdin") == 0)
+    fp = stdin;
+  else
+    {
+      fp = fopen (fname, "r");
+      if (fp == NULL)
+	return NULL;
+    }
+
+  result = (struct linereader *) xmalloc (sizeof (*result));
+
+  result->fp = fp;
+  /* FNAME is allowed to be NULL (standard input), but a null pointer
+     must not be handed to xstrdup.  */
+  result->fname = xstrdup (fname != NULL ? fname : "<stdin>");
+  result->buf = NULL;
+  result->bufsize = 0;
+  result->lineno = 1;
+  result->idx = 0;
+  result->comment_char = '#';
+  result->escape_char = '\\';
+  result->translate_strings = 1;
+
+  n = getdelim (&result->buf, &result->bufsize, '\n', result->fp);
+  if (n < 0)
+    {
+      int save = errno;
+      fclose (result->fp);
+      /* getdelim may have allocated a buffer even though it failed;
+	 free (NULL) is harmless if it did not.  */
+      free (result->buf);
+      free ((void *) result->fname);
+      free (result);
+      errno = save;
+      return NULL;
+    }
+
+  /* A backslash-newline pair at the end of the first line is dropped.  */
+  if (n > 1 && result->buf[n - 2] == '\\' && result->buf[n - 1] == '\n')
+    n -= 2;
+
+  result->buf[n] = '\0';
+  result->bufact = n;
+  result->hash_fct = hf;
+
+  return result;
+}
+
+
+/* Return nonzero if the line buffer of LR holds no characters
+   (bufact is zero), and zero otherwise.  LR is not modified.  */
+int
+lr_eof (struct linereader *lr)
+{
+  /* This must be a comparison.  An assignment here would clear the
+     buffer length and make the function return 0 unconditionally.  */
+  return lr->bufact == 0;
+}
+
+
+/* Dispose of LR: close its stream, then release the line buffer and
+   the reader structure itself.  LR must not be used afterwards.
+   NOTE(review): lr->fname, which lr_open allocates, is not released
+   here -- confirm whether anything still refers to it after close.  */
+void
+lr_close (struct linereader *lr)
+{
+  FILE *stream = lr->fp;
+  char *line = lr->buf;
+
+  fclose (stream);
+  free (line);
+  free (lr);
+}
+
+
+/* Replace the buffer of LR with the next line of input and reset the
+   read index to its start.  Returns 0 on success and -1 if no further
+   line could be read, in which case LR is left unchanged by this
+   function.  */
+int
+lr_next (struct linereader *lr)
+{
+  int len = getdelim (&lr->buf, &lr->bufsize, '\n', lr->fp);
+
+  if (len < 0)
+    return -1;
+
+  lr->lineno += 1;
+
+  /* A line ending in <escape_char><newline>: the pair is substituted
+     with a single <SP>, i.e. the escape character is overwritten and
+     the newline is cut off.  */
+  if (len >= 2 && lr->buf[len - 1] == '\n'
+      && lr->buf[len - 2] == lr->escape_char)
+    {
+      lr->buf[len - 2] = ' ';
+      len -= 1;
+    }
+
+  lr->idx = 0;
+  lr->bufact = len;
+  lr->buf[len] = '\0';
+
+  return 0;
+}
+
+
+/* Defined in error.c.  */
+/* This variable is incremented each time `error' is called.  */
+extern unsigned int error_message_count;
+
+/* The calling program should define program_name and set it to the
+   name of the executing program.  */
+extern char *program_name;
+/* NOTE(review): neither variable is referenced by name in the code of
+   this file; macros from the included headers may still rely on these
+   declarations -- check before removing them.  */
+
+
+/* Read the next token from LR.
+
+   White space is skipped, except that a newline yields tok_eol.  The
+   comment character discards the rest of the line and also yields
+   tok_eol; the end of input yields tok_eof.  Otherwise the first
+   character selects the token: the escape character starts a numeric
+   character code, "..." is an ellipsis, '<' starts a symbolic name, a
+   digit starts a decimal number, '"' starts a string, "-1" is
+   tok_minus1 and ';' ',' '(' ')' are tokens of their own.  Anything
+   else is read as an identifier.
+
+   CHARSET is only passed on to the string reader.  The return value
+   points to the token stored inside LR, which every call overwrites.  */
+struct token *
+lr_token (struct linereader *lr, const struct charset_t *charset)
+{
+  int ch;
+
+  /* Skip white space; a newline is a token of its own.  */
+  do
+    {
+      ch = lr_getc (lr);
+
+      if (ch == '\n')
+	{
+	  lr->token.tok = tok_eol;
+	  return &lr->token;
+	}
+    }
+  while (isspace (ch));
+
+  if (ch == EOF)
+    {
+      lr->token.tok = tok_eof;
+      return &lr->token;
+    }
+
+  if (ch == lr->comment_char)
+    {
+      /* Ignore rest of line.  */
+      lr_ignore_rest (lr, 0);
+      lr->token.tok = tok_eol;
+      return &lr->token;
+    }
+
+  /* Match escape sequences.  */
+  if (ch == lr->escape_char)
+    return get_toplvl_escape (lr);
+
+  /* Match ellipsis.  */
+  if (ch == '.' && strncmp (&lr->buf[lr->idx], "..", 2) == 0)
+    {
+      lr_getc (lr);
+      lr_getc (lr);
+      lr->token.tok = tok_ellipsis;
+      return &lr->token;
+    }
+
+  switch (ch)
+    {
+    case '<':
+      return get_symname (lr);
+
+    case '0' ... '9':
+      lr->token.tok = tok_number;
+      lr->token.val.num = ch - '0';
+
+      while (isdigit (ch = lr_getc (lr)))
+	{
+	  lr->token.val.num *= 10;
+	  lr->token.val.num += ch - '0';
+	}
+      if (isalpha (ch))
+	lr_error (lr, _("garbage at end of digit"));
+      /* Give back the character that ended the number.  */
+      lr_ungetn (lr, 1);
+
+      return &lr->token;
+
+    case ';':
+      lr->token.tok = tok_semicolon;
+      return &lr->token;
+
+    case ',':
+      lr->token.tok = tok_comma;
+      return &lr->token;
+
+    case '(':
+      lr->token.tok = tok_open_brace;
+      return &lr->token;
+
+    case ')':
+      lr->token.tok = tok_close_brace;
+      return &lr->token;
+
+    case '"':
+      return get_string (lr, charset);
+
+    case '-':
+      ch = lr_getc (lr);
+      if (ch == '1')
+	{
+	  lr->token.tok = tok_minus1;
+	  return &lr->token;
+	}
+      /* Not "-1", so the '-' starts an identifier.  get_ident takes
+	 its first character from the position just before the read
+	 index, therefore only the look-ahead character may be pushed
+	 back.  Pushing back two characters would start the identifier
+	 one character too early, or in front of the buffer when the
+	 '-' is the first character of the line.
+	 NOTE(review): assumes lr_getc consumes nothing when it returns
+	 EOF, so there is nothing to push back then -- confirm against
+	 linereader.h.  */
+      if (ch != EOF)
+	lr_ungetn (lr, 1);
+      break;
+    }
+
+  return get_ident (lr);
+}
+
+
+/* Read a character code written as a sequence of escaped byte values.
+   The first escape character has already been consumed by the caller.
+   Each byte consists of an optional base selector (`d' for decimal,
+   `x' for hexadecimal, octal otherwise) followed by two digits and,
+   for octal and decimal, an optional third digit.  Up to four bytes,
+   each introduced by the escape character, are combined with the
+   first byte being the most significant one.
+
+   On success the token is tok_charcode and carries the value and the
+   number of bytes.  If a byte is malformed the rest of the word is
+   skipped and tok_error is returned; its string value refers to the
+   offending text inside the line buffer of LR.  */
+static struct token *
+get_toplvl_escape (struct linereader *lr)
+{
+  /* This is supposed to be a numeric value.  We return the
+     numerical value and the number of bytes.  */
+  size_t start_idx = lr->idx - 1;
+  unsigned int value = 0;
+  int nbytes = 0;
+  int ch;
+
+  do
+    {
+      unsigned int byte = 0;
+      unsigned int base = 8;
+
+      ch = lr_getc (lr);
+
+      if (ch == 'd')
+	{
+	  base = 10;
+	  ch = lr_getc (lr);
+	}
+      else if (ch == 'x')
+	{
+	  base = 16;
+	  ch = lr_getc (lr);
+	}
+
+      /* First digit, mandatory.  */
+      if ((base == 16 && !isxdigit (ch))
+	  || (base != 16 && (ch < '0' || ch >= '0' + base)))
+	{
+	esc_error:
+	  lr->token.val.str.start = &lr->buf[start_idx];
+
+	  /* Skip to the end of the word.  Both conditions must hold to
+	     go on; with `||' the loop could never terminate.  */
+	  while (ch != EOF && !isspace (ch))
+	    ch = lr_getc (lr);
+	  lr->token.val.str.len = lr->idx - start_idx;
+
+	  lr->token.tok = tok_error;
+	  return &lr->token;
+	}
+
+      if (isdigit (ch))
+	byte = ch - '0';
+      else
+	byte = tolower (ch) - 'a' + 10;
+
+      /* Second digit, mandatory as well.  */
+      ch = lr_getc (lr);
+      if ((base == 16 && !isxdigit (ch))
+	  || (base != 16 && (ch < '0' || ch >= '0' + base)))
+	goto esc_error;
+
+      byte *= base;
+      if (isdigit (ch))
+	byte += ch - '0';
+      else
+	byte += tolower (ch) - 'a' + 10;
+
+      /* Optional third digit for octal and decimal values.  */
+      ch = lr_getc (lr);
+      if (base != 16 && isdigit (ch))
+	{
+	  byte *= base;
+	  /* The digit belongs to the byte value, not to the base.  */
+	  byte += ch - '0';
+
+	  ch = lr_getc (lr);
+	}
+
+      value *= 256;
+      value += byte;
+
+      ++nbytes;
+    }
+  while (ch == lr->escape_char && nbytes < 4);
+
+  if (!isspace (ch))
+    lr_error (lr, _("garbage at end of character code specification"));
+
+  /* Give back the character that ended the character code.  */
+  lr_ungetn (lr, 1);
+
+  lr->token.tok = tok_charcode;
+  lr->token.val.charcode.val = value;
+  lr->token.val.charcode.nbytes = nbytes;
+
+  return &lr->token;
+}
+
+
+/* Append CH to the byte buffer of the enclosing function, which has
+   to provide the locals `buf', `bufact' (bytes in use) and `bufmax'
+   (bytes allocated).  The allocation is doubled by means of xrealloc
+   whenever the buffer is full.  */
+#define ADDC(ch)							    \
+  do									    \
+    {									    \
+      if (bufmax == bufact)						    \
+	{								    \
+	  bufmax += bufmax;						    \
+	  buf = xrealloc (buf, bufmax);					    \
+	}								    \
+      buf[bufact] = (ch);						    \
+      ++bufact;								    \
+    }									    \
+  while (0)
+
+
+/* Read a symbolic name; the opening '<' was consumed by the caller.
+   Characters are collected up to and including the closing '>', and
+   the escape character makes the character following it literal.  A
+   newline or the end of input in front of the '>' is reported as an
+   error and ends the name.
+
+   We must distinguish three kinds:
+     1. ISO 10646 position values, i.e. `U' followed by four or eight
+	hexadecimal digits: tok_ucs2 or tok_ucs4 with the value.
+     2. reserved words, known to the keyword lookup function with
+	symname_or_ident == 1: the token of the keyword.
+     3. all other: tok_bsymbol with a heap copy of the name in
+	val.str; the length does not count the closing '>'.  This
+	function does not free that copy.  */
+static struct token *
+get_symname (struct linereader *lr)
+{
+  char *buf;
+  size_t bufact = 0;
+  size_t bufmax = 56;
+  const struct keyword_t *kw;
+  int ch;
+
+  buf = (char *) xmalloc (bufmax);
+
+  /* The last byte stored is always the character that ended the name;
+     all the `bufact - 1' expressions below rely on this.  */
+  do
+    {
+      ch = lr_getc (lr);
+      if (ch == lr->escape_char)
+	{
+	  int c2 = lr_getc (lr);
+	  ADDC (c2);
+
+	  if (c2 == '\n' || c2 == EOF)
+	    ch = c2;
+	}
+      else
+	ADDC (ch);
+    }
+  /* Stop at the end of input, too; otherwise EOF would be appended
+     over and over.  */
+  while (ch != '>' && ch != '\n' && ch != EOF);
+
+  if (ch != '>')
+    lr_error (lr, _("unterminated symbolic name"));
+
+  /* Test for ISO 10646 position value.  */
+  if (buf[0] == 'U' && (bufact == 6 || bufact == 10))
+    {
+      char *cp = buf + 1;
+      while (cp < &buf[bufact - 1] && isxdigit ((unsigned char) *cp))
+	++cp;
+
+      if (cp == &buf[bufact - 1])
+	{
+	  /* Yes, it is.  */
+	  lr->token.tok = bufact == 6 ? tok_ucs2 : tok_ucs4;
+	  /* Replace the terminator by NUL and convert the digits only.
+	     Starting at the `U' would make strtoul yield 0.  */
+	  buf[bufact - 1] = '\0';
+	  lr->token.val.charcode.val = strtoul (buf + 1, NULL, 16);
+	  lr->token.val.charcode.nbytes = lr->token.tok == tok_ucs2 ? 2 : 4;
+
+	  /* The text is not part of this token.  */
+	  free (buf);
+
+	  return &lr->token;
+	}
+    }
+
+  /* It is a symbolic name.  Test for reserved words.  */
+  kw = lr->hash_fct (buf, bufact - 1);
+
+  if (kw != NULL && kw->symname_or_ident == 1)
+    {
+      lr->token.tok = kw->token;
+      free (buf);
+    }
+  else
+    {
+      lr->token.tok = tok_bsymbol;
+
+      /* Resize before storing the NUL: the buffer may be exactly full
+	 here, in which case buf[bufact] is not yet part of it.  */
+      buf = xrealloc (buf, bufact + 1);
+      buf[bufact] = '\0';
+
+      lr->token.val.str.start = buf;
+      lr->token.val.str.len = bufact - 1;
+    }
+
+  return &lr->token;
+}
+
+
+/* Read an identifier whose first character was already consumed by
+   the caller.  The identifier extends up to, but does not include,
+   the next white space character, '"', ';', '<', ',' or the end of
+   input.
+
+   If the keyword lookup function knows the word with
+   symname_or_ident == 0 the token of that keyword is returned.
+   Otherwise the result is tok_ident with a NUL terminated heap copy
+   of the text in val.str; this function does not free that copy.  */
+static struct token *
+get_ident (struct linereader *lr)
+{
+  char *buf;
+  size_t bufact;
+  size_t bufmax = 56;
+  const struct keyword_t *kw;
+  int ch;
+
+  buf = xmalloc (bufmax);
+  bufact = 0;
+
+  /* The first character is the one read last by the caller.  */
+  ADDC (lr->buf[lr->idx - 1]);
+
+  /* Stop at the end of input, too; otherwise EOF would be appended
+     over and over.  */
+  while (!isspace ((ch = lr_getc (lr))) && ch != '"' && ch != ';'
+	 && ch != '<' && ch != ',' && ch != EOF)
+    /* XXX Handle escape sequences?  */
+    ADDC (ch);
+
+  /* Give the delimiter back to the reader.
+     NOTE(review): assumes lr_getc consumes nothing when it returns
+     EOF, so there is nothing to push back then -- confirm against
+     linereader.h.  */
+  if (ch != EOF)
+    lr_ungetn (lr, 1);
+
+  kw = lr->hash_fct (buf, bufact);
+
+  if (kw != NULL && kw->symname_or_ident == 0)
+    {
+      lr->token.tok = kw->token;
+      free (buf);
+    }
+  else
+    {
+      lr->token.tok = tok_ident;
+
+      /* Resize before storing the NUL: the buffer may be exactly full
+	 here, in which case buf[bufact] is not yet part of it.  */
+      buf = xrealloc (buf, bufact + 1);
+      buf[bufact] = '\0';
+
+      lr->token.val.str.start = buf;
+      lr->token.val.str.len = bufact;
+    }
+
+  return &lr->token;
+}
+
+
+/* Read a string; the opening '"' was consumed by the caller.  The
+   string ends at the closing '"'; a newline or the end of input in
+   front of it is reported as an error.  The escape character makes
+   the character following it literal.
+
+   If CHARSET is not NULL a `<name>' inside the string is a symbolic
+   character name.  With lr->translate_strings set the name is looked
+   up in CHARSET and replaced by the bytes encode_char produces for
+   its value; otherwise the name is copied literally, brackets
+   included.
+
+   The token is always tok_string.  Its value is a heap buffer this
+   function does not free, or NULL with length 0 if a name could not
+   be translated or a value could not be encoded.  */
+static struct token *
+get_string (struct linereader *lr, const struct charset_t *charset)
+{
+  int illegal_string = 0;
+  /* Set once a symbol without closing '>' has been reported.  */
+  int unterminated = 0;
+  char *buf, *cp;
+  size_t bufact;
+  size_t bufmax = 56;
+  int ch;
+
+  buf = xmalloc (bufmax);
+  bufact = 0;
+
+  while ((ch = lr_getc (lr)) != '"' && ch != '\n' && ch != EOF)
+    if (ch != '<' || charset == NULL)
+      {
+	if (ch == lr->escape_char)
+	  {
+	    ch = lr_getc (lr);
+	    if (ch == '\n' || ch == EOF)
+	      break;
+	  }
+	ADDC (ch);
+      }
+    else
+      {
+	/* We have to get the value of the symbol.  */
+	unsigned int value;
+	size_t startidx = bufact;
+
+	if (!lr->translate_strings)
+	  ADDC ('<');
+
+	while ((ch = lr_getc (lr)) != '>' && ch != '\n' && ch != EOF)
+	  {
+	    if (ch == lr->escape_char)
+	      {
+		ch = lr_getc (lr);
+		if (ch == '\n' || ch == EOF)
+		  break;
+	      }
+	    ADDC (ch);
+	  }
+
+	if (ch == '\n' || ch == EOF)
+	  {
+	    lr_error (lr, _("unterminated string"));
+	    unterminated = 1;
+	  }
+	else
+	  if (!lr->translate_strings)
+	    ADDC ('>');
+
+	if (lr->translate_strings)
+	  {
+	    value = charset_find_value (charset, &buf[startidx],
+					bufact - startidx);
+	    if (value == ILLEGAL_CHAR_VALUE)
+	      illegal_string = 1;
+	    /* The encoded value replaces the name.  */
+	    bufact = startidx;
+
+	    if (bufmax - bufact < 8)
+	      {
+		bufmax *= 2;
+		buf = (char *) xrealloc (buf, bufmax);
+	      }
+
+	    cp = &buf[bufact];
+	    if (encode_char (value, &cp))
+	      illegal_string = 1;
+
+	    bufact = cp - buf;
+	  }
+
+	/* The line or the input ended inside the symbol.  The string
+	   ends here as well; do not read on into the next line.  */
+	if (unterminated)
+	  break;
+      }
+
+  /* Catch errors with trailing escape character.  */
+  if (bufact > 0 && buf[bufact - 1] == lr->escape_char
+      && (bufact == 1 || buf[bufact - 2] != lr->escape_char))
+    {
+      lr_error (lr, _("illegal escape sequence at end of string"));
+      --bufact;
+    }
+  else if (!unterminated && (ch == '\n' || ch == EOF))
+    lr_error (lr, _("unterminated string"));
+
+  /* Terminate string if necessary.  */
+  if (lr->translate_strings)
+    {
+      /* encode_char stores directly behind the used part of the
+	 buffer, which may be full.  Keep the same headroom of 8 bytes
+	 the symbol translation above provides.
+	 NOTE(review): 8 bytes per call is taken from that code; the
+	 real bound is defined by encode_char.  */
+      if (bufmax - bufact < 8)
+	{
+	  bufmax *= 2;
+	  buf = (char *) xrealloc (buf, bufmax);
+	}
+
+      cp = &buf[bufact];
+      if (encode_char (0, &cp))
+	illegal_string = 1;
+
+      bufact = cp - buf;
+    }
+  else
+    ADDC ('\0');
+
+  lr->token.tok = tok_string;
+
+  if (illegal_string)
+    {
+      free (buf);
+      lr->token.val.str.start = NULL;
+      lr->token.val.str.len = 0;
+    }
+  else
+    {
+      buf = xrealloc (buf, bufact + 1);
+
+      lr->token.val.str.start = buf;
+      lr->token.val.str.len = bufact;
+    }
+
+  return &lr->token;
+}