about summary refs log tree commit diff
path: root/locale/locfile-lex.c
diff options
context:
space:
mode:
Diffstat (limited to 'locale/locfile-lex.c')
-rw-r--r--locale/locfile-lex.c533
1 files changed, 0 insertions, 533 deletions
diff --git a/locale/locfile-lex.c b/locale/locfile-lex.c
deleted file mode 100644
index 20e4f0f9cd..0000000000
--- a/locale/locfile-lex.c
+++ /dev/null
@@ -1,533 +0,0 @@
-/* Copyright (C) 1995 Free Software Foundation, Inc.
-
-The GNU C Library is free software; you can redistribute it and/or
-modify it under the terms of the GNU Library General Public License as
-published by the Free Software Foundation; either version 2 of the
-License, or (at your option) any later version.
-
-The GNU C Library is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-Library General Public License for more details.
-
-You should have received a copy of the GNU Library General Public
-License along with the GNU C Library; see the file COPYING.LIB.  If
-not, write to the Free Software Foundation, Inc., 675 Mass Ave,
-Cambridge, MA 02139, USA.  */
-
-#include <ctype.h>
-#include <langinfo.h>
-#include <libintl.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-
-#include "localedef.h"
-#include "token.h"
-
-
-/* Pull in the keyword table from "keyword.h".  `in_word_set' is the
-   lookup function used by `locfile_lex' below: given a string and its
-   length it returns the matching keyword entry, or NULL if the string
-   is not a keyword.  It is declared here, ahead of the include
-   (presumably because keyword.h only contains its definition -- TODO
-   confirm).  */
-const struct locale_keyword* in_word_set (register const char *str,
-                                          register int len);
-#include "keyword.h"
-
-
-/* State of reading the locale definition file: file name, line buffer
-   and read position, string buffer, line counter, token counter, and
-   the current escape and comment characters.  It is initialized by
-   `locfile_open'.  */
-struct locfile_data locfile_data;
-
-/* Flag consulted by `get_char' when a symbolic character name is not
-   found.  While it is nonzero (the default) an unknown name may be
-   reported; collation input is the only place where element names
-   besides the ones defined in the character map are allowed, so there
-   the flag is expected to be cleared to suppress those messages.  */
-int reject_new_char = 1;
-
-/* Prototypes for local functions.  */
-static int get_char (void);
-
-
-/* Shorthand for the lexer state used throughout this file.  */
-#define LD locfile_data
-
-/* Open the locale definition file FNAME and initialize the status data
-   structure for the following calls of `locfile_lex'.
-
-   If FNAME is NULL the input is read from stdin as is; otherwise stdin
-   is redirected to FNAME with `freopen'.  A file that cannot be opened
-   is reported through `error' with status 4.
-
-   The escape and comment characters are reset to their defaults (`\'
-   and `#'), the line buffer and the string buffer are allocated with
-   `xmalloc', and the state is arranged so that the first call of
-   `locfile_lex' reads a line.  */
-void
-locfile_open (const char *fname)
-{
-  long int line_max;
-
-  if (fname == NULL)
-    /* We read from stdin.  */
-    LD.filename = "<stdin>";
-  else
-    {
-      if (freopen (fname, "r", stdin) == NULL)
-        error (4, 0, gettext ("input file `%s' not found"), fname);
-      LD.filename = fname;
-    }
-
-  /* Set default values.  */
-  LD.escape_char = '\\';
-  LD.comment_char = '#';
-
-  /* `sysconf' returns -1 when the limit is indeterminate or the name is
-     not supported.  Fall back to 2048, the minimum POSIX.2 guarantees
-     for LINE_MAX, instead of using -1 as a buffer size.  */
-  line_max = sysconf (_SC_LINE_MAX);
-  if (line_max <= 0)
-    line_max = 2048;
-
-  LD.bufsize = line_max;
-  LD.buf = (char *) xmalloc (LD.bufsize);
-
-  /* The string buffer receives the expanded form of a quoted string.
-     A symbolic character name occupies at least two bytes of input
-     (`<>') and `char_to_utf' produces at most six bytes for it, so the
-     expansion can be up to three times as long as the input line.  */
-  LD.strbuf = (char *) xmalloc (3 * LD.bufsize);
-
-  LD.buf_ptr = LD.returned_tokens = LD.line_no = 0;
-
-  /* An empty buffer together with the continuation flag makes the
-     first call of `locfile_lex' read a line immediately.  */
-  LD.continue_line = 1;
-  LD.buf[LD.buf_ptr] = '\0';
-}
-
-
-/* Variant of `locfile_lex' for places where the input must not end:
-   the token code, *TOKEN and *TOKEN_LEN are exactly what `locfile_lex'
-   delivers, but a result of 0 (end of file) is reported through
-   `error' with status 4.  */
-int
-xlocfile_lex (char **token, int *token_len)
-{
-  int tok = locfile_lex (token, token_len);
-
-  if (tok != 0)
-    return tok;
-
-  /* Zero means the file ended where a token was required.  */
-  error (4, 0, gettext ("%s: unexpected end of file in locale defintion "
-                        "file"), LD.filename);
-
-  return tok;
-}
-
-/* Return the next token of the locale definition file, or 0 at end of
-   file.
-
-   *TOKEN is set to the start of the token text inside the line buffer,
-   except for strings, where it points to the expanded string in
-   LD.strbuf.  *TOKEN_LEN depends on the token:
-     identifiers and keywords  length of the token text
-     TOK_STRING                length of the expanded string
-     TOK_NUMBER                the decimal value
-     TOK_CHAR                  the character value
-   For an escape sequence the character value itself is the return
-   value.  Other results are TOK_ENDOFLINE (end of a logical line),
-   TOK_ELLIPSIS, TOK_MINUS1 and TOK_ILL_CHAR.
-
-   The keywords `escape_char' and `comment_char' are consumed here and
-   never reach the caller; they change LD.escape_char respectively
-   LD.comment_char and the rest of their line is skipped.
-
-   NOTE(review): nothing in this function bounds the writes into
-   LD.strbuf; its capacity is decided where it is allocated.  */
-int
-locfile_lex (char **token, int *token_len)
-{
-  int start_again;
-  int retval = 0;
-
-  do
-    {
-      int start_ptr;
-
-      start_again = 0;
-
-      /* Read the next line.  Skip over empty lines and comments.  The
-         bounds test has to come first: `ignore_to_eol' and the
-         TOK_ENDOFLINE case below park LD.buf_ptr at LD.bufsize, which
-         is one past the end of LD.buf and must not be dereferenced.  */
-      if (LD.buf_ptr >= LD.bufsize
-          || (LD.buf[LD.buf_ptr] == '\0' && LD.continue_line != 0)
-          || (posix_conformance == 0 && LD.buf[LD.buf_ptr] == LD.comment_char))
-        do
-          {
-            size_t linelen;
-
-            LD.buf_ptr = 0;
-
-            if (fgets (LD.buf, LD.bufsize, stdin) == NULL)
-              {
-                /* This makes subsequent calls also return EOF: with an
-                   empty buffer and the continuation flag set the next
-                   call tries to read again instead of reporting an
-                   end of line first.  */
-                LD.buf[0] = '\0';
-                LD.continue_line = 1;
-                return 0;
-              }
-
-            /* Increment line number counter.  */
-            ++LD.line_no;
-
-            /* We now have to look whether this line is continued and
-               whether it at all fits into our buffer.  */
-            linelen = strlen (LD.buf);
-
-            /* A full buffer only means truncation when the newline is
-               missing; a line ending exactly at the last position is
-               complete.  */
-            if (linelen == LD.bufsize - 1
-                && (linelen == 0 || LD.buf[linelen - 1] != '\n'))
-              /* The line did not fit into the buffer.  */
-              error (2, 0, gettext ("%s:%Zd: line too long;  use "
-                                    "`getconf LINE_MAX' to get the maximum "
-                                    "line length"), LD.filename, LD.line_no);
-
-            /* Remove '\n' at end of line.  `fgets' can deliver an
-               empty string when the line starts with a NUL byte, so
-               the length has to be checked first.  */
-            if (linelen > 0 && LD.buf[linelen - 1] == '\n')
-              LD.buf[--linelen] = '\0';
-
-            if (linelen > 0 && LD.buf[linelen - 1] == LD.escape_char)
-              {
-                LD.buf[--linelen] = '\0';
-                LD.continue_line = 1;
-              }
-            else
-              LD.continue_line = 0;
-
-            while (isspace ((unsigned char) LD.buf[LD.buf_ptr]))
-              ++LD.buf_ptr;
-
-            /* We are not so restrictive and allow white spaces before
-               a comment.  */
-            if (posix_conformance == 0
-                && LD.buf[LD.buf_ptr] == LD.comment_char
-                && LD.buf_ptr != 0)
-              error (0, 0, gettext ("%s:%Zd: comment does not start in "
-                                    "column 1"), LD.filename, LD.line_no);
-          }
-        while (LD.buf[LD.buf_ptr] == '\0'
-               || LD.buf[LD.buf_ptr] == LD.comment_char);
-
-
-      /* Get information for return values.  */
-      *token = LD.buf + LD.buf_ptr;
-      start_ptr = LD.buf_ptr;
-
-      /* If no further character is in the line this is the end of a logical
-         line.  This information is needed in the parser.  */
-      if (LD.buf[LD.buf_ptr] == '\0')
-        {
-          LD.buf_ptr = LD.bufsize;
-          retval = TOK_ENDOFLINE;
-        }
-      else if (isalpha ((unsigned char) LD.buf[LD.buf_ptr]))
-        /* The token is an identifier.  The POSIX standard does not say
-           what characters might be contained but official POSIX locale
-           definition files contain besides alnum characters '_', '-' and
-           '+'.  */
-        {
-          const struct locale_keyword *kw;
-
-          do
-            ++LD.buf_ptr;
-          while (isalnum ((unsigned char) LD.buf[LD.buf_ptr])
-                 || LD.buf[LD.buf_ptr] == '_'
-                 || LD.buf[LD.buf_ptr] == '-' || LD.buf[LD.buf_ptr] == '+');
-
-          /* Look in table of keywords.  */
-          kw = in_word_set (*token, LD.buf_ptr - start_ptr);
-          if (kw == NULL)
-            retval = TOK_IDENT;
-          else
-            {
-              if (kw->token_id == TOK_ESCAPE_CHAR
-                  || kw->token_id == TOK_COMMENT_CHAR)
-                /* `escape_char' and `comment_char' are keywords for the
-                   lexer.  Do not give them to the parser.  */
-                {
-                  int bad_syntax
-                    = (!isspace ((unsigned char) LD.buf[LD.buf_ptr])
-                       || (posix_conformance && LD.returned_tokens > 0));
-
-                  start_again = 1;
-
-                  /* Step over the separator and any further white
-                     space, but never beyond the end of the line.  */
-                  if (LD.buf[LD.buf_ptr] != '\0')
-                    {
-                      do
-                        ++LD.buf_ptr;
-                      while (isspace ((unsigned char) LD.buf[LD.buf_ptr]));
-                    }
-
-                  /* Store the new character.  A conditional expression
-                     is not an lvalue in ISO C, hence the explicit
-                     branches.  If the line ends before a character is
-                     given the old setting stays in effect.  */
-                  if (LD.buf[LD.buf_ptr] == '\0')
-                    bad_syntax = 1;
-                  else if (kw->token_id == TOK_ESCAPE_CHAR)
-                    LD.escape_char = LD.buf[LD.buf_ptr++];
-                  else
-                    LD.comment_char = LD.buf[LD.buf_ptr++];
-
-                  if (bad_syntax)
-                    error (0, 0, gettext ("%s:%Zd: syntax error in locale "
-                                          "definition file"),
-                           LD.filename, LD.line_no);
-
-                  ignore_to_eol (0, posix_conformance);
-                }
-              else
-                /* It is one of the normal keywords.  */
-                retval = kw->token_id;
-            }
-
-          *token_len = LD.buf_ptr - start_ptr;
-        }
-      else if (LD.buf[LD.buf_ptr] == '"')
-        /* Read a string.  All symbolic character descriptions are expanded.
-           This has to be done in a separate buffer because a simple
-           symbolic character like <A> may expand to up to 6 bytes.  */
-        {
-          char *last = LD.strbuf;
-
-          ++LD.buf_ptr;
-          while (LD.buf[LD.buf_ptr] != '"')
-            {
-              int pre = LD.buf_ptr;
-              int char_val = get_char ();
-
-              if (char_val == 0)
-                {
-                  error (4, 0, gettext ("%s:%Zd: unterminated string at end "
-                                        "of line"), LD.filename, LD.line_no);
-                  /* NOTREACHED */
-                }
-
-              if (char_val > 0)
-                last += char_to_utf (last, char_val);
-              else
-                {
-                  /* Unknown characters are simply not stored; only the
-                     text that was consumed for them is reported.  */
-                  char tmp[LD.buf_ptr - pre + 1];
-                  memcpy (tmp, &LD.buf[pre], LD.buf_ptr - pre);
-                  tmp[LD.buf_ptr - pre] = '\0';
-                  error (0, 0, gettext ("%s:%Zd: character `%s' not defined"),
-                         LD.filename, LD.line_no, tmp);
-                }
-            }
-          if (LD.buf[LD.buf_ptr] != '\0')
-            ++LD.buf_ptr;
-
-          *last = '\0';
-          *token = LD.strbuf;
-          *token_len = last  - LD.strbuf;
-          retval = TOK_STRING;
-        }
-      else if (LD.buf[LD.buf_ptr] == '.' && LD.buf[LD.buf_ptr + 1] == '.'
-               && LD.buf[LD.buf_ptr + 2] == '.')
-        {
-          LD.buf_ptr += 3;
-          retval = TOK_ELLIPSIS;
-        }
-      else if (LD.buf[LD.buf_ptr] == LD.escape_char)
-        /* An escape sequence.  The value of the character is returned
-           as the token code itself.  A numeric escape needs at least
-           two digits and a value of at most 255; otherwise only the
-           character after the escape character is returned.  */
-        {
-          char *endp;
-
-          ++LD.buf_ptr;
-          switch (LD.buf[LD.buf_ptr])
-            {
-            case 'x':
-              /* The number is parsed in base 16, so it may start with
-                 any hexadecimal digit, not only with 0-9.  */
-              if (isxdigit ((unsigned char) LD.buf[++LD.buf_ptr]))
-                {
-                  retval = strtol (&LD.buf[LD.buf_ptr], &endp, 16);
-                  if (endp - (LD.buf + LD.buf_ptr) < 2 || retval > 255)
-                    retval = 'x';
-                  else
-                    LD.buf_ptr = endp - LD.buf;
-                }
-              else
-                retval = 'x';
-              break;
-            case 'd':
-              if (isdigit ((unsigned char) LD.buf[++LD.buf_ptr]))
-                {
-                  retval = strtol (&LD.buf[LD.buf_ptr], &endp, 10);
-                  if (endp - (LD.buf + LD.buf_ptr) < 2 || retval > 255)
-                    retval = 'd';
-                  else
-                    LD.buf_ptr = endp - LD.buf;
-                }
-              else
-                retval = 'd';
-              break;
-            case '0'...'9':
-              retval = strtol (&LD.buf[LD.buf_ptr], &endp, 8);
-              if (endp - (LD.buf + LD.buf_ptr) < 2 || retval > 255)
-                retval = LD.buf[LD.buf_ptr++];
-              else
-                LD.buf_ptr = endp - LD.buf;
-              break;
-            case 'a':
-              retval = '\a';
-              ++LD.buf_ptr;
-              break;
-            case 'b':
-              retval = '\b';
-              ++LD.buf_ptr;
-              break;
-            case 'f':
-              retval = '\f';
-              ++LD.buf_ptr;
-              break;
-            case 'n':
-              retval = '\n';
-              ++LD.buf_ptr;
-              break;
-            case 'r':
-              retval = '\r';
-              ++LD.buf_ptr;
-              break;
-            case 't':
-              retval = '\t';
-              ++LD.buf_ptr;
-              break;
-            case 'v':
-              retval = '\v';
-              ++LD.buf_ptr;
-              break;
-            default:
-              retval = LD.buf[LD.buf_ptr++];
-              break;
-            }
-        }
-      else if (isdigit ((unsigned char) LD.buf[LD.buf_ptr]))
-        {
-          char *endp;
-
-          /* The value of the number is handed back in *TOKEN_LEN.  */
-          *token_len = strtol (&LD.buf[LD.buf_ptr], &endp, 10);
-          LD.buf_ptr = endp - LD.buf;
-          retval = TOK_NUMBER;
-        }
-      else if (LD.buf[LD.buf_ptr] == '-' && LD.buf[LD.buf_ptr + 1] == '1')
-        {
-          LD.buf_ptr += 2;
-          retval = TOK_MINUS1;
-        }
-      else
-        {
-          int ch = get_char ();
-          if (ch != -1)
-            {
-              /* The value of the character is handed back in
-                 *TOKEN_LEN.  */
-              *token_len = ch;
-              retval = TOK_CHAR;
-            }
-          else
-            retval = TOK_ILL_CHAR;
-        }
-
-      /* Ignore white space.  LD.buf_ptr may have been parked at
-         LD.bufsize above or by `ignore_to_eol'; there is nothing to
-         look at in that case.  */
-      while (LD.buf_ptr < LD.bufsize
-             && isspace ((unsigned char) LD.buf[LD.buf_ptr]))
-        ++LD.buf_ptr;
-    }
-  while (start_again != 0);
-
-  ++LD.returned_tokens;
-  return retval;
-}
-
-
-/* Store the character CHAR_VAL at BUF and return the number of bytes
-   written.  If the character map has only single-byte characters
-   (charmap_data.mb_cur_max == 1) the value is stored as one byte.
-   Otherwise a multi-byte sequence of one to six bytes is produced: a
-   lead byte carrying the length marker and the topmost bits, followed
-   by continuation bytes of the form 10xxxxxx with six bits each.  BUF
-   must have room for the sequence; no terminating NUL is written.  */
-int
-char_to_utf (char *buf, int char_val)
-{
-  /* Number of value bits carried by every continuation byte.  */
-  enum { bits_per_byte = 6 };
-
-  /* Entry I describes sequences of I + 1 bytes: the largest value that
-     still fits and the marker bits of the lead byte.  */
-  static struct
-    {
-      int limit;
-      int lead;
-    }
-  enc[] =
-    {
-      { 0x7f,       0x00 },
-      { 0x7ff,      0xc0 },
-      { 0xffff,     0xe0 },
-      { 0x1fffff,   0xf0 },
-      { 0x3ffffff,  0xf8 },
-      { 0x7fffffff, 0xfc },
-      { 0, }
-    };
-  int nbytes;
-  int pos;
-
-  if (charmap_data.mb_cur_max == 1)
-    {
-      buf[0] = char_val;
-      return 1;
-    }
-
-  /* Find the shortest sequence able to hold the value.  */
-  nbytes = 1;
-  while (char_val > enc[nbytes - 1].limit)
-    ++nbytes;
-
-  /* Fill in the continuation bytes from the last one backwards, taking
-     the lowest six bits of the value each time.  */
-  for (pos = nbytes - 1; pos > 0; --pos)
-    {
-      buf[pos] = 0x80 | (char_val & ((1 << bits_per_byte) - 1));
-      char_val >>= bits_per_byte;
-    }
-
-  /* What is left of the value goes into the lead byte.  */
-  buf[0] = enc[nbytes - 1].lead | char_val;
-
-  return nbytes;
-}
-
-
-/* Ignore the rest of the current logical line, including all of its
-   continuation lines, so that the next call of `locfile_lex' starts
-   with a new line.  TOKEN is the token the caller has read last; if it
-   is TOK_ENDOFLINE the line is already complete and nothing is done.
-   If WARN_FLAG is set, a warning is given when unread characters are
-   left in the current line.  */
-void
-ignore_to_eol (int token, int warn_flag)
-{
-  if (token == TOK_ENDOFLINE)
-    return;
-
-  /* LD.buf_ptr equal to LD.bufsize means the line is used up already;
-     that position is outside the buffer and must not be read.  */
-  if (warn_flag && LD.buf_ptr < LD.bufsize && LD.buf[LD.buf_ptr] != '\0')
-    error (0, 0, gettext ("%s:%Zd: trailing garbage at end of line"),
-           locfile_data.filename, locfile_data.line_no);
-
-  while (LD.continue_line)
-    {
-      LD.continue_line = 0;
-
-      /* Increment line number counter.  */
-      ++LD.line_no;
-
-      if (fgets (LD.buf, LD.bufsize, stdin) != NULL)
-        {
-          /* We now have to look whether this line is continued and
-             whether it at all fits into our buffer.  */
-          size_t linelen = strlen (LD.buf);
-
-          /* A full buffer only means truncation when the newline is
-             missing.  */
-          if (linelen == LD.bufsize - 1
-              && (linelen == 0 || LD.buf[linelen - 1] != '\n'))
-            /* The line did not fit into the buffer.  */
-            error (2, 0, gettext ("%s:%Zd: line too long;  use `getconf "
-                                  "LINE_MAX' to get the current maximum "
-                                  "line length"), LD.filename, LD.line_no);
-
-          /* Do not count the '\n' at end of line.  The length is
-             checked before every look at the last character: a line
-             consisting of the newline only has nothing in front of
-             it.  */
-          if (linelen > 0 && LD.buf[linelen - 1] == '\n')
-            --linelen;
-
-          if (linelen > 0 && LD.buf[linelen - 1] == LD.escape_char)
-            LD.continue_line = 1;
-        }
-    }
-
-  /* This causes to begin the next line.  */
-  LD.buf_ptr = LD.bufsize;
-}
-
-
-/* Return the value of the character at the current position of the
-   input buffer and advance past it.
-
-   A plain character is returned as is.  If the position is at a `<'
-   a symbolic character name up to the closing `>' is read and looked
-   up with `find_char', whose result is returned (-1 is what this file
-   treats as "not defined").  Inside the name the escape character
-   makes the following character part of the name.  The name is
-   compacted in place in the line buffer while it is read.
-
-   -1 is also returned when the line ends right after the `<'.
-
-   NOTE(review): a plain character is returned through `char'; where
-   `char' is signed, bytes above 127 come back negative.  */
-static int
-get_char (void)
-{
-  if (LD.buf[LD.buf_ptr] == '<')
-    /* This is a symbolic character name.  */
-    {
-      int char_val;
-      char *startp = LD.buf + (++LD.buf_ptr);
-      char *endp = startp;
-
-      /* NUL is not a printing character, so this loop also stops at
-         the end of the line.  The argument of `isprint' must be
-         representable as unsigned char.  */
-      while (LD.buf[LD.buf_ptr] != '>'
-             && isprint ((unsigned char) LD.buf[LD.buf_ptr]))
-        {
-          /* Skip the escape character; stop if nothing follows it.  */
-          if (LD.buf[LD.buf_ptr] == LD.escape_char
-              && LD.buf[++LD.buf_ptr] == '\0')
-            break;
-
-          *endp++ = LD.buf[LD.buf_ptr++];
-        }
-
-      if (LD.buf[LD.buf_ptr] == '\0')
-        {
-          error (0, 0, gettext ("%s:%Zd: end of line in character symbol"),
-                 LD.filename, LD.line_no);
-
-          if (startp == endp)
-            return -1;
-        }
-      else
-        /* Step over the `>' (or the character which ended the name).  */
-        ++LD.buf_ptr;
-
-      char_val = find_char (startp, endp - startp);
-      if (char_val == -1 && verbose != 0 && reject_new_char != 0)
-        {
-          /* Locale definitions are often written very generally.
-             Missing characters are only reported when explicitly
-             requested.  */
-          char tmp[endp - startp + 3];
-
-          tmp[0] = '<';
-          memcpy (tmp + 1, startp, endp - startp);
-          tmp[endp - startp + 1] = '>';
-          tmp[endp - startp + 2] = '\0';
-
-          error (0, 0, gettext ("%s:%Zd: character `%s' not defined"),
-                 LD.filename, LD.line_no, tmp);
-        }
-
-      return char_val;
-    }
-  else
-    return (int) LD.buf[LD.buf_ptr++];
-}
-
-/*
- * Local Variables:
- *  mode:c
- *  c-basic-offset:2
- * End:
- */