about summary refs log tree commit diff
path: root/locale/charmap.c
diff options
context:
space:
mode:
Diffstat (limited to 'locale/charmap.c')
-rw-r--r-- locale/charmap.c 524
1 files changed, 524 insertions, 0 deletions
diff --git a/locale/charmap.c b/locale/charmap.c
new file mode 100644
index 0000000000..ad1075e5bc
--- /dev/null
+++ b/locale/charmap.c
@@ -0,0 +1,524 @@
+/* Copyright (C) 1995 Free Software Foundation, Inc.
+
+The GNU C Library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Library General Public License as
+published by the Free Software Foundation; either version 2 of the
+License, or (at your option) any later version.
+
+The GNU C Library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Library General Public License for more details.
+
+You should have received a copy of the GNU Library General Public
+License along with the GNU C Library; see the file COPYING.LIB.  If
+not, write to the Free Software Foundation, Inc., 675 Mass Ave,
+Cambridge, MA 02139, USA.  */
+
+#include <ctype.h>
+#include <errno.h>
+#include <libintl.h>
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "localedef.h"
+#include "hash.h"
+
+/* Data structure for representing charmap database.  It is filled in by
+   charmap_read and the two local readers declared below.  */
+struct charmap charmap_data;
+
+/* Line number in charmap file.  Reset by charmap_read and incremented for
+   every line the readers consume; used in diagnostics.  */
+static unsigned int line_no;
+
+/* Prototypes for local functions.  read_prolog handles everything up to
+   the `CHARMAP' line, read_body the entries up to `END CHARMAP'.  */
+static void read_prolog (FILE *infile);
+static unsigned long read_body (FILE *infile);
+
+
+/* Read complete table of symbolic names for character set from file.
+   FILENAME is tried as given and, when it is a relative name that cannot
+   be opened, below CHARMAP_PATH.  If FILENAME is NULL or neither attempt
+   succeeds the file DEFAULT_CHARMAP below CHARMAP_PATH is tried.  If this
+   also is not readable `error' is called with status 4.
+
+   The results are stored in the global `charmap_data': the prolog
+   settings, the table of symbolic names and the hash table geometry
+   (`hash_size', `hash_layers') computed from the character values read.  */
+void
+charmap_read (const char *filename)
+{
+  unsigned long max_char;
+  long path_max = pathconf (".", _PC_PATH_MAX);
+  char *buf;
+  FILE *infile = NULL;
+
+  /* pathconf returns -1 on error and also when there is no fixed limit;
+     fall back to a conventional size instead of using the value as is.  */
+  if (path_max <= 0)
+    path_max = 4096;
+
+  /* The name buffer lives on the heap and is intentionally not freed:
+     charmap_data.filename keeps pointing to it after this function
+     returns.  */
+  buf = (char *) xmalloc (path_max);
+
+  /* Initialize charmap data.  */
+  charmap_data.codeset_name = NULL;
+  charmap_data.mb_cur_max = -1;
+  charmap_data.mb_cur_min = -1;
+  charmap_data.escape_char = '\\';
+  charmap_data.comment_char = '#';
+
+  if (filename != NULL)
+    {
+      /* Remember the name for diagnostics; bounded copy, FILENAME comes
+	 from the user.  */
+      snprintf (buf, path_max, "%s", filename);
+      infile = fopen (filename, "r");
+      if (infile == NULL && filename[0] != '/')
+	{
+	  snprintf (buf, path_max, "%s/%s", CHARMAP_PATH, filename);
+	  infile = fopen (buf, "r");
+	}
+    }
+  if (infile == NULL)
+    {
+      if (filename != NULL)
+	error (0, errno, gettext ("input file `%s' not found"), filename);
+
+      snprintf (buf, path_max, "%s/%s", CHARMAP_PATH, DEFAULT_CHARMAP);
+      infile = fopen (buf, "r");
+
+      /* Report the default file we just tried; FILENAME may be NULL
+	 here.  */
+      if (infile == NULL)
+	error (4, errno, gettext ("input file `%s' not found"), buf);
+    }
+
+  charmap_data.filename = buf;
+  init_hash (&charmap_data.table, 500);
+  line_no = 0;
+
+  /* Read the prolog of the charmap file.  */
+  read_prolog (infile);
+
+  /* Fill in defaults for the values the prolog did not set.  */
+  if (charmap_data.mb_cur_max == -1)
+    charmap_data.mb_cur_max = 1;
+  if (charmap_data.mb_cur_min == -1)
+    charmap_data.mb_cur_min = charmap_data.mb_cur_max;
+
+  if ((size_t) charmap_data.mb_cur_max > sizeof (long))
+    {
+      error (2, 0, gettext ("program limitation: for now only upto %Zu "
+			    "bytes per character are allowed"), sizeof (long));
+    }
+
+  /* Now process all entries.  */
+  max_char = read_body (infile);
+
+  /* We don't need the file anymore.  */
+  fclose (infile);
+
+
+  /* Determine the optimal table size when using the simple modulo hashing
+     function.  "Optimal" here means the smallest product of table size
+     and the longest collision chain.  */
+  if (max_char >= 256)
+    {
+      int size;
+      /* Current best values, initialized to some never reached high value.  */
+      int best_count = 10000;
+      int best_size = 10000;
+      int best_product = best_count * best_size;
+
+      /* Give warning.
+	 NOTE(review): the status passed is -1; presumably this program's
+	 `error' treats that as "do not exit" -- confirm.  */
+      error (-1, 0, gettext ("computing character table size: this may take "
+			     "a while"));
+
+      for (size = 256; size <= best_product; ++size)
+	{
+	  /* Array with slot counters.  */
+	  int cnt[size];
+	  /* Value stored for the current entry.  It is fetched as a
+	     pointer because that is what the table hands back; storing it
+	     into a narrower object would write out of bounds.  */
+	  void *value = NULL;
+	  /* Maximal number of characters in any slot.  */
+	  int maxcnt = 0;
+	  /* Product of current size and maximal count.  */
+	  int product = 0;
+	  /* Iteration pointer through hashing table.  */
+	  char *ptr = NULL;
+
+	  /* Initializes counters to zero.  */
+	  memset (cnt, 0, size * sizeof (int));
+
+	  /* Iterate through whole hashing table; give up on this size as
+	     soon as it cannot beat the best one found so far.  */
+	  while (product < best_product
+		 && iterate_table (&charmap_data.table, (void **) &ptr,
+				   (void **) &value))
+	    {
+	      /* Entries were inserted as `unsigned long' values cast to a
+		 pointer; the unsigned arithmetic keeps the slot index
+		 non-negative.  */
+	      unsigned long slot = (unsigned long) value % size;
+
+	      /* Increment slot counter and test for current maximum.  */
+	      if (++cnt[slot] > maxcnt)
+		{
+		  maxcnt = cnt[slot];
+		  product = maxcnt * size;
+		}
+	    }
+
+	  if (product < best_product)
+	    {
+	      best_count = maxcnt;
+	      best_size = size;
+	      best_product = best_count * best_size;
+	    }
+	}
+
+      charmap_data.hash_size = best_size;
+      charmap_data.hash_layers = best_count;
+    }
+  else
+    {
+      charmap_data.hash_size = 256;
+      charmap_data.hash_layers = 1;
+    }
+}
+
+
+/* Report a syntax error for the current line of the charmap file and skip
+   the rest of the processing for that line.  The macro jumps to a label
+   named `end_of_loop', so every function using it must define that label
+   at the end of its line-reading loop.  */
+#define SYNTAX_ERROR							     \
+  do { error (0, 0, gettext ("%s:%u: syntax error in charmap file"),	     \
+	      charmap_data.filename, line_no);                               \
+       goto end_of_loop; } while (0)
+
+/* Read the prolog of the charmap file until the line containing `CHARMAP'.
+   All possible entries are processed: <code_set_name>, <mb_cur_max>,
+   <mb_cur_min>, <escape_char> and <comment_char> update the matching
+   member of `charmap_data'.  Any other line is reported as a syntax
+   error and skipped.  Reaching the end of INFILE before `CHARMAP' is
+   reported through `error' with status 4.  */
+static void
+read_prolog (FILE *infile)
+{
+  /* sysconf yields -1 when the limit is unavailable; converted to size_t
+     that would request an enormous array, so use the POSIX.2 minimum
+     value of LINE_MAX instead.  */
+  long line_max = sysconf (_SC_LINE_MAX);
+  size_t bufsize = line_max > 0 ? (size_t) line_max : 2048;
+  char buf[bufsize];
+
+  while (1)
+    {
+      char *cp = buf;
+      /* Must be able to hold any line length; a plain `char' cannot.  */
+      size_t len;
+
+      /* Read the next line.  When fgets fails BUF is left untouched, so
+	 it must not be looked at in that case.  */
+      if (fgets (buf, bufsize, infile) == NULL)
+	len = 0;
+      else
+	len = strlen (buf);
+
+      /* The prolog has to end with `CHARMAP', so EOF (or a line without
+	 terminating newline) is an error here.  The status is non-zero,
+	 so `error' is not expected to return; the `return' only guards
+	 against indexing BUF with a bogus length if it ever does.  */
+      if (len == 0 || buf[len - 1] != '\n')
+	{
+	  error (4, 0, gettext ("%s: unexpected end of file in charmap"),
+		 charmap_data.filename);
+	  return;
+	}
+
+      /* This is the next line.  */
+      ++line_no;
+
+      /* Comments and empty lines are ignored.  */
+      if (len == 1 || buf[0] == charmap_data.comment_char)
+	continue;
+
+      buf[len - 1] = '\0';
+
+      /* Throw away leading white spaces.  This is not defined in POSIX.2
+	 so don't do it if conformance is requested.  The <ctype.h>
+	 functions need an `unsigned char' value.  */
+      if (!posix_conformance)
+	while (isspace ((unsigned char) *cp))
+	  ++cp;
+
+      /* If `CHARMAP' is read the prolog is over.  */
+      if (strncmp (cp, "CHARMAP", 7) == 0
+	  && (!posix_conformance || cp[7] == '\0'))
+	return;
+
+      /* Now it can be only one of special symbols defining the charmap
+	 parameters.  All are beginning with '<'.  */
+      if (*cp != '<')
+	SYNTAX_ERROR;
+
+      ++cp;
+      if (strncmp (cp, "code_set_name>", 14) == 0)
+	{
+	  char *startp;
+	  char *name;
+	  size_t name_len;
+
+/* Skip the NO characters of the keyword and the following white space;
+   the argument must then start with a character satisfying PRED.  */
+#define cp_to_arg(no,pred)						      \
+	  cp += no;							      \
+	  while (isspace ((unsigned char) *cp))				      \
+	    ++cp;							      \
+	  if (*cp == '\0' || !pred ((unsigned char) *cp))		      \
+            SYNTAX_ERROR;
+
+	  cp_to_arg (14,isgraph)
+
+	  if (charmap_data.codeset_name != NULL)
+	    {
+	      error (0, 0, gettext ("%s:%u: duplicate code set name "
+				    "specification"),
+		     charmap_data.filename, line_no);
+	      free (charmap_data.codeset_name);
+	    }
+
+	  startp = cp;
+	  while (*cp != '\0' && isgraph ((unsigned char) *cp)
+		 && !isspace ((unsigned char) *cp))
+	    ++cp;
+
+	  /* Copy the name out of the line buffer into its own,
+	     NUL-terminated storage.  */
+	  name_len = cp - startp;
+	  name = (char *) xmalloc (name_len + 1);
+	  memcpy (name, startp, name_len);
+	  name[name_len] = '\0';
+	  charmap_data.codeset_name = name;
+	}
+      else if (strncmp (cp, "mb_cur_max>", 11) == 0)
+	{
+          int new_val;
+	  cp_to_arg (11,isdigit)
+
+	  if (charmap_data.mb_cur_max != -1)
+	    error (0, 0,
+		   gettext ("%s:%u: duplicate definition of mb_cur_max"),
+		   charmap_data.filename, line_no);
+
+	  new_val = (int) strtol (cp, &cp, posix_conformance ? 10 : 0);
+	  if (new_val < 1)
+	    error (0, 0, gettext ("%s:%u: illegal value for mb_cur_max: %d"),
+		   charmap_data.filename, line_no, new_val);
+	  else
+	    charmap_data.mb_cur_max = new_val;
+	}
+      else if (strncmp (cp, "mb_cur_min>", 11) == 0)
+	{
+          int new_val;
+	  cp_to_arg (11,isdigit)
+
+	  /* The duplicate check has to look at mb_cur_min itself.  */
+	  if (charmap_data.mb_cur_min != -1)
+	    error (0, 0,
+		   gettext ("%s:%u: duplicate definition of mb_cur_min"),
+		   charmap_data.filename, line_no);
+
+	  new_val = (int) strtol (cp, &cp, posix_conformance ? 10 : 0);
+	  if (new_val < 1)
+	    error (0, 0, gettext ("%s:%u: illegal value for mb_cur_min: %d"),
+		   charmap_data.filename, line_no, new_val);
+	  else
+	    charmap_data.mb_cur_min = new_val;
+	}
+      else if (strncmp (cp, "escape_char>", 12) == 0)
+	{
+	  cp_to_arg (12, isgraph)
+	  charmap_data.escape_char = *cp;
+	}
+      else if (strncmp (cp, "comment_char>", 13) == 0)
+	{
+	  cp_to_arg (13, isgraph)
+	  charmap_data.comment_char = *cp;
+	}
+      else
+	SYNTAX_ERROR;
+
+      /* Target of SYNTAX_ERROR.  A label must be followed by a
+	 statement, hence the empty one.  */
+    end_of_loop:
+      ;
+    }
+}
+#undef cp_to_arg
+
+
+/* Read the body of the charmap file, i.e. everything following the
+   `CHARMAP' line up to `END CHARMAP'.  Each entry has the form
+	<name> <escape>[d|x]NN...	or	<name1>...<name2> <escape>[d|x]NN...
+   where the second form defines a range of names whose trailing decimal
+   number runs from that of NAME1 to that of NAME2.  All names are entered
+   into `charmap_data.table' with their character value.
+
+   Returns the largest character value which was entered.  Syntax errors
+   are reported and the offending line is skipped.  If the file ends
+   before `END CHARMAP' this is reported and the function returns.  */
+static unsigned long
+read_body (FILE *infile)
+{
+  unsigned long max_char = 0;
+  /* sysconf yields -1 when the limit is unavailable; use the POSIX.2
+     minimum value of LINE_MAX then.  */
+  long line_max = sysconf (_SC_LINE_MAX);
+  size_t bufsize = line_max > 0 ? (size_t) line_max : 2048;
+  char buf[bufsize];
+  /* Each of the two tokens scanned from a line can be almost as long as
+     the line itself, so both get room for a complete line.  */
+  char name_str[bufsize];
+  char code_str[bufsize];
+
+  while (1)
+    {
+      char *cp = buf;
+      size_t len;
+
+      /* Read the next line.  On EOF report the missing end marker and
+	 return what we have; BUF holds no new data in this case.  */
+      if (fgets (buf, bufsize, infile) == NULL)
+	{
+	  error (0, 0, gettext ("%s: `END CHARMAP' is missing"),
+		 charmap_data.filename);
+	  return max_char;
+	}
+      len = strlen (buf);
+
+      /* This is the next line.  */
+      ++line_no;
+
+      /* A completely filled buffer without newline means the line did not
+	 fit.  */
+      if (len == bufsize - 1 && buf[len - 1] != '\n')
+	{
+	  error (0, 0, gettext ("%s:%u: line too long;  use `getconf "
+				"LINE_MAX' to get the current maximum line"
+				"length"), charmap_data.filename, line_no);
+	  /* Skip the remainder of the line.  */
+	  while (fgets (buf, bufsize, infile) != NULL)
+	    {
+	      len = strlen (buf);
+	      if (len > 0 && buf[len - 1] == '\n')
+		break;
+	    }
+	  continue;
+	}
+
+      /* Remove the newline.  The last line of the file may lack one.  */
+      if (len > 0 && buf[len - 1] == '\n')
+	buf[--len] = '\0';
+
+      /* Comments and empty lines are ignored.  */
+      if (len == 0 || buf[0] == charmap_data.comment_char)
+	continue;
+
+      /* Throw away leading white spaces.  This is not defined in POSIX.2
+	 so don't do it if conformance is requested.  */
+      if (!posix_conformance)
+	while (isspace ((unsigned char) *cp))
+	  ++cp;
+
+      if (*cp == '<')
+	{
+	  char *end1p, *end2p, *start2p;
+	  size_t cnt = 0;
+	  unsigned long char_value = 0;
+
+	  /* Split into the name part (without the leading '<') and the
+	     encoding; anything following is treated as comment.  */
+	  if (sscanf (cp + 1, "%s %s", name_str, code_str) != 2)
+	    SYNTAX_ERROR;
+
+	  /* Remove the escape characters from the first name in place and
+	     find its closing '>'.  */
+	  end1p = cp = name_str;
+	  while (*cp != '\0' && *cp != '>')
+	    {
+	      if (*cp == charmap_data.escape_char)
+		if (*++cp == '\0')
+		  SYNTAX_ERROR;
+	      *end1p++ = *cp++;
+	    }
+	  if (*cp == '\0')
+	    /* No final '>'.  Make error condition.  */
+	    end1p = name_str;
+	  else
+	    ++cp;
+
+	  *end1p = '\0';
+
+	  if (*cp == '.' && *++cp == '.' && *++cp == '.' && *++cp == '<')
+	    {
+	      /* This might be the alternate form.  Handle the second name
+		 the same way as the first.  */
+	      start2p = end2p = ++cp;
+	      while (*cp != '\0' && *cp != '>')
+		{
+		  if (*cp == charmap_data.escape_char)
+		    if (*++cp == '\0')
+		      SYNTAX_ERROR;
+		  *end2p++ = *cp++;
+		}
+	      if (*cp == '\0')
+		/* NO final '>'.  Make error condition.  */
+		end2p = start2p;
+	      else
+		++cp;
+
+	      /* END2P is always before CP here, so this does not disturb
+		 the test for trailing garbage below.  */
+	      *end2p = '\0';
+	    }
+	  else
+	    start2p = end2p = NULL;
+
+
+	  /* Both names must be non-empty, nothing may follow the name(s)
+	     and the encoding must start with the escape character.  */
+	  if (end1p == name_str || (start2p != NULL && start2p == end2p)
+	      || *cp != '\0'
+	      || *code_str != charmap_data.escape_char)
+	    SYNTAX_ERROR;
+
+	  /* Collect the bytes of the encoding, each introduced by the
+	     escape character: `d' decimal, `x' hexadecimal, else octal.  */
+	  cp = code_str;
+	  do
+	    {
+	      char *begin;
+	      long val;
+
+	      switch (*++cp)
+		{
+		case 'd':
+		  val = strtol ((begin = cp + 1), &cp, 10);
+		  break;
+		case 'x':
+		  val = strtol ((begin = cp + 1), &cp, 16);
+		  break;
+		default:
+		  val = strtol ((begin = cp), &cp, 8);
+		  break;
+		}
+	      if (begin == cp)
+		SYNTAX_ERROR;
+
+	      if (posix_conformance && cp - begin < 2)
+		error (0, 0, gettext ("%s:%u: byte constant has less than "
+				      "two digits"),
+		       charmap_data.filename, line_no);
+
+	      if (val < 0 || val > 255)
+		{
+		  error (0, 0, gettext ("%s:%u: character encoding must be "
+					"given in 8-bit bytes"),
+			 charmap_data.filename, line_no);
+		  goto end_of_loop;
+		}
+
+	      if (cnt < (size_t) charmap_data.mb_cur_max)
+		{
+		  if (cnt < sizeof (long))  /* FIXME */
+		    char_value = (char_value << 8) | val;
+		}
+	      else
+		{
+		  error (0, 0, gettext ("%s:%u: number of bytes in character "
+					"definition exceeds `mb_cur_max'"),
+			 charmap_data.filename, line_no);
+		  /* Leave the byte loop; the value collected so far is
+		     still entered below.  */
+		  break;
+		}
+	      ++cnt;
+	    }
+	  while (*cp == charmap_data.escape_char);
+
+	  /* Ignore the rest of the line (comment).  */
+	  if (end2p == NULL)
+	    {
+	      if (insert_entry (&charmap_data.table, name_str,
+				end1p - name_str, (void *) char_value))
+		error (0, 0, gettext ("%s:%u: duplicate entry"),
+		       charmap_data.filename, line_no);
+
+	      max_char = MAX (max_char, char_value);
+	    }
+	  else
+	    {
+	      char *en1, *en2, *start1p;
+	      long n1, n2, n;
+	      int width;
+
+	      /* Skip the common non-numeric prefix of both names.  */
+	      start1p = name_str;
+	      while (start1p < end1p && *start1p == *start2p
+		     && !isdigit ((unsigned char) *start1p))
+		++start1p, ++start2p;
+
+	      n1 = strtol (start1p, &en1, 10);
+	      n2 = strtol (start2p, &en2, 10);
+
+	      /* The numbers must have the same width and extend to the end
+		 of the names.  */
+	      if (en1 - start1p != en2 - start2p || en1 != end1p
+		  || en2 != end2p)
+		SYNTAX_ERROR;
+
+	      if (n1 > n2)
+		error (0, 0, gettext ("%s:%u: starting character is bigger "
+				      "than last"),
+		       charmap_data.filename, line_no);
+
+	      width = (int) (en1 - start1p);
+	      n = n1;
+	      while (n <= n2)
+		{
+		  /* Overwrite the numeric suffix of the first name.  The
+		     size includes the terminating NUL, which lands on the
+		     NUL already stored at END1P.  */
+		  snprintf (start1p, width + 1, "%0*ld", width, n);
+
+		  if (insert_entry (&charmap_data.table, name_str,
+				    en1 - name_str,
+				    (void *) (char_value + n - n1)))
+		    error (0, 0, gettext ("%s:%u: duplicate entry"),
+			   charmap_data.filename, line_no);
+
+		  max_char = MAX (max_char, char_value + n - n1);
+		  ++n;
+		}
+	    }
+	}
+      else
+	{
+	  if (strncmp (cp, "END CHARMAP", 11) == 0)
+	    return max_char;
+
+	  SYNTAX_ERROR;
+	}
+
+      /* Target of SYNTAX_ERROR and of the explicit `goto' above.  A label
+	 must be followed by a statement, hence the empty one.  */
+    end_of_loop:
+      ;
+    }
+
+  return max_char;
+}
+
+/*
+ * Local Variables:
+ *  mode:c
+ *  c-basic-offset:2
+ * End:
+ */