about summary refs log tree commit diff
path: root/iconv/gconv_trans.c
diff options
context:
space:
mode:
Diffstat (limited to 'iconv/gconv_trans.c')
-rw-r--r--iconv/gconv_trans.c122
1 files changed, 109 insertions, 13 deletions
diff --git a/iconv/gconv_trans.c b/iconv/gconv_trans.c
index 56c3ff6be6..829ff5f981 100644
--- a/iconv/gconv_trans.c
+++ b/iconv/gconv_trans.c
@@ -18,6 +18,7 @@
    write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
    Boston, MA 02111-1307, USA.  */
 
+#include <dlfcn.h>
 #include <stdint.h>
 
 #include "gconv_int.h"
@@ -25,26 +26,121 @@
 
 
 int
-gconv_transliterate (struct __gconv_step *step,
-		     struct __gconv_step_data *step_data,
-		     const unsigned char *inbufstart,
-		     const unsigned char **inbufp,
-		     const unsigned char *inbufend,
-		     unsigned char *outbufstart,
-		     unsigned char **outbufp, unsigned char *outbufend,
-		     size_t *irreversible)
+__gconv_transliterate (struct __gconv_step *step,
+		       struct __gconv_step_data *step_data,
+		       const unsigned char *inbufstart,
+		       const unsigned char **inbufp,
+		       const unsigned char *inbufend,
+		       unsigned char **outbufstart, size_t *irreversible)
 {
   /* Find out about the locale's transliteration.  */
-  uint_fast32_t size = _NL_CURRENT_WORD (LC_CTYPE,
-					 _NL_CTYPE_TRANSLIT_HASH_SIZE);
-  uint_fast32_t layers = _NL_CURRENT_WORD (LC_CTYPE,
-					   _NL_CTYPE_TRANSLIT_HASH_LAYERS);
+  uint_fast32_t size;
+  uint_fast32_t layers;
+  uint32_t *from_idx;
+  uint32_t *from_tbl;
+  uint32_t *to_idx;
+  uint32_t *to_tbl;
+  uint32_t *winbuf;
+  uint32_t *winbufend;
+  uint_fast32_t low;
+  uint_fast32_t high;
 
   /* If there is no transliteration information in the locale don't do
      anything and return the error.  */
+  size = _NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_TRANSLIT_HASH_SIZE);
   if (size == 0)
     return __GCONV_ILLEGAL_INPUT;
 
-  /* XXX For now we don't do anything.  */
+  /* Fetch the remaining transliteration tables from the locale.  */
+  layers = _NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_TRANSLIT_HASH_LAYERS);
+  from_idx = (uint32_t *) _NL_CURRENT (LC_CTYPE, _NL_CTYPE_TRANSLIT_FROM_IDX);
+  from_tbl = (uint32_t *) _NL_CURRENT (LC_CTYPE, _NL_CTYPE_TRANSLIT_FROM_TBL);
+  to_idx = (uint32_t *) _NL_CURRENT (LC_CTYPE, _NL_CTYPE_TRANSLIT_TO_IDX);
+  to_tbl = (uint32_t *) _NL_CURRENT (LC_CTYPE, _NL_CTYPE_TRANSLIT_TO_TBL);
+
+  /* The input buffer.  These are actually 4-byte values.  */
+  winbuf = (uint32_t *) *inbufp;
+  winbufend = (uint32_t *) inbufend;
+
+  /* Test whether there is enough input.  */
+  if (winbuf + 1 > winbufend)
+    return (winbuf == winbufend
+	    ? __GCONV_EMPTY_INPUT : __GCONV_INCOMPLETE_INPUT);
+
+  /* The array starting at FROM_IDX contains indices into the string table
+     in FROM_TBL.  The indices are sorted with respect to the strings,
+     i.e., we can do a binary search over the SIZE entries of FROM_IDX.  */
+  low = 0;
+  high = size;
+  while (low < high)
+    {
+      uint_fast32_t med = (low + high) / 2;
+      uint32_t idx;
+      int cnt;
+
+      /* Compare the string at this index with the string at the current
+	 position in the input buffer; CNT counts the matching characters.  */
+      idx = from_idx[med];
+      cnt = 0;
+      do
+	{
+	  if (from_tbl[idx + cnt] != winbuf[cnt])
+	    /* Does not match.  */
+	    break;
+	  ++cnt;
+	}
+      while (from_tbl[idx + cnt] != L'\0' && winbuf + cnt < winbufend);
+
+      if (cnt > 0 && from_tbl[idx + cnt] == L'\0')
+	{
+	  /* Found a matching input sequence.  Now try to convert the
+	     possible replacements.  */
+	  uint32_t idx2 = to_idx[med];
+
+	  do
+	    {
+	      /* Determine length of replacement.  */
+	      uint_fast32_t len = 0;
+	      int res;
+	      const unsigned char *toinptr;
+
+	      while (to_tbl[idx2 + len] != L'\0')
+		++len;
+
+	      /* Try this input text.  */
+	      toinptr = (const unsigned char *) &to_tbl[idx2];
+	      res = DL_CALL_FCT (step->__fct,
+				 (step, step_data, &toinptr,
+				  (const unsigned char *) &to_tbl[idx2 + len],
+				  (unsigned char **) outbufstart,
+				  irreversible, 0, 0));
+	      if (res != __GCONV_ILLEGAL_INPUT)
+		{
+		  /* If the conversion succeeds we have to increment the
+		     input buffer.  */
+		  if (res == __GCONV_EMPTY_INPUT)
+		    {
+		      *inbufp += cnt * sizeof (uint32_t);
+		      ++*irreversible;
+		    }
+
+		  return res;
+		}
+
+	      /* Next replacement.  */
+	      idx2 += len + 1;
+	    }
+	  while (to_tbl[idx2] != L'\0');
+
+	  /* Nothing found, continue searching.  */
+	}
+
+      if (winbuf + cnt >= winbufend || from_tbl[idx + cnt] < winbuf[cnt])
+	low = med + 1;
+      else
+	high = med;
+    }
+
+  /* Haven't found a match.  */
   return __GCONV_ILLEGAL_INPUT;
 }