diff options
Diffstat (limited to 'iconv/gconv_trans.c')
-rw-r--r-- | iconv/gconv_trans.c | 122 |
1 files changed, 109 insertions, 13 deletions
diff --git a/iconv/gconv_trans.c b/iconv/gconv_trans.c index 56c3ff6be6..829ff5f981 100644 --- a/iconv/gconv_trans.c +++ b/iconv/gconv_trans.c @@ -18,6 +18,7 @@ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ +#include <dlfcn.h> #include <stdint.h> #include "gconv_int.h" @@ -25,26 +26,121 @@ int -gconv_transliterate (struct __gconv_step *step, - struct __gconv_step_data *step_data, - const unsigned char *inbufstart, - const unsigned char **inbufp, - const unsigned char *inbufend, - unsigned char *outbufstart, - unsigned char **outbufp, unsigned char *outbufend, - size_t *irreversible) +__gconv_transliterate (struct __gconv_step *step, + struct __gconv_step_data *step_data, + const unsigned char *inbufstart, + const unsigned char **inbufp, + const unsigned char *inbufend, + unsigned char **outbufstart, size_t *irreversible) { /* Find out about the locale's transliteration. */ - uint_fast32_t size = _NL_CURRENT_WORD (LC_CTYPE, - _NL_CTYPE_TRANSLIT_HASH_SIZE); - uint_fast32_t layers = _NL_CURRENT_WORD (LC_CTYPE, - _NL_CTYPE_TRANSLIT_HASH_LAYERS); + uint_fast32_t size; + uint_fast32_t layers; + uint32_t *from_idx; + uint32_t *from_tbl; + uint32_t *to_idx; + uint32_t *to_tbl; + uint32_t *winbuf; + uint32_t *winbufend; + uint_fast32_t low; + uint_fast32_t high; /* If there is no transliteration information in the locale don't do anything and return the error. */ + size = _NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_TRANSLIT_HASH_SIZE); if (size == 0) return __GCONV_ILLEGAL_INPUT; - /* XXX For now we don't do anything. */ + /* Get the rest of the values. */ + layers = _NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_TRANSLIT_HASH_LAYERS); + from_idx = (uint32_t *) _NL_CURRENT (LC_CTYPE, _NL_CTYPE_TRANSLIT_FROM_IDX); + from_tbl = (uint32_t *) _NL_CURRENT (LC_CTYPE, _NL_CTYPE_TRANSLIT_FROM_TBL); + to_idx = (uint32_t *) _NL_CURRENT (LC_CTYPE, _NL_CTYPE_TRANSLIT_TO_IDX); + to_tbl = (uint32_t *) _NL_CURRENT (LC_CTYPE, _NL_CTYPE_TRANSLIT_TO_TBL); + + /* The input buffer. There are actually 4-byte values. */ + winbuf = (uint32_t *) *inbufp; + winbufend = (uint32_t *) inbufend; + + /* Test whether there is enough input. */ + if (winbuf + 1 > winbufend) + return (winbuf == winbufend + ? __GCONV_EMPTY_INPUT : __GCONV_INCOMPLETE_INPUT); + + /* The array starting at FROM_IDX contains indeces to the string table + in FROM_TBL. The indeces are sorted wrt to the strings. I.e., we + are doing binary search. */ + low = 0; + high = size; + while (low < high) + { + uint_fast32_t med = (low + high) / 2; + uint32_t idx; + int cnt; + + /* Compare the string at this index with the string at the current + position in the input buffer. */ + idx = from_idx[med]; + cnt = 0; + do + { + if (from_tbl[idx + cnt] != winbuf[cnt]) + /* Does not match. */ + break; + ++cnt; + } + while (from_tbl[idx + cnt] != L'\0' && winbuf + cnt < winbufend); + + if (cnt > 0 && from_tbl[idx + cnt] == L'\0') + { + /* Found a matching input sequence. Now try to convert the + possible replacements. */ + uint32_t idx2 = to_idx[med]; + + do + { + /* Determine length of replacement. */ + uint_fast32_t len = 0; + int res; + const unsigned char *toinptr; + + while (to_tbl[idx2 + len] != L'\0') + ++len; + + /* Try this input text. */ + toinptr = (const unsigned char *) &to_tbl[idx2]; + res = DL_CALL_FCT (step->__fct, + (step, step_data, &toinptr, + (const unsigned char *) &to_tbl[idx2 + len], + (unsigned char **) outbufstart, + irreversible, 0, 0)); + if (res != __GCONV_ILLEGAL_INPUT) + { + /* If the conversion succeeds we have to increment the + input buffer. */ + if (res == __GCONV_EMPTY_INPUT) + { + *inbufp += cnt * sizeof (uint32_t); + ++*irreversible; + } + + return res; + } + + /* Next replacement. */ + idx2 += len + 1; + } + while (to_tbl[idx2] != L'\0'); + + /* Nothing found, continue searching. */ + } + + if (winbuf + cnt >= winbufend || from_tbl[idx + cnt] < winbuf[cnt]) + low = idx; + else + high = idx; + } + + /* Haven't found a match. */ return __GCONV_ILLEGAL_INPUT; } |