/* Transliteration using the locale's data. Copyright (C) 2000 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper <drepper@cygnus.com>, 2000. The GNU C Library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 2.1 of the License, or (at your option) any later version. The GNU C Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with the GNU C Library; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. */ #include <assert.h> #include <dlfcn.h> #include <search.h> #include <stdint.h> #include <string.h> #include <stdlib.h> #include <bits/libc-lock.h> #include "gconv_int.h" #include "../locale/localeinfo.h" int __gconv_transliterate (struct __gconv_step *step, struct __gconv_step_data *step_data, void *trans_data __attribute__ ((unused)), const unsigned char *inbufstart, const unsigned char **inbufp, const unsigned char *inbufend, unsigned char **outbufstart, size_t *irreversible) { /* Find out about the locale's transliteration. */ uint_fast32_t size; const uint32_t *from_idx; const uint32_t *from_tbl; const uint32_t *to_idx; const uint32_t *to_tbl; const uint32_t *winbuf; const uint32_t *winbufend; uint_fast32_t low; uint_fast32_t high; /* The input buffer. There are actually 4-byte values. */ winbuf = (const uint32_t *) *inbufp; winbufend = (const uint32_t *) inbufend; /* If there is no transliteration information in the locale don't do anything and return the error. */ size = _NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_TRANSLIT_TAB_SIZE); if (size == 0) goto no_rules; /* Get the rest of the values. */ from_idx = (const uint32_t *) _NL_CURRENT (LC_CTYPE, _NL_CTYPE_TRANSLIT_FROM_IDX); from_tbl = (const uint32_t *) _NL_CURRENT (LC_CTYPE, _NL_CTYPE_TRANSLIT_FROM_TBL); to_idx = (const uint32_t *) _NL_CURRENT (LC_CTYPE, _NL_CTYPE_TRANSLIT_TO_IDX); to_tbl = (const uint32_t *) _NL_CURRENT (LC_CTYPE, _NL_CTYPE_TRANSLIT_TO_TBL); /* Test whether there is enough input. */ if (winbuf + 1 > winbufend) return (winbuf == winbufend ? __GCONV_EMPTY_INPUT : __GCONV_INCOMPLETE_INPUT); /* The array starting at FROM_IDX contains indeces to the string table in FROM_TBL. The indeces are sorted wrt to the strings. I.e., we are doing binary search. */ low = 0; high = size; while (low < high) { uint_fast32_t med = (low + high) / 2; uint32_t idx; int cnt; /* Compare the string at this index with the string at the current position in the input buffer. */ idx = from_idx[med]; cnt = 0; do { if (from_tbl[idx + cnt] != winbuf[cnt]) /* Does not match. */ break; ++cnt; } while (from_tbl[idx + cnt] != L'\0' && winbuf + cnt < winbufend); if (cnt > 0 && from_tbl[idx + cnt] == L'\0') { /* Found a matching input sequence. Now try to convert the possible replacements. */ uint32_t idx2 = to_idx[med]; do { /* Determine length of replacement. */ uint_fast32_t len = 0; int res; const unsigned char *toinptr; unsigned char *outptr; while (to_tbl[idx2 + len] != L'\0') ++len; /* Try this input text. */ toinptr = (const unsigned char *) &to_tbl[idx2]; outptr = *outbufstart; res = DL_CALL_FCT (step->__fct, (step, step_data, &toinptr, (const unsigned char *) &to_tbl[idx2 + len], &outptr, NULL, 0, 0)); if (res != __GCONV_ILLEGAL_INPUT) { /* If the conversion succeeds we have to increment the input buffer. */ if (res == __GCONV_EMPTY_INPUT) { *inbufp += cnt * sizeof (uint32_t); ++*irreversible; res = __GCONV_OK; } *outbufstart = outptr; return res; } /* Next replacement. */ idx2 += len + 1; } while (to_tbl[idx2] != L'\0'); /* Nothing found, continue searching. */ } else if (cnt > 0) /* This means that the input buffer contents matches a prefix of an entry. Since we cannot match it unless we get more input, we will tell the caller about it. */ return __GCONV_INCOMPLETE_INPUT; if (winbuf + cnt >= winbufend || from_tbl[idx + cnt] < winbuf[cnt]) low = med + 1; else high = med; } no_rules: /* Maybe the character is supposed to be ignored. */ if (_NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_TRANSLIT_IGNORE_LEN) != 0) { int n = _NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_TRANSLIT_IGNORE_LEN); const uint32_t *ranges = (const uint32_t *) _NL_CURRENT (LC_CTYPE, _NL_CTYPE_TRANSLIT_IGNORE); const uint32_t wc = *(const uint32_t *) (*inbufp); int i; /* Test whether there is enough input. */ if (winbuf + 1 > winbufend) return (winbuf == winbufend ? __GCONV_EMPTY_INPUT : __GCONV_INCOMPLETE_INPUT); for (i = 0; i < n; ranges += 3, ++i) if (ranges[0] <= wc && wc <= ranges[1] && (wc - ranges[0]) % ranges[2] == 0) { /* Matches the range. Ignore it. */ *inbufp += 4; ++*irreversible; return __GCONV_OK; } else if (wc < ranges[0]) /* There cannot be any other matching range since they are sorted. */ break; } /* One last chance: use the default replacement. */ if (_NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_TRANSLIT_DEFAULT_MISSING_LEN) != 0) { const uint32_t *default_missing = (const uint32_t *) _NL_CURRENT (LC_CTYPE, _NL_CTYPE_TRANSLIT_DEFAULT_MISSING); const unsigned char *toinptr = (const unsigned char *) default_missing; uint32_t len = _NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_TRANSLIT_DEFAULT_MISSING_LEN); unsigned char *outptr; int res; /* Test whether there is enough input. */ if (winbuf + 1 > winbufend) return (winbuf == winbufend ? __GCONV_EMPTY_INPUT : __GCONV_INCOMPLETE_INPUT); outptr = *outbufstart; res = DL_CALL_FCT (step->__fct, (step, step_data, &toinptr, (const unsigned char *) (default_missing + len), &outptr, NULL, 0, 0)); if (res != __GCONV_ILLEGAL_INPUT) { /* If the conversion succeeds we have to increment the input buffer. */ if (res == __GCONV_EMPTY_INPUT) { /* This worked but is not reversible. */ ++*irreversible; *inbufp += 4; res = __GCONV_OK; } *outbufstart = outptr; return res; } } /* Haven't found a match. */ return __GCONV_ILLEGAL_INPUT; } /* Structure to represent results of found (or not) transliteration modules. */ struct known_trans { /* This structure must remain the first member. */ struct trans_struct info; char *fname; void *handle; int open_count; }; /* Tree with results of previous calls to __gconv_translit_find. */ static void *search_tree; /* We modify global data. */ __libc_lock_define_initialized (static, lock); /* Compare two transliteration entries. */ static int trans_compare (const void *p1, const void *p2) { const struct known_trans *s1 = (const struct known_trans *) p1; const struct known_trans *s2 = (const struct known_trans *) p2; return strcmp (s1->info.name, s2->info.name); } /* Open (maybe reopen) the module named in the struct. Get the function and data structure pointers we need. */ static int open_translit (struct known_trans *trans) { __gconv_trans_query_fct queryfct; trans->handle = __libc_dlopen (trans->fname); if (trans->handle == NULL) /* Not available. */ return 1; /* Find the required symbol. */ queryfct = __libc_dlsym (trans->handle, "gconv_trans_context"); if (queryfct == NULL) { /* We cannot live with that. */ close_and_out: __libc_dlclose (trans->handle); trans->handle = NULL; return 1; } /* Get the context. */ if (queryfct (trans->info.name, &trans->info.csnames, &trans->info.ncsnames) != 0) goto close_and_out; /* Of course we also have to have the actual function. */ trans->info.trans_fct = __libc_dlsym (trans->handle, "gconv_trans"); if (trans->info.trans_fct == NULL) goto close_and_out; /* Now the optional functions. */ trans->info.trans_init_fct = __libc_dlsym (trans->handle, "gconv_trans_init"); trans->info.trans_context_fct = __libc_dlsym (trans->handle, "gconv_trans_context"); trans->info.trans_end_fct = __libc_dlsym (trans->handle, "gconv_trans_end"); trans->open_count = 1; return 0; } int internal_function __gconv_translit_find (struct trans_struct *trans) { struct known_trans **found; const struct path_elem *runp; int res = 1; /* We have to have a name. */ assert (trans->name != NULL); /* Acquire the lock. */ __libc_lock_lock (lock); /* See whether we know this module already. */ found = __tfind (trans, &search_tree, trans_compare); if (found != NULL) { /* Is this module available? */ if ((*found)->handle != NULL) { /* Maybe we have to reopen the file. */ if ((*found)->handle != (void *) -1) /* The object is not unloaded. */ res = 0; else if (open_translit (*found) == 0) { /* Copy the data. */ *trans = (*found)->info; (*found)->open_count++; res = 0; } } } else { size_t name_len = strlen (trans->name) + 1; int need_so = 0; struct known_trans *newp; /* We have to continue looking for the module. */ if (__gconv_path_elem == NULL) __gconv_get_path (); /* See whether we have to append .so. */ if (name_len <= 4 || memcmp (&trans->name[name_len - 4], ".so", 3) != 0) need_so = 1; /* Create a new entry. */ newp = (struct known_trans *) malloc (sizeof (struct known_trans) + (__gconv_max_path_elem_len + name_len + 3) + name_len); if (newp != NULL) { char *cp; /* Clear the struct. */ memset (newp, '\0', sizeof (struct known_trans)); /* Store a copy of the module name. */ newp->info.name = cp = (char *) (newp + 1); cp = __mempcpy (cp, trans->name, name_len); newp->fname = cp; /* Search in all the directories. */ for (runp = __gconv_path_elem; runp->name != NULL; ++runp) { cp = __mempcpy (__stpcpy ((char *) newp->fname, runp->name), trans->name, name_len); if (need_so) memcpy (cp, ".so", sizeof (".so")); if (open_translit (newp) == 0) { /* We found a module. */ res = 0; break; } } if (res) newp->fname = NULL; /* In any case we'll add the entry to our search tree. */ if (__tsearch (newp, &search_tree, trans_compare) == NULL) { /* Yickes, this should not happen. Unload the object. */ res = 1; /* XXX unload here. */ } } } __libc_lock_unlock (lock); return res; }