diff options
Diffstat (limited to 'generator/pbmtext.c')
-rw-r--r-- | generator/pbmtext.c | 206 |
1 files changed, 148 insertions, 58 deletions
diff --git a/generator/pbmtext.c b/generator/pbmtext.c index 0fe8ad6a..7d9f7cf7 100644 --- a/generator/pbmtext.c +++ b/generator/pbmtext.c @@ -828,55 +828,140 @@ truncateText(struct Text const inputText, static void -fgetNarrowWideString(PM_WCHAR * const widestring, - unsigned int const size, - FILE * const ifP, - const char ** const errorP) { -/*---------------------------------------------------------------------------- - Return the next line from file *ifP, up to 'size' characters, as - *widestring. +fgetWideString(PM_WCHAR * const widestring, + unsigned int const size, + FILE * const ifP, + bool * const eofP, + const char ** const errorP) { - Return error if we can't read the file, or file is at EOF. ------------------------------------------------------------------------------*/ - int wideCode; - /* Width orientation for *ifP: positive means wide, negative means - byte, zero means undecided. - */ + wchar_t * rc; assert(widestring); assert(size > 0); - wideCode = fwide(ifP, 0); - if (wideCode > 0) { - /* *ifP is wide-oriented */ - wchar_t * rc; - rc = fgetws(widestring, size, ifP); - if (rc == NULL) + rc = fgetws(widestring, size, ifP); + + if (rc == NULL) { + if (feof(ifP)) { + *eofP = true; + *errorP = NULL; + } else if (ferror(ifP) && errno == EILSEQ) pm_asprintf(errorP, - "fgetws() of max %u bytes failed or end of stream", - size); + "fgetws(): conversion error: sequence is " + "invalid for locale '%s'", + setlocale(LC_CTYPE, NULL)); else - *errorP = NULL; + pm_asprintf(errorP, + "fgetws() of max %u bytes failed", + size); } else { - char * bufNarrow; - char * rc; + *eofP = false; + *errorP = NULL; + } +} - MALLOCARRAY_NOFAIL(bufNarrow, MAXLINECHARS+1); - rc = fgets(bufNarrow, size, ifP); - if (rc == NULL) - pm_asprintf(errorP, "EOF or error reading file"); - else { - size_t cnt; - for (cnt = 0; cnt < size && bufNarrow[cnt] != '\0'; ++cnt) - widestring[cnt] = (PM_WCHAR)(unsigned char) bufNarrow[cnt]; +static void +fgetNarrowString(PM_WCHAR * const 
widestring, + unsigned int const size, + FILE * const ifP, + bool * const eofP, + const char ** const errorP) { + + char * bufNarrow; + char * rc; - widestring[cnt] = L'\0'; + assert(widestring); + assert(size > 0); + + MALLOCARRAY_NOFAIL(bufNarrow, MAXLINECHARS+1); + + rc = fgets(bufNarrow, size, ifP); + + if (rc == NULL) { + if (feof(ifP)) { + *eofP = true; *errorP = NULL; - } - free(bufNarrow); + } else + pm_asprintf(errorP, "Error reading file"); + } else { + size_t cnt; + + for (cnt = 0; cnt < size && bufNarrow[cnt] != '\0'; ++cnt) + widestring[cnt] = (PM_WCHAR)(unsigned char) bufNarrow[cnt]; + + widestring[cnt] = L'\0'; + + *eofP = false; + *errorP = NULL; } + free(bufNarrow); +} + + + +static void +fgetNarrowWideString(PM_WCHAR * const widestring, + unsigned int const size, + FILE * const ifP, + bool * const eofP, + const char ** const errorP) { +/*---------------------------------------------------------------------------- + Return the next line from file *ifP, as *widestring. + + Lines are delimited by newline characters and EOF. + + 'size' is the size in characters of the buffer at *widestring. If the line + to which the file is positioned is longer than that minus 1, we consider it + to be only that long and consider the next character of the actual line to + be the first character of the next line. We leave the file positioned + to that character. + + Return *eofP == true iff we encounter end of file (and therefore don't read + a line). + + If we can't read the file (or sense EOF), return as *errorP a text + explanation of why; otherwise, return *errorP = NULL. + + The line we return is null-terminated. But it also includes any embedded + null characters that are within the line in the file. It is not strictly + possible for Caller to tell whether a null character in *widestring comes + from the file or is the one we put there, so Caller should just ignore any + null character and anything after it. 
It is also not possible for Caller to + tell if we truncated the actual line because of 'size' if there is a null + character in the line. This means there just isn't any way to get + reasonable behavior from this function if the input file contains null + characters (but at least the damage is limited to presenting arbitrary text + as the contents of the file - the program won't crash). + + Null characters never appear within normal text (including wide-character + text). If there is one in the input file, it is probably because the input + is corrupted. + + The line we return may or may not end in a newline character. It ends in a + newline character unless it doesn't fit in 'size' characters or it is the + last line in the file and doesn't end in newline. +-----------------------------------------------------------------------------*/ + /* The limitations described above with respect to null characters in + *ifP are derived from the same limitations in POSIX 'fgets' and + 'fgetws'. To avoid them, we would have to read *ifP one character + at a time with 'fgetc' and 'fgetwc'. + */ + + int const wideCode = fwide(ifP, 0); + /* Width orientation for *ifP: positive means wide, negative means + byte, zero means undecided. + */ + + assert(widestring); + assert(size > 0); + + if (wideCode > 0) + /* *ifP is wide-oriented */ + fgetWideString(widestring, size, ifP, eofP, errorP); + else + fgetNarrowString(widestring, size, ifP, eofP, errorP); } @@ -898,6 +983,10 @@ getText(PM_WCHAR const cmdlineText[], But we return text as only renderable characters - characters in *fontP - with control characters interpreted or otherwise fixed, according to 'fixMode'. + + If *inputTextP indicates Standard Input and Standard Input contains null + characters, we will truncate lines or consider a single line to be multiple + lines. 
-----------------------------------------------------------------------------*/ struct Text inputText; @@ -934,30 +1023,31 @@ getText(PM_WCHAR const cmdlineText[], for (lineCount = 0, eof = false; !eof; ) { const char * error; - fgetNarrowWideString(buf, MAXLINECHARS, stdin, &error); - if (error) { - /* We're lazy, so we treat any error as EOF */ - pm_strfree(error); - eof = true; - } else { - if (wcslen(buf) + 1 >= MAXLINECHARS) - pm_error( - "Line %u (starting at zero) of input text " - "is longer than %u characters." - "Cannot process", - lineCount, (unsigned int) MAXLINECHARS-1); - if (lineCount >= maxlines) { - maxlines *= 2; - REALLOCARRAY(textArray, maxlines); - if (textArray == NULL) + fgetNarrowWideString(buf, MAXLINECHARS, stdin, &eof, &error); + if (error) + pm_error("Unable to read line %u from file. %s", + lineCount, error); + else { + if (!eof) { + if (wcslen(buf) + 1 >= MAXLINECHARS) + pm_error( + "Line %u (starting at zero) of input text " + "is longer than %u characters." + "Cannot process", + lineCount, (unsigned int) MAXLINECHARS-1); + if (lineCount >= maxlines) { + maxlines *= 2; + REALLOCARRAY(textArray, maxlines); + if (textArray == NULL) + pm_error("out of memory"); + } + fixControlChars(buf, fontP, + (const PM_WCHAR **)&textArray[lineCount], + fixMode); + if (textArray[lineCount] == NULL) pm_error("out of memory"); + ++lineCount; } - fixControlChars(buf, fontP, - (const PM_WCHAR **)&textArray[lineCount], - fixMode); - if (textArray[lineCount] == NULL) - pm_error("out of memory"); - ++lineCount; } } inputText.textArray = textArray; @@ -1305,7 +1395,7 @@ main(int argc, const char *argv[]) { char * newLocale; newLocale = setlocale(LC_ALL, ""); if (!newLocale) - pm_error("Failed to set locale (LC_ALL) from environemt"); + pm_error("Failed to set locale (LC_ALL) from environment"); /* Orient standard input stream to wide */ fwide(stdin, 1); |