about summary refs log tree commit diff
path: root/generator/pbmtextps.c
diff options
context:
space:
mode:
authorgiraffedata <giraffedata@9d0c8265-081b-0410-96cb-a4ca84ce46f8>2023-06-28 17:29:32 +0000
committergiraffedata <giraffedata@9d0c8265-081b-0410-96cb-a4ca84ce46f8>2023-06-28 17:29:32 +0000
commit23ce26f64c34e30951ad9ade2151552ed77e7357 (patch)
treed73b31a0c2f7c7be4a69f8a8e84e00dd39c432b5 /generator/pbmtextps.c
parent1b6e51a266008348ad93ed8b6ac9ec91b5024fea (diff)
downloadnetpbm-mirror-23ce26f64c34e30951ad9ade2151552ed77e7357.tar.gz
netpbm-mirror-23ce26f64c34e30951ad9ade2151552ed77e7357.tar.xz
netpbm-mirror-23ce26f64c34e30951ad9ade2151552ed77e7357.zip
promote Advanced to Stable
git-svn-id: http://svn.code.sf.net/p/netpbm/code/stable@4558 9d0c8265-081b-0410-96cb-a4ca84ce46f8
Diffstat (limited to 'generator/pbmtextps.c')
-rw-r--r--generator/pbmtextps.c450
1 files changed, 386 insertions, 64 deletions
diff --git a/generator/pbmtextps.c b/generator/pbmtextps.c
index f543618d..2a1173de 100644
--- a/generator/pbmtextps.c
+++ b/generator/pbmtextps.c
@@ -27,6 +27,7 @@
 
 #define _XOPEN_SOURCE 500
   /* Make sure popen() is in stdio.h, strdup() is in string.h */
+
 #include <unistd.h>
 #include <stdio.h>
 #include <stdlib.h>
@@ -41,6 +42,8 @@
 #include "pm_system.h"
 #include "pbm.h"
 
+enum InputFmt {INPUT_LITERAL, INPUT_ASCIIHEX, INPUT_ASCII85};
+
 static void
 validateFontName(const char * const name) {
 /*-----------------------------------------------------------------------------
@@ -52,6 +55,9 @@ validateFontName(const char * const name) {
 -----------------------------------------------------------------------------*/
     unsigned int idx;
 
+    if (name[0] == '\0')
+        pm_error("Font name is empty string");
+
     for (idx = 0; name[idx] != '\0'; ++idx) {
         char const c = name[idx];
 
@@ -71,8 +77,8 @@ validateFontName(const char * const name) {
 
 
 static void
-asciiHexEncode(char *          const inbuff,
-               char *          const outbuff) {
+asciiHexEncode(const char * const inbuff,
+               char *       const outbuff) {
 /*-----------------------------------------------------------------------------
   Convert the input text string to ASCII-Hex encoding.
 
@@ -83,69 +89,357 @@ asciiHexEncode(char *          const inbuff,
 
     unsigned int idx;
 
+    outbuff[0] = '<';
+
     for (idx = 0; inbuff[idx] != '\0'; ++idx) {
         unsigned int const item = (unsigned char) inbuff[idx];
 
-        outbuff[idx*2]   = hexits[item >> 4];
-        outbuff[idx*2+1] = hexits[item & 0xF];
+        outbuff[1 + idx * 2 + 0] = hexits[item >> 4];
+        outbuff[1 + idx * 2 + 1] = hexits[item & 0xF];
     }
 
-    outbuff[idx * 2] = '\0';
+    if (idx == 0)
+        pm_message("Empty input string");
+
+    outbuff[1 + idx * 2] = '>';
+    outbuff[1 + idx * 2 + 1] = '\0';
 }
 
 
 
 static void
-buildTextFromArgs(int           const argc,
-                  const char ** const argv,
-                  const char ** const asciiHexTextP) {
+failForInvalidChar(char         const c,
+                   const char * const type) {
+
+    if (c >= 0x32 || c < 0x7F)
+        pm_error("Invalid character '%c' in '%s' input string",   c, type);
+    else
+        pm_error("Invalid character 0x%02x in '%s' input string", c, type);
+}
+
+
+
+static void
+formatNonemptyAsciiHex(const char * const inbuff,
+                       char       * const outbuff,
+                       unsigned int const inLen) {
+
+    unsigned int inIdx, outIdx;
+        /* Cursors in input and output buffers, respectively */
+    unsigned int validCharCt;
+        /* Number of valid hex characters we've processed so far in input */
+    unsigned int startIdx, endIdx;
+        /* Limits of input buffer cursor ('inIdx') */
+
+     if (inbuff[0] == '<') {
+         startIdx = 1;
+         endIdx = inLen - 2;
+     } else {
+         startIdx = 0;
+         endIdx = inLen - 1;
+     }
+
+     validCharCt = 0;  /* No valid characters seen yet */
+
+     outIdx = 0;  /* Start at beginning of output buffer */
+
+     outbuff[outIdx++] = '<';
+
+     for (inIdx = startIdx; inIdx <= endIdx; ++inIdx) {
+         switch (inbuff[inIdx]) {
+         case '<':
+         case '>':
+             pm_error("Misplaced character '%c' in Ascii Hex input string",
+                      inbuff[inIdx]);
+           break;
+         case '\f':
+         case '\n':
+         case '\r':
+         case ' ':
+         case '\t':
+             /* ignore whitespace chars */
+             break;
+         default:
+             if ((inbuff [inIdx] >='0' && inbuff [inIdx] <='9') ||
+                 (inbuff [inIdx] >='A' && inbuff [inIdx] <='F') ||
+                 (inbuff [inIdx] >='a' && inbuff [inIdx] <='f')) {
+
+                 outbuff[outIdx++] = inbuff [inIdx];
+                 ++validCharCt;
+             } else
+                 failForInvalidChar(inbuff[inIdx], "Ascii Hex");
+             break;
+         }
+     }
+
+     if (validCharCt == 0)
+         pm_message("Empty Ascii Hex input string");
+     else if (validCharCt % 2 != 0)
+         pm_error("Number of characters in Ascii Hex input string "
+                  "is not even");
+
+     outbuff[outIdx++] = '>';
+     outbuff[outIdx++] = '\0';
+}
+
+
+
+static void
+formatAsciiHexString(const char * const inbuff,
+                     char       * const outbuff,
+                     unsigned int const inLen) {
 /*----------------------------------------------------------------------------
-   Build the array of text to be included in the Postscript program to
-   be rendered, from the arguments of this program.
-
-   We encode it in ASCII-Hex notation as opposed to using the plain text from
-   the command line because 1) the command line might have Postscript control
-   characters in it; and 2) the command line might have text in 8-bit or
-   multibyte code, but a Postscript program is supposed to be entirely
-   printable ASCII characters.
------------------------------------------------------------------------------*/
-    char * text;
+  Format the ASCII Hex input 'inbuff' as a Postscript ASCII Hex string,
+  e.g. "<313233>".  Input can be just the ASCII Hex (e.g. "313233") or already
+  formatted (e.g. "<313233>").  Input may also contain white space, which we
+  ignore -- our output never contains white space.  Though in Postscript, an
+  ASCII NUL character counts as white space, we consider it the end of the
+  input.
+
+  We consider white space outside of the <> delimiters to be an error.
+
+  Abort with error message if there is anything other than valid hex digits in
+  the ASCII hex string proper.  This is necessary to prevent code injection.
+----------------------------------------------------------------------------*/
+    if (inLen == 0 ||
+        (inLen == 2 && inbuff[0] == '<' && inbuff[inLen-1] == '>' )) {
+        pm_message("Empty Ascii Hex input string");
+        strncpy(outbuff, "<>", 3);
+    } else {
+        if (inbuff[0] == '<' && inbuff[inLen-1] != '>' )
+            pm_error("Ascii Hex input string starts with '<' "
+                     "but does not end with '>'");
+        else if (inbuff[0] != '<' && inbuff[inLen-1] == '>' )
+            pm_error("Ascii Hex input string ends with '>' "
+                     "but does not start with '<'");
+
+        formatNonemptyAsciiHex(inbuff, outbuff, inLen);
+    }
+}
+
+
+
+static void
+formatNonemptyAscii85(const char * const inbuff,
+                      char       * const outbuff,
+                      unsigned int const inLen) {
+
+    unsigned int inIdx, outIdx;
+        /* Cursors in input and output buffers, respectively */
+    unsigned int seqPos;
+        /* Position in 5-character Ascii-85 sequence where 'inIdx' points */
+    unsigned int startIdx, endIdx;
+        /* Limits of input buffer cursor ('inIdx') */
+
+    if (inLen > 4 && inbuff[0] == '<' && inbuff[1] == '~' &&
+        inbuff[inLen-2] == '~' && inbuff[inLen-1] == '>') {
+        startIdx = 2;
+        endIdx = inLen - 3;
+    } else {
+        startIdx = 0;
+        endIdx = inLen - 1;
+    }
+
+    seqPos = 0;  /* No 5-character Ascii-85 sequence encountered yet */
+    outIdx = 0;  /* Start filling output buffer from beginning */
+
+    outbuff[outIdx++] = '<';
+    outbuff[outIdx++] = '~';
+
+    for (inIdx = startIdx; inIdx <= endIdx; ++inIdx) {
+      switch (inbuff[inIdx]) {
+      case '~':
+          pm_error("Misplaced character '~' in Ascii 85 input string");
+          break;
+      case '\f':
+      case '\n':
+      case '\r':
+      case ' ':
+      case '\t':
+          break;
+      case 'z':
+          /* z extension */
+          if (seqPos > 0)
+              pm_error("Special 'z' character appears in the middle of a "
+                       "5-character Ascii-85 sequence, which is invalid");
+          else
+              outbuff[outIdx++] = inbuff[inIdx];
+          break;
+        default:
+          /* valid Ascii 85 char */
+          if (inbuff [inIdx] >='!' && inbuff [inIdx] <='u') {
+              outbuff[outIdx++] = inbuff[inIdx];
+              seqPos = (seqPos + 1) % 5;
+          } else
+              failForInvalidChar(inbuff[inIdx], "Ascii 85");
+          break;
+      }
+    }
+
+    if (outIdx == 2) {
+        pm_message("Empty Ascii 85 input string");
+    }
+
+    outbuff[outIdx++]   = '~';
+    outbuff[outIdx++] = '>';
+    outbuff[outIdx++] = '\0';
+}
+
+
+
+static void
+formatAscii85String(const char * const inbuff,
+                    char       * const outbuff,
+                    unsigned int const inLen) {
+/*----------------------------------------------------------------------------
+  Format the Ascii-85 input 'inbuff' as a Postscript Ascii-85 string,
+  e.g. "<~313233~>".  Input can be just the Ascii-85 (e.g. "313233") or
+  already formatted (e.g. "<~313233~>").  Input may also contain white space,
+  which we ignore -- our output never contains white space.  Though in
+  Postscript, an ASCII NUL character counts as white space, we consider it the
+  end of the input.
+
+  We consider white space outside of the <~~> delimiters to be an error.
+
+  Abort with error message if we encounter anything other than valid Ascii-85
+  encoding characters in the string proper.  Note that the Adobe variant
+  does not support the "y" extention.
+----------------------------------------------------------------------------*/
+    if (inLen == 0 || (inLen == 4 && strncmp (inbuff, "<~~>", 4) == 0)) {
+        pm_message("Empty Ascii 85 input string");
+        strncpy(outbuff,"<~~>", 5);
+    } else {
+        if (inLen >= 2) {
+            if (inbuff[0] == '<' && inbuff[1] == '~' &&
+                (inLen < 4 || inbuff[inLen-2] != '~'
+                 || inbuff[inLen-1] != '>' ))
+                pm_error("Ascii 85 input string starts with '<~' "
+                         "but does not end with '~>'");
+            else if (inbuff[inLen-2] == '~' && inbuff[inLen-1] == '>' &&
+                     (inLen < 4 || inbuff[0] != '<' || inbuff[1] != '~'))
+                pm_error("Ascii 85 input string ends with '~>' "
+                         "but does not start with '<~'");
+        }
+        formatNonemptyAscii85(inbuff, outbuff, inLen);
+    }
+}
+
+
+
+static void
+combineArgs(int            const argc,
+            const char **  const argv,
+            const char **  const textP,
+            unsigned int * const textSizeP) {
+
     unsigned int totalTextSize;
+    char * text;
+    size_t * argSize;
     unsigned int i;
+    size_t idx;
+
+    MALLOCARRAY_NOFAIL(argSize, argc);
+        /* argv[0] not accessed; argSize[0] not used */
+
+    for (i = 1, totalTextSize = 0; i < argc; ++i) {
+        argSize[i] = strlen(argv[i]);
+        totalTextSize += argSize[i];
+    }
+
+    totalTextSize = totalTextSize + (argc - 2);  /* adjust for spaces */
+
+    MALLOCARRAY(text, totalTextSize + 1); /* add one for \0 at end */
+    if (text == NULL)
+        pm_error("out of memory allocating buffer for "
+                 "%u characters of text", totalTextSize);
+
+    strncpy(text, argv[1], argSize[1]);
+    for (i = 2, idx = argSize[1]; i < argc; ++i) {
+        text[idx++] = ' ';
+        strncpy(&text[idx], argv[i], argSize[i]);
+        idx += argSize[i];
+    }
+
+    assert(idx == totalTextSize);
+
+    text[idx++] = '\0';
+
+    assert(strlen(text) == totalTextSize);
+
+    *textP     = text;
+    *textSizeP = totalTextSize;
+
+    free(argSize);
+}
+
+
 
-    text = strdup("");
-    totalTextSize = 1;
+static void
+buildTextFromArgs(int           const argc,
+                  const char ** const argv,
+                  const char ** const inputTextP,
+                  enum InputFmt const inputFmt) {
+/*----------------------------------------------------------------------------
+   Build the string of text to be included in the Postscript program to be
+   rendered, from the arguments of this program.
+
+   We encode it in either ASCII-Hex or ASCII-85 as opposed to using the plain
+   text from the command line because 1) the command line might have
+   Postscript control characters in it; and 2) the command line might have
+   text in 8-bit or multibyte code, but a Postscript program is supposed to be
+   entirely printable ASCII characters.
+
+   Official Postscript specifications have a limit on the length of a program
+   line, which means there is a limit on the length of text string such as we
+   generate.  But we don't worry about that because Ghostscript actually
+   accepts very long program lines.
+-----------------------------------------------------------------------------*/
+    const char * text;
+        /* All the arguments ('argv') concatenated */
+    unsigned int textSize;
+        /* Length of 'text' */
 
     if (argc-1 < 1)
         pm_error("No text");
 
-    for (i = 1; i < argc; ++i) {
-        if (i > 1) {
-            totalTextSize += 1;
-            text = realloc(text, totalTextSize);
-            if (text == NULL)
-                pm_error("out of memory");
-            strcat(text, " ");
-        }
-        totalTextSize += strlen(argv[i]);
-        text = realloc(text, totalTextSize);
-        if (text == NULL)
-            pm_error("out of memory");
-        strcat(text, argv[i]);
-    }
+    combineArgs(argc, argv, &text, &textSize);
 
-    {
+    switch (inputFmt) {
+    case INPUT_LITERAL: {
         char * asciiHexText;
 
-        MALLOCARRAY(asciiHexText, totalTextSize * 2);
-
+        MALLOCARRAY(asciiHexText, textSize * 2 + 3);
         if (!asciiHexText)
             pm_error("Unable to allocate memory for hex encoding of %u "
-                     "characters of text", totalTextSize);
+                     "characters of text", textSize);
 
         asciiHexEncode(text, asciiHexText);
-        *asciiHexTextP = asciiHexText;
+        *inputTextP = asciiHexText;
+    } break;
+    case INPUT_ASCIIHEX: {
+        char * asciiHexText;
+
+        MALLOCARRAY(asciiHexText, textSize + 3);
+        if (!asciiHexText)
+            pm_error("Unable to allocate memory for hex encoding of %u "
+                     "characters of text", textSize);
+
+        formatAsciiHexString(text, asciiHexText, textSize);
+        *inputTextP = asciiHexText;
+    } break;
+    case INPUT_ASCII85: {
+        char * ascii85Text;
+
+        MALLOCARRAY(ascii85Text, textSize + 5);
+        if (!ascii85Text)
+            pm_error("Unable to allocate memory for hex encoding of %u "
+                     "characters of text", textSize);
+
+        formatAscii85String(text, ascii85Text, textSize);
+        *inputTextP = ascii85Text;
+    } break;
     }
+
     pm_strfree(text);
 }
 
@@ -155,20 +449,23 @@ struct CmdlineInfo {
     /* All the information the user supplied in the command line,
        in a form easy for the program to use.
     */
-    unsigned int res;
-    float        fontsize;
-    const char * font;
-    float        stroke;
-    float        ascent;
-    float        descent;
-    float        leftmargin;
-    float        rightmargin;
-    float        topmargin;
-    float        bottommargin;
-    unsigned int pad;
-    unsigned int verbose;
-    unsigned int dump;
-    const char * text;
+    unsigned int  res;
+    float         fontsize;
+    const char *  font;
+    float         stroke;
+    float         ascent;
+    float         descent;
+    float         leftmargin;
+    float         rightmargin;
+    float         topmargin;
+    float         bottommargin;
+    unsigned int  pad;
+    unsigned int  verbose;
+    unsigned int  dump;
+    const char *  text;
+        /* Text to render, in Postscript format, either Ascii-hex
+           (e.g. <313233>) or Ascii-85 (e.g. <~aBc-~>)
+        */
 };
 
 
@@ -189,8 +486,9 @@ parseCommandLine(int argc, const char ** argv,
     unsigned int cropSpec, ascentSpec, descentSpec;
     unsigned int leftmarginSpec, rightmarginSpec;
     unsigned int topmarginSpec, bottommarginSpec;
+    unsigned int asciihexSpec, ascii85Spec;
 
-    MALLOCARRAY(option_def, 100);
+    MALLOCARRAY_NOFAIL(option_def, 100);
 
     option_def_index = 0;   /* incremented by OPTENTRY */
     OPTENT3(0, "resolution",    OPT_UINT,
@@ -217,6 +515,10 @@ parseCommandLine(int argc, const char ** argv,
             NULL,                    &cropSpec,                 0);
     OPTENT3(0, "pad",           OPT_FLAG,
             NULL,                    &cmdlineP->pad,            0);
+    OPTENT3(0, "asciihex",      OPT_FLAG,
+            NULL,                    &asciihexSpec,             0);
+    OPTENT3(0, "ascii85",       OPT_FLAG,
+            NULL,                    &ascii85Spec,              0);
     OPTENT3(0, "verbose",       OPT_FLAG,
             NULL,                    &cmdlineP->verbose,        0);
     OPTENT3(0, "dump-ps",       OPT_FLAG,
@@ -233,8 +535,6 @@ parseCommandLine(int argc, const char ** argv,
     cmdlineP->leftmargin  = 0;
     cmdlineP->topmargin   = 0;
     cmdlineP->bottommargin = 0;
-    cropSpec       = FALSE;
-    cmdlineP->pad  = FALSE;
 
     opt.opt_table = option_def;
     opt.short_allowed = FALSE;  /* We have no short (old-fashioned) options */
@@ -244,6 +544,8 @@ parseCommandLine(int argc, const char ** argv,
 
     validateFontName(cmdlineP->font);
 
+    if (cmdlineP->res <= 0)
+        pm_error("-resolution must be positive");
     if (cmdlineP->fontsize <= 0)
         pm_error("-fontsize must be positive");
     if (cmdlineP->ascent < 0)
@@ -259,7 +561,7 @@ parseCommandLine(int argc, const char ** argv,
     if (cmdlineP->bottommargin <0)
         pm_error("-bottommargin must not be negative");
 
-    if (cropSpec == TRUE) {
+    if (cropSpec) {
         if (ascentSpec || descentSpec ||
             leftmarginSpec || rightmarginSpec ||
             topmarginSpec || bottommarginSpec ||
@@ -275,8 +577,24 @@ parseCommandLine(int argc, const char ** argv,
              cmdlineP->leftmargin = cmdlineP->fontsize / 2;
     }
 
-    buildTextFromArgs(argc, argv, &cmdlineP->text);
+    {
+        enum InputFmt inputFmt;
+
+        if (asciihexSpec) {
+            if (ascii85Spec)
+                pm_error("You cannot specify both -asciihex and -ascii85");
+            else
+                inputFmt = INPUT_ASCIIHEX;
+        } else if (ascii85Spec)
+            inputFmt = INPUT_ASCII85;
+        else
+            inputFmt = INPUT_LITERAL;
+
+        if (argc-1 < 1)
+            pm_error("No text");
 
+        buildTextFromArgs(argc, argv, &cmdlineP->text, inputFmt);
+    }
     free(option_def);
 }
 
@@ -325,7 +643,7 @@ postscriptProgram(struct CmdlineInfo const cmdline) {
         "/FindFont {/%s findfont} def\n"
         "/fontsize %f def\n"
         "/pensize %f def\n"
-        "/textstring <%s> def\n"
+        "/textstring %s def\n"
         "/ascent %f def\n"
         "/descent %f def\n"
         "/leftmargin %f def\n"
@@ -386,6 +704,13 @@ postscriptProgram(struct CmdlineInfo const cmdline) {
     const char * retval;
     const char * psVariable;
 
+    /* According to Adobe, maximum line length in a Postscript program is
+       255 characters excluding the newline.  The following may generated a
+       line longer than that if 'cmdline.text' or 'cmdline.font' is long.
+       However, Ghostscript accepts much longer lines, so we don't worry
+       about that.
+    */
+
     pm_asprintf(&psVariable, psTemplate, cmdline.font,
                 cmdline.fontsize, cmdline.stroke, cmdline.text,
                 cmdline.ascent, cmdline.descent,
@@ -413,11 +738,8 @@ gsArgList(const char *       const outputFilename,
     const char ** retval;
     unsigned int argCt;  /* Number of arguments in 'retval' so far */
 
-    if (cmdline.res <= 0)
-         pm_error("Resolution (dpi) must be positive.");
-
-    if (cmdline.fontsize <= 0)
-         pm_error("Font size must be positive.");
+    assert(cmdline.res > 0);
+    assert(cmdline.fontsize > 0);
 
     MALLOCARRAY_NOFAIL(retval, maxArgCt+2);