about summary refs log tree commit diff
path: root/editor/pamthreshold.c
diff options
context:
space:
mode:
Diffstat (limited to 'editor/pamthreshold.c')
-rw-r--r--editor/pamthreshold.c623
1 files changed, 623 insertions, 0 deletions
diff --git a/editor/pamthreshold.c b/editor/pamthreshold.c
new file mode 100644
index 00000000..40260e79
--- /dev/null
+++ b/editor/pamthreshold.c
@@ -0,0 +1,623 @@
+/* pamthreshold - convert a Netpbm image to black and white by thresholding  */
+
+/* This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+/* Copyright (C) 2006 Erik Auerswald
+ * auerswal@unix-ag.uni-kl.de */
+
+#include <assert.h>
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "mallocvar.h"
+#include "nstring.h"
+#include "shhopt.h"
+#include "pam.h"
+
+#define MAX_ITERATIONS 100             /* stop after at most 100 iterations */
+
+struct cmdlineInfo {
+    /* All the information the user supplied in the command line,
+       in a form easy for the program to use.
+    */
+    const char * inputFileName;
+    unsigned int simple;
+    float        threshold;
+    bool         local;
+    bool         dual;
+    float        contrast;
+    unsigned int width, height;
+        /* geometry of local subimage.  Defined only if 'local' or 'dual'
+           is true.
+        */
+};
+
+
+
+struct range {
+    /* A range of sample values, normalized to [0, 1] */
+    samplen min;
+    samplen max;
+};
+
+
+
+static void
+initRange(struct range * const rangeP) {
+
+    /* Initialize to "undefined" state */
+    rangeP->min = 1.0;
+    rangeP->max = 0.0;
+}
+
+          
+
+static void
+addToRange(struct range * const rangeP,
+           samplen        const newSample) {
+
+    rangeP->min = MIN(newSample, rangeP->min);
+    rangeP->max = MAX(newSample, rangeP->max);
+}
+
+
+
+static float
+spread(struct range const range) {
+
+    assert(range.max >= range.min);
+    
+    return range.max - range.min;
+}
+
+
+
+static void
+parseGeometry(const char *   const wxl,
+              unsigned int * const widthP,
+              unsigned int * const heightP,
+              const char **  const errorP) {
+
+    char * const xPos = strchr(wxl, 'x');
+    if (!xPos)
+        asprintfN(errorP, "There is no 'x'.  It should be WIDTHxHEIGHT");
+    else {
+        *widthP  = atoi(wxl);
+        *heightP = atoi(xPos + 1);
+
+        if (*widthP == 0)
+            asprintfN(errorP, "Width is zero.");
+        else if (*heightP == 0)
+            asprintfN(errorP, "Height is zero.");
+        else
+            *errorP = NULL;
+    }
+}
+
+
+
+static void
+parseCommandLine(int                 argc, 
+                 char **             argv,
+                 struct cmdlineInfo *cmdlineP ) {
+/*----------------------------------------------------------------------------
+   Parse program command line described in Unix standard form by argc
+   and argv.  Return the information in the options as *cmdlineP.  
+
+   If command line is internally inconsistent (invalid options, etc.),
+   issue error message to stderr and abort program.
+
+   Note that the strings we return are stored in the storage that
+   was passed to us as the argv array.  We also trash *argv.
+-----------------------------------------------------------------------------*/
+    /* vars for the option parser */
+    optEntry * option_def;
+    optStruct3 opt;
+    unsigned int option_def_index = 0;   /* incremented by OPTENT3 */
+
+    unsigned int thresholdSpec, localSpec, dualSpec, contrastSpec;
+    const char * localOpt;
+    const char * dualOpt;
+
+    MALLOCARRAY_NOFAIL(option_def, 100);
+
+    /* define the options */
+    OPTENT3(0, "simple",    OPT_FLAG,   NULL,               
+            &cmdlineP->simple,      0);
+    OPTENT3(0, "local",     OPT_STRING, &localOpt,
+            &localSpec,             0);
+    OPTENT3(0, "dual",      OPT_STRING, &dualOpt,
+            &dualSpec,              0);
+    OPTENT3(0, "threshold", OPT_FLOAT,  &cmdlineP->threshold,
+            &thresholdSpec,         0);
+    OPTENT3(0, "contrast",  OPT_FLOAT,  &cmdlineP->contrast,
+            &contrastSpec,          0);
+
+    /* set the defaults */
+    cmdlineP->width = cmdlineP->height = 0U;
+
+    /* set the option description for optParseOptions3 */
+    opt.opt_table     = option_def;
+    opt.short_allowed = FALSE;           /* long options only */
+    opt.allowNegNum   = FALSE;           /* we have no numbers at all */
+
+    /* parse commandline, change argc, argv, and *cmdlineP */
+    optParseOptions3(&argc, argv, opt, sizeof(opt), 0);
+
+    if (cmdlineP->simple + localSpec + dualSpec > 1)
+        pm_error("You may specify only one of -simple, -local, and -dual");
+
+    if (!thresholdSpec)
+        cmdlineP->threshold = 0.5;
+
+    /* 0 <= threshold <= 1 */
+    if ((cmdlineP->threshold < 0.0) || (cmdlineP->threshold > 1.0))
+        pm_error("threshold must be in [0,1]");
+
+    if (!contrastSpec)
+        cmdlineP->contrast = 0.05;
+
+    /* 0 <= contrast <= 1 */
+    if ((cmdlineP->contrast < 0.0) || (cmdlineP->contrast > 1.0))
+        pm_error("contrast must be in [0,1]");
+
+    if (localSpec) {
+        const char * error;
+        cmdlineP->local = TRUE;
+
+        parseGeometry(localOpt, &cmdlineP->width, &cmdlineP->height, &error);
+
+        if (error) {
+            pm_error("Invalid -local value '%s'.  %s", localOpt, error);
+            strfree(error);
+        }
+    } else
+        cmdlineP->local = FALSE;
+
+    if (dualSpec) {
+        const char * error;
+        cmdlineP->dual = TRUE;
+
+        parseGeometry(dualOpt, &cmdlineP->width, &cmdlineP->height, &error);
+
+        if (error) {
+            pm_error("Invalid -dual value '%s'.  %s", dualOpt, error);
+            strfree(error);
+        }
+    } else
+        cmdlineP->dual = FALSE;
+
+    if (argc-1 < 1)
+        cmdlineP->inputFileName = "-";
+    else if (argc-1 == 1)
+        cmdlineP->inputFileName = argv[1];
+    else 
+        pm_error("Progam takes at most 1 parameter: the file name.  "
+                 "You specified %d", argc-1);
+}
+
+
+
+/* simple thresholding (the same as in pamditherbw) */
+
+static void
+thresholdSimple(struct pam * const inpamP,
+                struct pam * const outpamP,
+                float        const threshold) {
+
+    tuplen * inrow;    /* normalized input row */
+    tuple * outrow;    /* raw output row */
+    unsigned int row;  /* number of the current row */
+
+    inrow  = pnm_allocpamrown(inpamP);
+    outrow = pnm_allocpamrow(outpamP);
+
+    /* do the simple thresholding */
+    for (row = 0; row < inpamP->height; ++row) {
+        unsigned int col;
+        pnm_readpamrown(inpamP, inrow);
+        for (col = 0; col < inpamP->width; ++col)
+            outrow[col][0] =
+                inrow[col][0] >= threshold ? PAM_BW_WHITE : PAM_BLACK;
+        pnm_writepamrow(outpamP, outrow);
+    }
+
+    pnm_freepamrow(inrow);
+    pnm_freepamrow(outrow);
+}
+
+
+
+static void
+analyzeDistribution(struct pam *          const inpamP,
+                    const unsigned int ** const histogramP,
+                    struct range *        const rangeP) {
+/*----------------------------------------------------------------------------
+   Find the distribution of the sample values -- minimum, maximum, and
+   how many of each value -- in input image *inpamP, whose file is
+   positioned to the raster.
+
+   Return the minimum and maximum as *rangeP and the frequency
+   distribution as *histogramP, an array such that histogram[i] is the
+   number of pixels that have sample value i.
+
+   Leave the file positioned to the raster.
+-----------------------------------------------------------------------------*/
+    unsigned int row;
+    tuple * inrow;
+    tuplen * inrown;
+    unsigned int * histogram;  /* malloced array */
+    unsigned int i;
+
+    pm_filepos rasterPos;      /* Position in input file of the raster */
+
+    pm_tell2(inpamP->file, &rasterPos, sizeof(rasterPos));
+
+    inrow = pnm_allocpamrow(inpamP);
+    inrown = pnm_allocpamrown(inpamP);
+    MALLOCARRAY(histogram, inpamP->maxval+1);
+    if (histogram == NULL)
+        pm_error("Unable to allocate space for %lu-entry histogram",
+                 inpamP->maxval+1);
+
+    /* Initialize histogram -- zero occurences of everything */
+    for (i = 0; i <= inpamP->maxval; ++i)
+        histogram[i] = 0;
+
+    initRange(rangeP);
+
+    for (row = 0; row < inpamP->height; ++row) {
+        unsigned int col;
+        pnm_readpamrow(inpamP, inrow);
+        pnm_normalizeRow(inpamP, inrow, NULL, inrown);
+        for (col = 0; col < inpamP->width; ++col) {
+            ++histogram[inrow[col][0]];
+            addToRange(rangeP, inrown[col][0]);
+        }
+    }
+    *histogramP = histogram;
+
+    pnm_freepamrow(inrow);
+    pnm_freepamrown(inrown);
+
+    pm_seek2(inpamP->file, &rasterPos, sizeof(rasterPos));
+}
+
+
+
+static void
+getLocalThreshold(tuplen **    const inrows,
+                  unsigned int const windowWidth,
+                  unsigned int const x,
+                  unsigned int const localWidth,
+                  unsigned int const localHeight,
+                  float        const darkness,
+                  float        const minSpread,
+                  samplen      const defaultThreshold,
+                  samplen *    const thresholdP) {
+/*----------------------------------------------------------------------------
+  Find a suitable threshold in local area around one pixel.
+
+  inrows[][] is a an array of 'windowWidth' pixels by 'localHeight'.
+
+  'x' is a column number within the window.
+
+  We look at the rectangle consisting of the 'localWidth' columns
+  surrounding x, all rows.  If x is near the left or right edge, we truncate
+  the window as needed.
+
+  We base the threshold on the local spread (difference between minimum
+  and maximum sample values in the local areas) and the 'darkness'
+  factor.  A higher 'darkness' gets a higher threshold.
+
+  If the spread is less than 'minSpread', we return 'defaultThreshold' and
+  'darkness' is irrelevant.
+
+  'localWidth' must be odd.
+-----------------------------------------------------------------------------*/
+    unsigned int const startCol = x >= localWidth/2 ? x - localWidth/2 : 0;
+
+    unsigned int col;
+    struct range localRange;
+
+    assert(localWidth % 2 == 1);  /* entry condition */
+
+    initRange(&localRange);
+
+    for (col = startCol; col <= x + localWidth/2 && col < windowWidth; ++col) {
+        unsigned int row;
+
+        for (row = 0; row < localHeight; ++row)
+            addToRange(&localRange, inrows[row][col][0]);
+    }
+
+    if (spread(localRange) < minSpread)
+        *thresholdP = defaultThreshold;
+    else
+        *thresholdP = localRange.min + darkness * spread(localRange);
+}
+
+
+
+static void
+thresholdLocalRow(struct pam *       const inpamP,
+                  tuplen **          const inrows,
+                  unsigned int       const localWidth,
+                  unsigned int       const windowHeight,
+                  unsigned int       const row,
+                  struct cmdlineInfo const cmdline,
+                  struct range       const globalRange,
+                  samplen            const globalThreshold,
+                  tuple *            const outrow) {
+
+    tuplen * const inrow = inrows[row % windowHeight];
+
+    float minSpread;
+    unsigned int col;
+
+    if (cmdline.dual)
+        minSpread = cmdline.contrast * spread(globalRange);
+    else
+        minSpread = 0.0;
+
+    for (col = 0; col < inpamP->width; ++col) {
+        samplen threshold;
+
+        getLocalThreshold(inrows, inpamP->width, col, localWidth, windowHeight,
+                          cmdline.threshold, minSpread, globalThreshold,
+                          &threshold);
+        
+        outrow[col][0] = inrow[col][0] >= threshold ? PAM_BW_WHITE : PAM_BLACK;
+    }
+}
+
+
+
+static void
+computeGlobalThreshold(struct pam *         const inpamP,
+                       const unsigned int * const histogram,
+                       struct range         const globalRange,
+                       float *              const thresholdP) {
+/*----------------------------------------------------------------------------
+   Compute the proper threshold to use for the image described by
+   *inpamP, whose file is positioned to the raster.
+
+   For our convenience:
+
+     'histogram' describes the frequency of occurence of the various sample
+     values in the image.
+
+     'globalRange' describes the range (minimum, maximum) of sample values
+     in the image.
+
+   Return the threshold (scaled to [0, 1]) as *thresholdP.
+
+   Leave the file positioned to the raster.
+-----------------------------------------------------------------------------*/
+    /* Found this algo in the wikipedia article "Thresholding (image
+       processing)"
+    */
+
+    float threshold;           /* threshold is iteratively determined */
+    float oldthreshold;        /* stop if oldthreshold==threshold */
+    unsigned int iter;         /* count of done iterations */
+
+    /* Use middle value (halfway between min and max) as initial threshold */
+    threshold = (globalRange.min + globalRange.max) / 2.0;
+
+    oldthreshold = -1.0;  /* initial value */
+    iter = 0; /* initial value */
+
+    /* adjust threshold to image */
+    while (fabs(oldthreshold - threshold) > 0.01 && iter < MAX_ITERATIONS) {
+        unsigned long white, black;   /* number of white, black pixels */
+        unsigned int row;
+
+        ++iter;
+
+        /* count black and white pixels */
+
+        for (row = 0, white = 0, black = 0; row < inpamP->height; ++row) {
+            unsigned int col;
+
+            for(col = 0; col < threshold * inpamP->maxval; ++col)
+                black += histogram[col];
+            for(; col <= inpamP->maxval; ++col)
+                white += histogram[col];
+        }
+
+        oldthreshold = threshold;
+
+        /* Use the weighted average of black and white pixels to calculate new
+           threshold
+        */
+        threshold =
+            (black * (globalRange.min + threshold) / 2.0 +
+             white * (threshold + globalRange.max) / 2.0) /
+            (black + white);
+    }
+
+    *thresholdP = threshold;
+}
+
+
+
+static void
+thresholdLocal(struct pam *       const inpamP,
+               struct pam *       const outpamP,
+               struct cmdlineInfo const cmdline) {
+/*----------------------------------------------------------------------------
+  Threshold the image described by *inpamP, whose file is positioned to the
+  raster, and output the resulting raster to the image described by
+  *outpamP.
+
+  Use local adaptive thresholding aka dynamic thresholding or dual
+  thresholding (global for low contrast areas, LAT otherwise)
+-----------------------------------------------------------------------------*/
+    struct range globalRange; /* Range of sample values in entire image */
+    tuplen ** inrows;
+        /* vertical window of image containing the local area.  This is
+           a ring of 'windowHeight' rows.  Row R of the image, when it is
+           in the window, is inrows[R % windowHeight].
+        */
+    unsigned int windowHeight;  /* size of 'inrows' window */
+    unsigned int nextRowToRead;
+        /* Number of the next row to be read from the file into the inrows[]
+           buffer.
+        */
+    tuple * outrow;           /* raw output row */
+    unsigned int row;
+        /* Number of the current row.  The current row is normally the
+           one in the center of the inrows[] buffer (which has an actual
+           center row because it is of odd height), but when near the top
+           and bottom edge of the image, it is not.
+        */
+    const unsigned int * histogram;
+    samplen globalThreshold;
+        /* This is a threshold based on the entire image, to use in areas
+           where the contrast is too small to use a locally-derived threshold.
+        */
+    unsigned int oddLocalWidth;
+    unsigned int oddLocalHeight;
+    unsigned int i;
+    
+    /* use a subimage with odd width and height to have a middle pixel */
+
+    if (cmdline.width % 2 == 0)
+        oddLocalWidth = cmdline.width + 1;
+    else 
+        oddLocalWidth = cmdline.width;
+    if (cmdline.height % 2 == 0)
+        oddLocalHeight = cmdline.height + 1;
+    else
+        oddLocalHeight = cmdline.height;
+
+    windowHeight = MIN(oddLocalHeight, inpamP->height);
+
+    analyzeDistribution(inpamP, &histogram, &globalRange);
+
+    computeGlobalThreshold(inpamP, histogram, globalRange, &globalThreshold);
+
+    outrow = pnm_allocpamrow(outpamP);
+
+    MALLOCARRAY(inrows, windowHeight);
+
+    if (inrows == NULL)
+        pm_error("Unable to allocate memory for a %u-row array", windowHeight);
+
+    for (i = 0; i < windowHeight; ++i)
+        inrows[i] = pnm_allocpamrown(inpamP);
+
+    /* Fill the vertical window buffer */
+    nextRowToRead = 0;
+
+    while (nextRowToRead < windowHeight)
+        pnm_readpamrown(inpamP, inrows[nextRowToRead++ % windowHeight]);
+
+    for (row = 0; row < inpamP->height; ++row) {
+        thresholdLocalRow(inpamP, inrows, oddLocalWidth, windowHeight, row,
+                          cmdline, globalRange, globalThreshold, outrow);
+
+        pnm_writepamrow(outpamP, outrow);
+        
+        /* read next image line if available and necessary */
+        if (row + windowHeight / 2 >= nextRowToRead &&
+            nextRowToRead < inpamP->height)
+            pnm_readpamrown(inpamP, inrows[nextRowToRead++ % windowHeight]);
+    }
+
+    free((void*)histogram);
+    for (i = 0; i < windowHeight; ++i)
+        pnm_freepamrow(inrows[i]);
+    free(inrows);
+    pnm_freepamrow(outrow);
+}
+
+
+
+static void
+thresholdIterative(struct pam * const inpamP,
+                   struct pam * const outpamP) {
+
+    const unsigned int * histogram;
+    struct range globalRange;
+    samplen threshold;
+
+    analyzeDistribution(inpamP, &histogram, &globalRange);
+
+    computeGlobalThreshold(inpamP, histogram, globalRange, &threshold);
+
+    pm_message("using global threshold %4.2f", threshold);
+
+    thresholdSimple(inpamP, outpamP, threshold);
+}
+
+
+
+int
+main(int argc, char **argv) {
+
+    FILE * ifP; 
+    struct cmdlineInfo cmdline;
+    struct pam inpam, outpam;
+    bool eof;  /* No more images in input stream */
+
+    pnm_init(&argc, argv);
+
+    parseCommandLine(argc, argv, &cmdline);
+
+    if (cmdline.simple)
+        ifP = pm_openr(cmdline.inputFileName);
+    else
+        ifP = pm_openr_seekable(cmdline.inputFileName);
+
+    /* threshold each image in the PAM file */
+    eof = FALSE;
+    while (!eof) {
+        pnm_readpaminit(ifP, &inpam, PAM_STRUCT_SIZE(tuple_type));
+
+        /* set output image parameters for a bilevel image */
+        outpam.size        = sizeof(outpam);
+        outpam.len         = PAM_STRUCT_SIZE(tuple_type);
+        outpam.file        = stdout;
+        outpam.format      = PAM_FORMAT;
+        outpam.plainformat = 0;
+        outpam.height      = inpam.height;
+        outpam.width       = inpam.width;
+        outpam.depth       = 1;
+        outpam.maxval      = 1;
+        outpam.bytes_per_sample = 1;
+        strcpy(outpam.tuple_type, "BLACKANDWHITE");
+
+        pnm_writepaminit(&outpam);
+
+        /* do the thresholding */
+
+        if (cmdline.simple)
+            thresholdSimple(&inpam, &outpam, cmdline.threshold);
+        else if (cmdline.local || cmdline.dual)
+            thresholdLocal(&inpam, &outpam, cmdline);
+        else
+            thresholdIterative(&inpam, &outpam);
+
+        pnm_nextimage(ifP, &eof);
+    }
+
+    pm_close(ifP);
+
+    return 0;
+}