about summary refs log tree commit diff
path: root/editor/pamthreshold.c
diff options
context:
space:
mode:
authorgiraffedata <giraffedata@9d0c8265-081b-0410-96cb-a4ca84ce46f8>2007-05-20 21:54:33 +0000
committergiraffedata <giraffedata@9d0c8265-081b-0410-96cb-a4ca84ce46f8>2007-05-20 21:54:33 +0000
commit0148a477f88f4b5129a5162a2fe5460ba0c6a8cb (patch)
tree7680dff69a26fb9b4eaafffa7f1a177b3f17069c /editor/pamthreshold.c
parente08f19e5b27ad33f42c79ea5e1a9416f38ed97e6 (diff)
downloadnetpbm-mirror-0148a477f88f4b5129a5162a2fe5460ba0c6a8cb.tar.gz
netpbm-mirror-0148a477f88f4b5129a5162a2fe5460ba0c6a8cb.tar.xz
netpbm-mirror-0148a477f88f4b5129a5162a2fe5460ba0c6a8cb.zip
Fix bogus global threshold calculation
git-svn-id: http://svn.code.sf.net/p/netpbm/code/trunk@303 9d0c8265-081b-0410-96cb-a4ca84ce46f8
Diffstat (limited to 'editor/pamthreshold.c')
-rw-r--r--editor/pamthreshold.c207
1 files changed, 128 insertions, 79 deletions
diff --git a/editor/pamthreshold.c b/editor/pamthreshold.c
index 8f4587e0..1fad18ce 100644
--- a/editor/pamthreshold.c
+++ b/editor/pamthreshold.c
@@ -44,6 +44,7 @@ struct cmdlineInfo {
         /* geometry of local subimage.  Defined only if 'local' or 'dual'
            is true.
         */
+    unsigned int verbose;
 };
 
 
@@ -163,6 +164,8 @@ parseCommandLine(int                 argc,
             &thresholdSpec,         0);
     OPTENT3(0, "contrast",  OPT_FLOAT,  &cmdlineP->contrast,
             &contrastSpec,          0);
+    OPTENT3(0, "verbose",    OPT_FLAG,   NULL,               
+            &cmdlineP->verbose,     0);
 
     /* set the defaults */
     cmdlineP->width = cmdlineP->height = 0U;
@@ -261,6 +264,7 @@ thresholdSimple(struct pam * const inpamP,
 
 static void
 analyzeDistribution(struct pam *          const inpamP,
+                    bool                  const verbose,
                     const unsigned int ** const histogramP,
                     struct range *        const rangeP) {
 /*----------------------------------------------------------------------------
@@ -272,7 +276,8 @@ analyzeDistribution(struct pam *          const inpamP,
    distribution as *histogramP, an array such that histogram[i] is the
    number of pixels that have sample value i.
 
-   Leave the file positioned to the raster.
+   Assume the file is positioned to the raster upon entry and leave
+   it positioned at the same place.
 -----------------------------------------------------------------------------*/
     unsigned int row;
     tuple * inrow;
@@ -312,6 +317,123 @@ analyzeDistribution(struct pam *          const inpamP,
     pnm_freepamrown(inrown);
 
     pm_seek2(inpamP->file, &rasterPos, sizeof(rasterPos));
+
+    if (verbose)
+        pm_message("Pixel values range from %f to %f",
+                   rangeP->min, rangeP->max);
+}
+
+
+
+static void
+computeWhiteBlackMeans(const unsigned int * const histogram,
+                       sample               const maxval,
+                       float                const threshold,
+                       float *              const meanBlackP,
+                       float *              const meanWhiteP) {
+/*----------------------------------------------------------------------------
+  Assuming that histogram[] and 'maxval' describe the pixels of an image,
+  find the mean value of the pixels that are below 'threshold' and
+  that are above 'threshold'.
+-----------------------------------------------------------------------------*/
+    unsigned int nWhite, nBlack;
+        /* Number of would-be-black, would-be-white pixels */
+    uint64_t sumBlack, sumWhite;
+        /* Sum of all the would-be-black, would-be-white pixels */
+    sample gray;
+
+    assert(threshold * maxval <= maxval);
+
+    for (gray = 0, nBlack = 0, sumBlack = 0;
+         gray < threshold * maxval;
+         ++gray) {
+        nBlack += histogram[gray];
+        sumBlack += gray * histogram[gray];
+    }
+    for (nWhite = 0, sumWhite = 0; gray <= maxval; ++gray) {
+        nWhite += histogram[gray];
+        sumWhite += gray * histogram[gray];
+    }
+
+    *meanBlackP = (float)sumBlack / nBlack / maxval;
+    *meanWhiteP = (float)sumWhite / nWhite / maxval;
+}
+
+
+
+static void
+computeGlobalThreshold(struct pam *         const inpamP,
+                       const unsigned int * const histogram,
+                       struct range         const globalRange,
+                       float *              const thresholdP) {
+/*----------------------------------------------------------------------------
+   Compute the proper threshold to use for the image described by
+   *inpamP, whose file is positioned to the raster.
+
+   For our convenience:
+
+     'histogram' describes the frequency of occurence of the various sample
+     values in the image.
+
+     'globalRange' describes the range (minimum, maximum) of sample values
+     in the image.
+
+   Return the threshold (scaled to [0, 1]) as *thresholdP.
+-----------------------------------------------------------------------------*/
+    /* Found this algo in the wikipedia article "Thresholding (image
+       processing)."  It is said to be a special one-dimensional case
+       of the "k-means clustering algorithm."
+
+       The article claims it's proven to converge, by the way.
+       We have an interation limit just as a safety net.
+
+       This code originally implemented a rather different algorithm,
+       while nonetheless carrying the comment that it implemented the
+       Wikipedia article.  I changed it to match Wikipedia in May 2007
+       (at that time there was also a fatal bug in the code, so it
+       didn't implement any intentional algorithm).
+
+       In May 2007, the Wikipedia article described an enhancement of
+       the algorithm that uses a weighted average.  But that enhancement
+       actually reduces the entire thing to: set the threshold to the
+       mean pixel value.  It must be some kind of mistake.  We use the
+       unenhanced version of the algorithm.
+    */
+
+    float threshold;           /* threshold is iteratively determined */
+    float oldthreshold;        /* stop if oldthreshold==threshold */
+    unsigned int iter;         /* count of done iterations */
+
+    assert(betweenZeroAndOne(globalRange.min));
+    assert(betweenZeroAndOne(globalRange.max));
+
+    /* Use middle value (halfway between min and max) as initial threshold */
+    threshold = (globalRange.min + globalRange.max) / 2.0;
+
+    oldthreshold = -1.0;  /* initial value */
+    iter = 0; /* initial value */
+
+    /* adjust threshold to image */
+    while (fabs(oldthreshold - threshold) > 2.0/inpamP->maxval &&
+           iter < MAX_ITERATIONS) {
+        float meanBlack, meanWhite;
+
+        ++iter;
+
+        computeWhiteBlackMeans(histogram, inpamP->maxval, threshold,
+                               &meanBlack, &meanWhite);
+
+        assert(betweenZeroAndOne(meanBlack));
+        assert(betweenZeroAndOne(meanWhite));
+
+        oldthreshold = threshold;
+
+        threshold = (meanBlack + meanWhite) / 2;
+    }
+
+    assert(betweenZeroAndOne(threshold));
+
+    *thresholdP = threshold;
 }
 
 
@@ -405,80 +527,6 @@ thresholdLocalRow(struct pam *       const inpamP,
 
 
 static void
-computeGlobalThreshold(struct pam *         const inpamP,
-                       const unsigned int * const histogram,
-                       struct range         const globalRange,
-                       float *              const thresholdP) {
-/*----------------------------------------------------------------------------
-   Compute the proper threshold to use for the image described by
-   *inpamP, whose file is positioned to the raster.
-
-   For our convenience:
-
-     'histogram' describes the frequency of occurence of the various sample
-     values in the image.
-
-     'globalRange' describes the range (minimum, maximum) of sample values
-     in the image.
-
-   Return the threshold (scaled to [0, 1]) as *thresholdP.
-
-   Leave the file positioned to the raster.
------------------------------------------------------------------------------*/
-    /* Found this algo in the wikipedia article "Thresholding (image
-       processing)"
-    */
-
-    float threshold;           /* threshold is iteratively determined */
-    float oldthreshold;        /* stop if oldthreshold==threshold */
-    unsigned int iter;         /* count of done iterations */
-
-    assert(betweenZeroAndOne(globalRange.min));
-    assert(betweenZeroAndOne(globalRange.max));
-
-    /* Use middle value (halfway between min and max) as initial threshold */
-    threshold = (globalRange.min + globalRange.max) / 2.0;
-
-    oldthreshold = -1.0;  /* initial value */
-    iter = 0; /* initial value */
-
-    /* adjust threshold to image */
-    while (fabs(oldthreshold - threshold) > 0.01 && iter < MAX_ITERATIONS) {
-        unsigned long white, black;   /* number of white, black pixels */
-        unsigned int row;
-
-        ++iter;
-
-        /* count black and white pixels */
-
-        for (row = 0, white = 0, black = 0; row < inpamP->height; ++row) {
-            unsigned int col;
-
-            for(col = 0; col < threshold * inpamP->maxval; ++col)
-                black += histogram[col];
-            for(; col <= inpamP->maxval; ++col)
-                white += histogram[col];
-        }
-
-        oldthreshold = threshold;
-
-        /* Use the weighted average of black and white pixels to calculate new
-           threshold
-        */
-        threshold =
-            (black * (globalRange.min + threshold) / 2.0 +
-             white * (threshold + globalRange.max) / 2.0) /
-            (black + white);
-        
-        assert(betweenZeroAndOne(threshold ));
-    }
-
-    *thresholdP = threshold;
-}
-
-
-
-static void
 thresholdLocal(struct pam *       const inpamP,
                struct pam *       const outpamP,
                struct cmdlineInfo const cmdline) {
@@ -532,7 +580,7 @@ thresholdLocal(struct pam *       const inpamP,
 
     /* global information is needed for dual thresholding */
     if (cmdline.dual) {
-        analyzeDistribution(inpamP, &histogram, &globalRange);
+        analyzeDistribution(inpamP, cmdline.verbose, &histogram, &globalRange);
         computeGlobalThreshold(inpamP, histogram, globalRange,
                                &globalThreshold);
     } else {
@@ -580,13 +628,14 @@ thresholdLocal(struct pam *       const inpamP,
 
 static void
 thresholdIterative(struct pam * const inpamP,
-                   struct pam * const outpamP) {
+                   struct pam * const outpamP,
+                   bool         const verbose) {
 
     const unsigned int * histogram;
     struct range globalRange;
     samplen threshold;
 
-    analyzeDistribution(inpamP, &histogram, &globalRange);
+    analyzeDistribution(inpamP, verbose, &histogram, &globalRange);
 
     computeGlobalThreshold(inpamP, histogram, globalRange, &threshold);
 
@@ -641,7 +690,7 @@ main(int argc, char **argv) {
         else if (cmdline.local || cmdline.dual)
             thresholdLocal(&inpam, &outpam, cmdline);
         else
-            thresholdIterative(&inpam, &outpam);
+            thresholdIterative(&inpam, &outpam, cmdline.verbose);
 
         pnm_nextimage(ifP, &eof);
     }