about summary refs log tree commit diff
path: root/analyzer/pgmhist.c
diff options
context:
space:
mode:
authorgiraffedata <giraffedata@9d0c8265-081b-0410-96cb-a4ca84ce46f8>2014-03-01 20:56:07 +0000
committergiraffedata <giraffedata@9d0c8265-081b-0410-96cb-a4ca84ce46f8>2014-03-01 20:56:07 +0000
commit8258d933e01b90d57758152cc0cde7345e950ea7 (patch)
tree8ef37c6c6b87b7cca17e0431264f41e255186671 /analyzer/pgmhist.c
parente73609bf2f5770aaea795f369c67ee8dab6457a1 (diff)
downloadnetpbm-mirror-8258d933e01b90d57758152cc0cde7345e950ea7.tar.gz
netpbm-mirror-8258d933e01b90d57758152cc0cde7345e950ea7.tar.xz
netpbm-mirror-8258d933e01b90d57758152cc0cde7345e950ea7.zip
Add -forensic
git-svn-id: http://svn.code.sf.net/p/netpbm/code/trunk@2145 9d0c8265-081b-0410-96cb-a4ca84ce46f8
Diffstat (limited to 'analyzer/pgmhist.c')
-rw-r--r--analyzer/pgmhist.c318
1 files changed, 228 insertions, 90 deletions
diff --git a/analyzer/pgmhist.c b/analyzer/pgmhist.c
index fb16d097..9498061e 100644
--- a/analyzer/pgmhist.c
+++ b/analyzer/pgmhist.c
@@ -20,7 +20,7 @@
 
 
 
-struct cmdline_info {
+struct CmdlineInfo {
     /* All the information the user supplied in the command line,
        in a form easy for the program to use.
     */
@@ -29,13 +29,14 @@ struct cmdline_info {
     unsigned int median;
     unsigned int quartile;
     unsigned int decile;
+    unsigned int forensic;
 };
 
 
 
 static void
 parseCommandLine(int argc, const char ** argv,
-                 struct cmdline_info * const cmdlineP) {
+                 struct CmdlineInfo * const cmdlineP) {
 /*----------------------------------------------------------------------------
    Note that the file spec array we return is stored in the storage that
    was passed to us as the argv array.
@@ -43,7 +44,7 @@ parseCommandLine(int argc, const char ** argv,
     optStruct3 opt;  /* set by OPTENT3 */
     optEntry * option_def;
     unsigned int option_def_index;
-    
+
     MALLOCARRAY_NOFAIL(option_def, 100);
 
     option_def_index = 0;   /* incremented by OPTENT3 */
@@ -56,6 +57,8 @@ parseCommandLine(int argc, const char ** argv,
             &cmdlineP->quartile,            0);
     OPTENT3(0,   "decile",        OPT_FLAG,  NULL,
             &cmdlineP->decile,              0);
+    OPTENT3(0,   "forensic",      OPT_FLAG,  NULL,
+            &cmdlineP->forensic,            0);
 
     opt.opt_table = option_def;
     opt.short_allowed = FALSE;  /* We have no short (old-fashioned) options */
@@ -68,7 +71,7 @@ parseCommandLine(int argc, const char ** argv,
         pm_error("You may specify only one of -median, -quartile, "
                  "and -decile");
 
-    if (argc-1 == 0) 
+    if (argc-1 == 0)
         cmdlineP->inputFileName = "-";
     else if (argc-1 != 1)
         pm_error("Program takes zero or one argument (filename).  You "
@@ -81,39 +84,80 @@ parseCommandLine(int argc, const char ** argv,
 
 
 
+static gray
+universalMaxval(gray const maxval,
+                int  const format) {
+/*----------------------------------------------------------------------------
+  A maxval that makes it impossible for a pixel to be invalid in an image that
+  states it maxval as 'maxval' and has format 'format'.
+
+  E.g. in a one-byte-per-sample image, it's not possible to read a sample
+  value greater than 255, so a maxval of 255 makes it impossible for a sample
+  to be invalid.
+
+  But: we never go above 65535, which means our maxval isn't entirely
+  universal.  If the image is plain PGM, it could contain a pixel that
+  exceeds even that.
+-----------------------------------------------------------------------------*/
+    assert(0 < maxval && maxval < 65536);
+
+    if (format == RPGM_FORMAT) {
+        /*  Raw PGM stream has either one or two bytes per pixel, depending
+            upon its stated maxval.
+        */
+        if (maxval > 255)
+            return 65535;
+        else
+            return 255;
+    } else if (format == RPBM_FORMAT) {
+        /* A Raw PBM stream has one bit per pixel, which libnetpbm renders
+           as 0 or 255 when we read it.
+        */
+        assert(maxval == 255);
+        return 255;
+    } else {
+        /* A plain PGM or PBM stream has essentially unlimited range in the
+           tokens that are supposed to be sample values.  We arbitrarily draw
+           the line at 65535.
+        */
+        return 65535;
+    }
+}
+
+
+
 static void
-buildHistogram(FILE *          const ifP,
-               unsigned int ** const histP,
-               gray *          const maxvalP) {
+buildHistogram(FILE *               const ifP,
+               int                  const format,
+               unsigned int         const cols,
+               unsigned int         const rows,
+               gray                 const mmaxval,
+               unsigned long int ** const histP) {
+/*----------------------------------------------------------------------------
+   Compute the histogram of sample values in the input stream *ifP as *histP,
+   in newly malloced storage.
 
+   Assume the image maxval is 'mmaxval'.  Assume *ifP is positioned to the
+   start of the raster.
+-----------------------------------------------------------------------------*/
     gray * grayrow;
-    int rows, cols;
-    int format;
     unsigned int row;
     unsigned int i;
-    unsigned int * hist;  /* malloc'ed array */
-    gray maxval;
-
-    pgm_readpgminit(ifP, &cols, &rows, &maxval, &format);
-
-    if (UINT_MAX / cols < rows)
-        pm_error("Too many pixels (%u x %u) in image.  "
-                 "Maximum computable is %u",
-                 cols, rows, UINT_MAX);
+    unsigned long int * hist;  /* malloc'ed array */
 
     grayrow = pgm_allocrow(cols);
 
-    MALLOCARRAY(hist, maxval + 1);
+    MALLOCARRAY(hist, mmaxval + 1);
     if (hist == NULL)
         pm_error("out of memory");
 
-    for (i = 0; i <= maxval; ++i)
+    for (i = 0; i <= mmaxval; ++i)
         hist[i] = 0;
 
     for (row = 0; row < rows; ++row) {
         unsigned int col;
 
-        pgm_readpgmrow(ifP, grayrow, cols, maxval, format);
+        pgm_readpgmrow(ifP, grayrow, cols, mmaxval, format);
 
         for (col = 0; col < cols; ++col) {
             /* Because total pixels in image is limited: */
@@ -124,49 +168,31 @@ buildHistogram(FILE *          const ifP,
     }
     pgm_freerow(grayrow);
 
-    *histP   = hist;
-    *maxvalP = maxval;
-}
-
-
-
-static unsigned int
-sum(unsigned int const hist[],
-    gray         const maxval) {
-
-    unsigned int sum;
-    unsigned int sampleVal;
-
-    for (sampleVal = 0, sum = 0; sampleVal <= maxval; ++sampleVal)
-        sum += hist[sampleVal];
-
-    return sum;
+    *histP = hist;
 }
 
 
 
 static void
-findQuantiles(unsigned int const n,
-              unsigned int const hist[],
-              gray         const maxval,
-              gray *       const quantile) {
+findQuantiles(unsigned int      const n,
+              unsigned long int const hist[],
+              unsigned long int const totalCt,
+              gray              const mmaxval,
+              gray *            const quantile) {
 /*----------------------------------------------------------------------------
    Find the order-n quantiles (e.g. n == 4 means quartiles) of the pixel
    sample values, given that hist[] is the histogram of them (hist[N] is the
    number of pixels that have sample value N).
 
-   'maxval' is the maxval of the image, so the size of hist[] is 'maxval' + 1.
+   'mmaxval' is the highest index in hist[] (so its size is 'maxval' + 1,
+   and there are no pixels greater than 'mmaxval' in the image).
 
    We return the ith quantile as quantile[i].  For example, for quartiles,
    quantile[3] is the least sample value for which at least 3/4 of the pixels
-   are less than or equal to it.  
+   are less than or equal to it.
 
    quantile[] must be allocated at least to size 'n'.
-
-   We return 
 -----------------------------------------------------------------------------*/
-    unsigned int const totalCt = sum(hist, maxval);
-
     unsigned int quantSeq;
         /* 0 is first quantile, 1 is second quantile, etc. */
 
@@ -191,8 +217,8 @@ findQuantiles(unsigned int const n,
 
         assert(quantCt <= totalCt);
 
-        /* at sampleVal == maxval, cumCt == totalCt, so because
-           quantCt <= 'totalCt', 'sampleVal' cannot go above maxval.
+        /* at sampleVal == mmaxval, cumCt == totalCt, so because
+           quantCt <= 'totalCt', 'sampleVal' cannot go above mmaxval.
         */
 
         while (cumCt < quantCt) {
@@ -200,7 +226,7 @@ findQuantiles(unsigned int const n,
             cumCt += hist[sampleVal];
         }
 
-        assert(sampleVal <= maxval);
+        assert(sampleVal <= mmaxval);
 
         /* 'sampleVal' is the lowest sample value for which at least 'quantCt'
            pixels have that sample value or less.  'cumCt' is the number
@@ -213,9 +239,10 @@ findQuantiles(unsigned int const n,
 
 
 static void
-countCumulative(unsigned int    const hist[],
-                gray            const maxval,
-                unsigned int ** const rcountP) {
+countCumulative(unsigned long int    const hist[],
+                gray                 const mmaxval,
+                unsigned long int    const totalPixelCt,
+                unsigned long int ** const rcountP) {
 /*----------------------------------------------------------------------------
    From the histogram hist[] (hist[N] is the number of pixels of sample
    value N), compute the cumulative distribution *rcountP ((*rcountP)[N]
@@ -223,17 +250,17 @@ countCumulative(unsigned int    const hist[],
 
    *rcountP is newly malloced memory.
 -----------------------------------------------------------------------------*/
-    unsigned int * rcount;
-    unsigned int cumCount;
+    unsigned long int * rcount;
+    unsigned long int cumCount;
     int i;
-    
-    MALLOCARRAY(rcount, maxval + 1);
+
+    MALLOCARRAY(rcount, mmaxval + 1);
     if (rcount == NULL)
         pm_error("out of memory");
 
-    for (i = maxval, cumCount = 0; i >= 0; --i) {
+    for (i = mmaxval, cumCount = 0; i >= 0; --i) {
         /* Because total pixels in image is limited: */
-        assert(UINT_MAX - hist[i] >= cumCount);
+        assert(ULONG_MAX - hist[i] >= cumCount);
 
         cumCount += hist[i];
         rcount[i] = cumCount;
@@ -245,11 +272,11 @@ countCumulative(unsigned int    const hist[],
 
 
 static void
-reportHistHumanFriendly(unsigned int const hist[],
-                        unsigned int const rcount[],
-                        gray         const maxval) {
+reportHistHumanFriendly(unsigned long int const hist[],
+                        unsigned long int const rcount[],
+                        gray              const maxval) {
 
-    unsigned int const totalPixels = rcount[0];
+    unsigned long int const totalPixelCt = rcount[0];
 
     unsigned int cumCount;
     unsigned int i;
@@ -261,9 +288,48 @@ reportHistHumanFriendly(unsigned int const hist[],
         if (hist[i] > 0) {
             cumCount += hist[i];
             printf(
-                "%5d  %5d  %5.3g%%  %5.3g%%\n", i, hist[i],
-                (float) cumCount * 100.0 / totalPixels, 
-                (float) rcount[i] * 100.0 / totalPixels);
+                "%5d  %5ld  %5.3g%%  %5.3g%%\n", i, hist[i],
+                (float) cumCount * 100.0 / totalPixelCt,
+                (float) rcount[i] * 100.0 / totalPixelCt);
+        }
+    }
+}
+
+
+static void
+reportHistForensicHumanFriendly(unsigned long int const hist[],
+                                unsigned long int const rcount[],
+                                gray              const maxval,
+                                gray              const mmaxval) {
+
+    unsigned long int const totalPixelCt = rcount[0];
+
+    unsigned long int cumCount;
+    unsigned int i;
+
+    printf("value  count  b%%     w%%   \n");
+    printf("-----  -----  ------  ------\n");
+
+    for (i = 0, cumCount = 0; i <= maxval; ++i) {
+        if (hist[i] > 0) {
+            cumCount += hist[i];
+            printf(
+                "%5d  %5ld  %5.3g%%  %5.3g%%\n", i, hist[i],
+                (float) cumCount * 100.0 / totalPixelCt,
+                (float) rcount[i] * 100.0 / totalPixelCt);
+        }
+    }
+    if (totalPixelCt > cumCount) {
+        printf("-----  -----\n");
+
+        for (i = maxval; i <= mmaxval; ++i) {
+            if (hist[i] > 0) {
+                cumCount += hist[i];
+                printf(
+                    "%5d  %5ld  %5.3g%%  %5.3g%%\n", i, hist[i],
+                    (float) cumCount * 100.0 / totalPixelCt,
+                    (float) rcount[i] * 100.0 / totalPixelCt);
+            }
         }
     }
 }
@@ -271,13 +337,13 @@ reportHistHumanFriendly(unsigned int const hist[],
 
 
 static void
-reportHistMachineFriendly(unsigned int const hist[],
-                          gray         const maxval) {
+reportHistMachineFriendly(unsigned long int const hist[],
+                          gray              const maxval) {
 
     unsigned int i;
 
     for (i = 0; i <= maxval; ++i) {
-        printf("%u %u\n", i, hist[i]);
+        printf("%u %lu\n", i, hist[i]);
     }
 }
 
@@ -335,60 +401,132 @@ reportDecilesHumanFriendly(gray const decile[]) {
 
 
 
-int
-main(int argc, const char ** argv) {
+static void
+summarizeInvalidPixels(unsigned long int const hist[],
+                       unsigned long int const rcount[],
+                       gray              const mmaxval,
+                       gray              const maxval) {
+/*----------------------------------------------------------------------------
+  Print total count of valid and invalid pixels, if there are any
+  invalid ones.
+-----------------------------------------------------------------------------*/
+    unsigned long int const invalidPixelCt = 
+        mmaxval > maxval ? rcount[maxval+1] : 0;
+
+    if (invalidPixelCt > 0) {
+        unsigned long int const totalPixelCt = rcount[0];
+        unsigned long int const validPixelCt = totalPixelCt - invalidPixelCt;
+
+        printf("\n");
+        printf("** Image stream contains invalid sample values "
+               "(above maxval %u)\n", maxval);
+        printf("Valid sample values:   %lu (%5.4g%%)\n",
+               validPixelCt,   (float)validPixelCt / totalPixelCt * 100.0);
+        printf("Invalid sample values: %lu (%5.4g%%)\n",
+               invalidPixelCt, (float)invalidPixelCt / totalPixelCt * 100.0);
+    }
+}
 
-    struct cmdline_info cmdline;
-    FILE * ifP;
-    gray maxval;
-    unsigned int * hist;   /* malloc'ed array */
 
-    pm_proginit(&argc, argv);
 
-    parseCommandLine(argc, argv, &cmdline);
+static void
+reportFromHistogram(const unsigned long int * const hist,
+                    gray                      const mmaxval,
+                    gray                      const maxval,
+                    unsigned long int         const totalPixelCt,
+                    struct CmdlineInfo        const cmdline) {
+/*----------------------------------------------------------------------------
+   Analyze histogram 'hist', which has 'mmaxval' buckets, and report
+   what we find.
 
-    ifP = pm_openr(cmdline.inputFileName);
+   'maxval' is the maxval that the image states (but note that we tolerate
+   invalid sample values greater than maxval, which could be as high as
+   'mmaxval').
 
-    buildHistogram(ifP, &hist, &maxval);
+   'cmdline' tells what kind of reporting to do.
+-----------------------------------------------------------------------------*/
 
     if (cmdline.median) {
         gray median[2];
-        findQuantiles(2, hist, maxval, median); 
+        findQuantiles(2, hist, totalPixelCt, mmaxval, median);
         if (cmdline.machine)
             reportQuantilesMachineFriendly(median, 1);
         else
             reportMedianHumanFriendly(median[0]);
     } else if (cmdline.quartile) {
         gray quartile[4];
-        findQuantiles(4, hist, maxval, quartile);
+        findQuantiles(4, hist, totalPixelCt, mmaxval, quartile);
         if (cmdline.machine)
             reportQuantilesMachineFriendly(quartile, 4);
         else
             reportQuartilesHumanFriendly(quartile);
     } else if (cmdline.decile) {
         gray decile[10];
-        findQuantiles(10, hist, maxval, decile);
+        findQuantiles(10, hist, totalPixelCt, mmaxval, decile);
         if (cmdline.machine)
             reportQuantilesMachineFriendly(decile, 10);
         else
             reportDecilesHumanFriendly(decile);
     } else {
         if (cmdline.machine)
-            reportHistMachineFriendly(hist, maxval);
+            reportHistMachineFriendly(hist, mmaxval);
         else {
-            unsigned int * rcount; /* malloc'ed array */
-            countCumulative(hist, maxval, &rcount);
-            reportHistHumanFriendly(hist, rcount, maxval);
+            unsigned long int * rcount; /* malloc'ed array */
+            countCumulative(hist, mmaxval, totalPixelCt, &rcount);
+            if (cmdline.forensic)
+                reportHistForensicHumanFriendly(hist, rcount, maxval, mmaxval);
+            else
+                reportHistHumanFriendly(hist, rcount, maxval);
+
+            summarizeInvalidPixels(hist, rcount, mmaxval, maxval);
 
             free(rcount);
         }
     }
+}
+
+
+
+int
+main(int argc, const char ** argv) {
+
+    struct CmdlineInfo cmdline;
+    FILE * ifP;
+    int rows, cols;
+    int format;
+    gray maxval;
+        /* Stated maxval of the image */
+    gray mmaxval;
+        /* Maxval we assume, which may be greater than the stated maxval
+           so that we can process invalid pixels in the image that exceed
+           the maxval.
+        */
+    unsigned long int totalPixelCt;
+    unsigned long int * hist;   /* malloc'ed array */
+
+    pm_proginit(&argc, argv);
+
+    parseCommandLine(argc, argv, &cmdline);
+
+    ifP = pm_openr(cmdline.inputFileName);
+
+    pgm_readpgminit(ifP, &cols, &rows, &maxval, &format);
+
+    if (ULONG_MAX / cols < rows)
+        pm_error("Too many pixels (%u x %u) in image.  "
+                 "Maximum computable is %lu",
+                 cols, rows, ULONG_MAX);
+
+    totalPixelCt = cols * rows;
+
+    mmaxval = cmdline.forensic ? universalMaxval(maxval, format) : maxval;
+
+    buildHistogram(ifP, format, cols, rows, mmaxval, &hist);
+
+    reportFromHistogram(hist, mmaxval, maxval, totalPixelCt, cmdline);
 
     free(hist);
     pm_close(ifP);
 
     return 0;
 }
-
-
-