diff options
Diffstat (limited to 'analyzer/pgmhist.c')
-rw-r--r-- | analyzer/pgmhist.c | 455 |
1 files changed, 399 insertions, 56 deletions
diff --git a/analyzer/pgmhist.c b/analyzer/pgmhist.c index 4790ecba..1e779655 100644 --- a/analyzer/pgmhist.c +++ b/analyzer/pgmhist.c @@ -20,18 +20,23 @@ -struct cmdline_info { +struct CmdlineInfo { /* All the information the user supplied in the command line, in a form easy for the program to use. */ const char * inputFileName; /* Filename of input files */ + unsigned int machine; + unsigned int median; + unsigned int quartile; + unsigned int decile; + unsigned int forensic; }; static void parseCommandLine(int argc, const char ** argv, - struct cmdline_info * const cmdlineP) { + struct CmdlineInfo * const cmdlineP) { /*---------------------------------------------------------------------------- Note that the file spec array we return is stored in the storage that was passed to us as the argv array. @@ -39,64 +44,120 @@ parseCommandLine(int argc, const char ** argv, optStruct3 opt; /* set by OPTENT3 */ optEntry * option_def; unsigned int option_def_index; - + MALLOCARRAY_NOFAIL(option_def, 100); option_def_index = 0; /* incremented by OPTENT3 */ - OPTENTINIT; + OPTENT3(0, "machine", OPT_FLAG, NULL, + &cmdlineP->machine, 0); + OPTENT3(0, "median", OPT_FLAG, NULL, + &cmdlineP->median, 0); + OPTENT3(0, "quartile", OPT_FLAG, NULL, + &cmdlineP->quartile, 0); + OPTENT3(0, "decile", OPT_FLAG, NULL, + &cmdlineP->decile, 0); + OPTENT3(0, "forensic", OPT_FLAG, NULL, + &cmdlineP->forensic, 0); opt.opt_table = option_def; opt.short_allowed = FALSE; /* We have no short (old-fashioned) options */ opt.allowNegNum = FALSE; /* We have no parms that are negative numbers */ - optParseOptions3(&argc, (char **)argv, opt, sizeof(opt), 0); + pm_optParseOptions3(&argc, (char **)argv, opt, sizeof(opt), 0); /* Uses and sets argc, argv, and some of *cmdlineP and others. */ - if (argc-1 == 0) + if (cmdlineP->median + cmdlineP->quartile + cmdlineP->decile > 1) + pm_error("You may specify only one of -median, -quartile, " + "and -decile"); + + if (argc-1 == 0) cmdlineP->inputFileName = "-"; else if (argc-1 != 1) pm_error("Program takes zero or one argument (filename). You " "specified %d", argc-1); else cmdlineP->inputFileName = argv[1]; + + free(option_def); +} + + + +static gray +universalMaxval(gray const maxval, + int const format) { +/*---------------------------------------------------------------------------- + A maxval that makes it impossible for a pixel to be invalid in an image that + states it maxval as 'maxval' and has format 'format'. + + E.g. in a one-byte-per-sample image, it's not possible to read a sample + value greater than 255, so a maxval of 255 makes it impossible for a sample + to be invalid. + + But: we never go above 65535, which means our maxval isn't entirely + universal. If the image is plain PGM, it could contain a pixel that + exceeds even that. +-----------------------------------------------------------------------------*/ + assert(0 < maxval && maxval < 65536); + + if (format == RPGM_FORMAT) { + /* Raw PGM stream has either one or two bytes per pixel, depending + upon its stated maxval. + */ + if (maxval > 255) + return 65535; + else + return 255; + } else if (format == RPBM_FORMAT) { + /* A Raw PBM stream has one bit per pixel, which libnetpbm renders + as 0 or 255 when we read it. + */ + assert(maxval == 255); + return 255; + } else { + /* A plain PGM or PBM stream has essentially unlimited range in the + tokens that are supposed to be sample values. We arbitrarily draw + the line at 65535. + */ + return 65535; + } } static void -buildHistogram(FILE * const ifP, - unsigned int ** const histP, - gray * const maxvalP) { +buildHistogram(FILE * const ifP, + int const format, + unsigned int const cols, + unsigned int const rows, + gray const mmaxval, + unsigned long int ** const histP) { +/*---------------------------------------------------------------------------- + Compute the histogram of sample values in the input stream *ifP as *histP, + in newly malloced storage. + Assume the image maxval is 'mmaxval'. Assume *ifP is positioned to the + start of the raster. +-----------------------------------------------------------------------------*/ gray * grayrow; - int rows, cols; - int format; unsigned int row; unsigned int i; - unsigned int * hist; /* malloc'ed array */ - gray maxval; - - pgm_readpgminit(ifP, &cols, &rows, &maxval, &format); - - if (UINT_MAX / cols < rows) - pm_error("Too many pixels (%u x %u) in image. " - "Maximum computable is %u", - cols, rows, UINT_MAX); + unsigned long int * hist; /* malloc'ed array */ grayrow = pgm_allocrow(cols); - MALLOCARRAY(hist, maxval + 1); + MALLOCARRAY(hist, mmaxval + 1); if (hist == NULL) pm_error("out of memory"); - for (i = 0; i <= maxval; ++i) + for (i = 0; i <= mmaxval; ++i) hist[i] = 0; for (row = 0; row < rows; ++row) { unsigned int col; - pgm_readpgmrow(ifP, grayrow, cols, maxval, format); + pgm_readpgmrow(ifP, grayrow, cols, mmaxval, format); for (col = 0; col < cols; ++col) { /* Because total pixels in image is limited: */ @@ -107,28 +168,108 @@ buildHistogram(FILE * const ifP, } pgm_freerow(grayrow); - *histP = hist; - *maxvalP = maxval; + *histP = hist; } static void -countCumulative(unsigned int const hist[], - gray const maxval, - unsigned int ** const rcountP) { +findQuantiles(unsigned int const n, + unsigned long int const hist[], + unsigned long int const totalCt, + gray const mmaxval, + gray * const quantile) { +/*---------------------------------------------------------------------------- + Find the order-n quantiles (e.g. n == 4 means quartiles) of the pixel + sample values, given that hist[] is the histogram of them (hist[N] is the + number of pixels that have sample value N). - unsigned int * rcount; - unsigned int cumCount; + 'mmaxval' is the highest index in hist[] (so its size is 'mmaxval' + 1, + and there are no pixels greater than 'mmaxval' in the image). + + We return the ith quantile as quantile[i]. For example, for quartiles, + quantile[3] is the least sample value for which at least 3/4 of the pixels + are less than or equal to it. + + quantile[] must be allocated at least to size 'n'. + + 'n' must not be more than 100. +-----------------------------------------------------------------------------*/ + unsigned int quantSeq; + /* 0 is first quantile, 1 is second quantile, etc. */ + + gray sampleVal; + /* As we increment through all the possible sample values, this + is the one we're considering now. + */ + unsigned int cumCt; + /* The number of pixels that have sample value 'sampleVal' or less. */ + + assert(n > 1 && n <= 100); + + sampleVal = 0; /* initial value */ + cumCt = hist[0]; /* initial value */ + + for (quantSeq = 1; quantSeq <= n; ++quantSeq) { + unsigned long int const q = totalCt / n; + unsigned long int const r = totalCt % n; + unsigned long int const quantCt = q*quantSeq + (r*quantSeq + n - 1)/n; + /* This is how many pixels are (ignoring quantization) in the + quantile. E.g. for the 3rd quartile, it is 3/4 of the pixels + in the image. + + This is equivalent to (float) totalCt * quantSeq / n rounded + upwards. We use the int version in spite of complexities + for preventing overflow for slight innacuracies in floating + point arithmetic causes problems when used as loop counter + and array index. + */ + assert(quantCt <= totalCt); + + /* at sampleVal == mmaxval, cumCt == totalCt, so because + quantCt <= 'totalCt', 'sampleVal' cannot go above mmaxval. + */ + + while (cumCt < quantCt) { + ++sampleVal; + cumCt += hist[sampleVal]; + } + + assert(sampleVal <= mmaxval); + + /* 'sampleVal' is the lowest sample value for which at least 'quantCt' + pixels have that sample value or less. 'cumCt' is the number + of pixels that have sample value 'sampleVal' or less. + */ + quantile[quantSeq-1] = sampleVal; + } +} + + + +static void +countCumulative(unsigned long int const hist[], + gray const mmaxval, + unsigned long int const totalPixelCt, + unsigned long int ** const rcountP) { +/*---------------------------------------------------------------------------- + From the histogram hist[] (hist[N] is the number of pixels of sample + value N), compute the cumulative distribution *rcountP ((*rcountP)[N] + is the number of pixels of sample value N or higher). + + *rcountP is newly malloced memory. +-----------------------------------------------------------------------------*/ + unsigned long int * rcount; + unsigned long int cumCount; int i; - - MALLOCARRAY(rcount, maxval + 1); + + MALLOCARRAY(rcount, mmaxval + 1); if (rcount == NULL) pm_error("out of memory"); - for (i = maxval, cumCount = 0; i >= 0; --i) { + for (i = mmaxval, cumCount = 0; i >= 0; --i) { /* Because total pixels in image is limited: */ - assert(UINT_MAX - hist[i] >= cumCount); + assert(ULONG_MAX - hist[i] >= cumCount); cumCount += hist[i]; rcount[i] = cumCount; @@ -140,26 +281,215 @@ countCumulative(unsigned int const hist[], static void -report(unsigned int const hist[], - unsigned int const rcount[], - gray const maxval) { +reportHistHumanFriendly(unsigned long int const hist[], + unsigned long int const rcount[], + gray const maxval) { - unsigned int const totalPixels = rcount[0]; - unsigned int count; + unsigned long int const totalPixelCt = rcount[0]; + + unsigned int cumCount; unsigned int i; - printf("value count b%% w%% \n"); + printf("value count b%% w%% \n"); printf("----- ----- ------ ------\n"); - count = 0; + for (i = 0, cumCount = 0; i <= maxval; ++i) { + if (hist[i] > 0) { + cumCount += hist[i]; + printf( + "%5d %5ld %5.3g%% %5.3g%%\n", i, hist[i], + (float) cumCount * 100.0 / totalPixelCt, + (float) rcount[i] * 100.0 / totalPixelCt); + } + } +} - for (i = 0; i <= maxval; ++i) { + +static void +reportHistForensicHumanFriendly(unsigned long int const hist[], + unsigned long int const rcount[], + gray const maxval, + gray const mmaxval) { + + unsigned long int const totalPixelCt = rcount[0]; + + unsigned long int cumCount; + unsigned int i; + + printf("value count b%% w%% \n"); + printf("----- ----- ------ ------\n"); + + for (i = 0, cumCount = 0; i <= maxval; ++i) { if (hist[i] > 0) { - count += hist[i]; + cumCount += hist[i]; printf( - "%5d %5d %5.3g%% %5.3g%%\n", i, hist[i], - (float) count * 100.0 / totalPixels, - (float) rcount[i] * 100.0 / totalPixels); + "%5d %5ld %5.3g%% %5.3g%%\n", i, hist[i], + (float) cumCount * 100.0 / totalPixelCt, + (float) rcount[i] * 100.0 / totalPixelCt); + } + } + if (totalPixelCt > cumCount) { + printf("----- -----\n"); + + for (i = maxval; i <= mmaxval; ++i) { + if (hist[i] > 0) { + cumCount += hist[i]; + printf( + "%5d %5ld %5.3g%% %5.3g%%\n", i, hist[i], + (float) cumCount * 100.0 / totalPixelCt, + (float) rcount[i] * 100.0 / totalPixelCt); + } + } + } +} + + + +static void +reportHistMachineFriendly(unsigned long int const hist[], + gray const maxval) { + + unsigned int i; + + for (i = 0; i <= maxval; ++i) { + printf("%u %lu\n", i, hist[i]); + } +} + + + +static void +reportQuantilesMachineFriendly(gray const quantile[], + unsigned int const n) { + + unsigned int i; + + for (i = 0; i < n; ++i) + printf("%u\n", quantile[i]); +} + + + +static void +reportMedianHumanFriendly(gray const median) { + + printf("Median: %5u\n", median); +} + + + +static void +reportQuartilesHumanFriendly(gray const quartile[]) { + + unsigned int i; + + printf("Quartiles:\n"); + + printf("Q Value\n"); + printf("---- -----\n"); + + for (i = 1; i <= 4; ++i) + printf("%3u%% %5u\n", 25*i, quartile[i-1]); +} + + + +static void +reportDecilesHumanFriendly(gray const decile[]) { + + unsigned int i; + + printf("Deciles:\n"); + + printf("Q Value\n"); + printf("--- -----\n"); + + for (i = 1; i <= 10; ++i) + printf("%3u%% %5u\n", 10*i, decile[i-1]); +} + + + +static void +summarizeInvalidPixels(unsigned long int const hist[], + unsigned long int const rcount[], + gray const mmaxval, + gray const maxval) { +/*---------------------------------------------------------------------------- + Print total count of valid and invalid pixels, if there are any + invalid ones. +-----------------------------------------------------------------------------*/ + unsigned long int const invalidPixelCt = + mmaxval > maxval ? rcount[maxval+1] : 0; + + if (invalidPixelCt > 0) { + unsigned long int const totalPixelCt = rcount[0]; + unsigned long int const validPixelCt = totalPixelCt - invalidPixelCt; + + printf("\n"); + printf("** Image stream contains invalid sample values " + "(above maxval %u)\n", maxval); + printf("Valid sample values: %lu (%5.4g%%)\n", + validPixelCt, (float)validPixelCt / totalPixelCt * 100.0); + printf("Invalid sample values: %lu (%5.4g%%)\n", + invalidPixelCt, (float)invalidPixelCt / totalPixelCt * 100.0); + } +} + + + +static void +reportFromHistogram(const unsigned long int * const hist, + gray const mmaxval, + gray const maxval, + unsigned long int const totalPixelCt, + struct CmdlineInfo const cmdline) { +/*---------------------------------------------------------------------------- + Analyze histogram 'hist', which has 'mmaxval' buckets, and report + what we find. + + 'maxval' is the maxval that the image states (but note that we tolerate + invalid sample values greater than maxval, which could be as high as + 'mmaxval'). + + 'cmdline' tells what kind of reporting to do. +-----------------------------------------------------------------------------*/ + + if (cmdline.median) { + gray median[2]; + findQuantiles(2, hist, totalPixelCt, mmaxval, median); + if (cmdline.machine) + reportQuantilesMachineFriendly(median, 1); + else + reportMedianHumanFriendly(median[0]); + } else if (cmdline.quartile) { + gray quartile[4]; + findQuantiles(4, hist, totalPixelCt, mmaxval, quartile); + if (cmdline.machine) + reportQuantilesMachineFriendly(quartile, 4); + else + reportQuartilesHumanFriendly(quartile); + } else if (cmdline.decile) { + gray decile[10]; + findQuantiles(10, hist, totalPixelCt, mmaxval, decile); + if (cmdline.machine) + reportQuantilesMachineFriendly(decile, 10); + else + reportDecilesHumanFriendly(decile); + } else { + if (cmdline.machine) + reportHistMachineFriendly(hist, mmaxval); + else { + unsigned long int * rcount; /* malloc'ed array */ + countCumulative(hist, mmaxval, totalPixelCt, &rcount); + if (cmdline.forensic) + reportHistForensicHumanFriendly(hist, rcount, maxval, mmaxval); + else + reportHistHumanFriendly(hist, rcount, maxval); + + summarizeInvalidPixels(hist, rcount, mmaxval, maxval); + + free(rcount); } } } @@ -169,11 +499,19 @@ report(unsigned int const hist[], int main(int argc, const char ** argv) { - struct cmdline_info cmdline; + struct CmdlineInfo cmdline; FILE * ifP; + int rows, cols; + int format; gray maxval; - unsigned int * rcount; /* malloc'ed array */ - unsigned int * hist; /* malloc'ed array */ + /* Stated maxval of the image */ + gray mmaxval; + /* Maxval we assume, which may be greater than the stated maxval + so that we can process invalid pixels in the image that exceed + the maxval. + */ + unsigned long int totalPixelCt; + unsigned long int * hist; /* malloc'ed array */ pm_proginit(&argc, argv); @@ -181,18 +519,23 @@ main(int argc, const char ** argv) { ifP = pm_openr(cmdline.inputFileName); - buildHistogram(ifP, &hist, &maxval); + pgm_readpgminit(ifP, &cols, &rows, &maxval, &format); + + if (ULONG_MAX / cols < rows) + pm_error("Too many pixels (%u x %u) in image. " + "Maximum computable is %lu", + cols, rows, ULONG_MAX); + + totalPixelCt = cols * rows; - countCumulative(hist, maxval, &rcount); + mmaxval = cmdline.forensic ? universalMaxval(maxval, format) : maxval; - report(hist, rcount, maxval); + buildHistogram(ifP, format, cols, rows, mmaxval, &hist); + + reportFromHistogram(hist, mmaxval, maxval, totalPixelCt, cmdline); - free(rcount); free(hist); pm_close(ifP); return 0; } - - - |