From 1b17cc595dbefdf5f5111db8534ec3bce500412e Mon Sep 17 00:00:00 2001 From: giraffedata Date: Fri, 3 Jul 2015 16:54:51 +0000 Subject: cleanup: factor out runlength encoding git-svn-id: http://svn.code.sf.net/p/netpbm/code/trunk@2600 9d0c8265-081b-0410-96cb-a4ca84ce46f8 --- common.mk | 2 +- converter/other/pnmtosgi.c | 88 ++++------- converter/pbm/pbmtomacp.c | 84 ++-------- converter/ppm/ppmtoilbm.c | 72 +++------ converter/ppm/ppmtopjxl.c | 53 +------ lib/Makefile | 3 +- lib/util/Makefile | 1 + lib/util/runlength.c | 374 +++++++++++++++++++++++++++++++++++++++++++++ lib/util/runlength.h | 51 +++++++ test/ilbm-roundtrip.ok | 8 + test/ilbm-roundtrip.test | 21 ++- test/sgi-roundtrip.ok | 2 + test/sgi-roundtrip.test | 9 ++ 13 files changed, 539 insertions(+), 229 deletions(-) create mode 100644 lib/util/runlength.c create mode 100644 lib/util/runlength.h diff --git a/common.mk b/common.mk index a5431727..dd7f4ed3 100644 --- a/common.mk +++ b/common.mk @@ -150,7 +150,7 @@ IMPORTINC_LIB_HEADERS := \ IMPORTINC_LIB_UTIL_HEADERS := \ bitarith.h bitio.h bitreverse.h filename.h intcode.h floatcode.h io.h \ matrix.h mallocvar.h \ - nsleep.h nstring.h pm_c_util.h shhopt.h token.h \ + nsleep.h nstring.h pm_c_util.h runlength.h shhopt.h token.h \ wordaccess.h wordaccess_generic.h wordaccess_64_le.h \ wordaccess_be_aligned.h wordaccess_be_unaligned.h \ wordintclz.h diff --git a/converter/other/pnmtosgi.c b/converter/other/pnmtosgi.c index a8df5328..cc57349f 100644 --- a/converter/other/pnmtosgi.c +++ b/converter/other/pnmtosgi.c @@ -17,30 +17,29 @@ ** Feb 2010 afu ** Added dimension check to prevent short int from overflowing */ + +#include + #include "pnm.h" #include "sgi.h" #include "mallocvar.h" - +#include "runlength.h" /*#define DEBUG*/ -typedef short ScanElem; +typedef uint16_t ScanElem; typedef struct { ScanElem * data; long length; } ScanLine; -#define WORSTCOMPR(x) (2*(x) + 2) - - #define MAXVAL_BYTE 255 #define MAXVAL_WORD 65535 #define INT16MAX 32767 static char storage = STORAGE_RLE; static ScanLine * channel[3]; -static ScanElem * rletemp; static xel * pnmrow; @@ -108,55 +107,13 @@ writeChannels(unsigned int const cols, for (col = 0; col < channel[i][row].length; ++col) { (*put)(channel[i][row].data[col]); } + pm_rlenc_freebuf((unsigned char *) channel[i][row].data); } } } -static int -rleCompress(ScanElem * const inbuf, - unsigned int const size) { - - /* slightly modified RLE algorithm from ppmtoilbm.c written by Robert - A. Knop (rknop@mop.caltech.edu) - */ - - int in, out, hold, count; - ScanElem *outbuf = rletemp; - - in = out = 0; - while (in < size) { - if ((in < size-1) && (inbuf[in] == inbuf[in+1])) { - /*Begin replicate run*/ - for (count = 0, hold = in; in < size && - inbuf[in] == inbuf[hold] && count < 127; - ++in, ++count) - ; - outbuf[out++] = (ScanElem)(count); - outbuf[out++] = inbuf[hold]; - } else { - /*Do a literal run*/ - hold = out; - ++out; - count = 0; - while (((in >= size-2) && (in < size)) - || ((in < size-2) && ((inbuf[in] != inbuf[in+1]) - || (inbuf[in] != inbuf[in+2])))) { - outbuf[out++] = inbuf[in++]; - if (++count >= 127) - break; - } - outbuf[hold] = (ScanElem)(count | 0x80); - } - } - outbuf[out++] = (ScanElem)0; /* terminator */ - - return out; -} - - - static ScanElem * compress(ScanElem * const tempArg, unsigned int const row, @@ -165,7 +122,13 @@ compress(ScanElem * const tempArg, unsigned int const chanNum, long * const table, unsigned int const bpc) { +/*---------------------------------------------------------------------------- + Compress a row, putting results in global 'channel' array, in newly + allocated storage (which Caller must free). + Except that if the compression is null compression, we make 'channel' + point to existing storage, which Caller must not free. Yuck. +-----------------------------------------------------------------------------*/ ScanElem * retval; switch (storage) { @@ -176,16 +139,26 @@ compress(ScanElem * const tempArg, break; case STORAGE_RLE: { unsigned int const tabrow = chanNum * rows + row; - unsigned int const len = rleCompress(tempArg, cols); - /* writes result into rletemp */ - unsigned int i; + + unsigned int len; + size_t lenBytes; ScanElem * p; - + + pm_rlenc_allocoutbuf((unsigned char **) &p, cols, PM_RLE_SGI16); + + pm_rlenc_compressword(tempArg,(unsigned char *) p, PM_RLE_SGI16, + cols, &lenBytes); + + assert((unsigned)lenBytes == lenBytes); + /* Guaranteed by pm_rlenc_compressword() */ + + len = lenBytes / 2; /* sizeof(ScanElem) */ channel[chanNum][row].length = len; - MALLOCARRAY(p, len); + REALLOCARRAY(p, len); /* reclaim some space */ + if (p == NULL) + pm_error("realloc failure while reclaiming memory space " + "for output"); channel[chanNum][row].data = p; - for (i = 0; i < len; ++i) - p[i] = rletemp[i]; table[tabrow] = len * bpc; retval = tempArg; } break; @@ -213,7 +186,6 @@ buildChannels(FILE * const ifP, if (storage != STORAGE_VERBATIM) { MALLOCARRAY_NOFAIL(table, channels * rows); - MALLOCARRAY_NOFAIL(rletemp, WORSTCOMPR(cols)); } else table = NULL; @@ -247,8 +219,6 @@ buildChannels(FILE * const ifP, } free(temp); - if (table) - free(rletemp); return table; } diff --git a/converter/pbm/pbmtomacp.c b/converter/pbm/pbmtomacp.c index 7ddb1ef5..df5cbb0c 100644 --- a/converter/pbm/pbmtomacp.c +++ b/converter/pbm/pbmtomacp.c @@ -41,6 +41,7 @@ #include "pbm.h" #include "shhopt.h" #include "mallocvar.h" +#include "runlength.h" #include "macp.h" #define MIN3(a,b,c) (MIN((MIN((a),(b))),(c))) @@ -281,60 +282,6 @@ writeMacpRowPacked(const bit * const packedBits, -static void -packit(const bit * const sourceBits, - unsigned int const imageColCharCt, - unsigned char * const packedBits, - unsigned int * const packedImageLengthP ) { -/*-------------------------------------------------------------------------- - Compress according to packbits algorithm, a byte-level run-length encoding - scheme. - - Each row is encoded separately. - - The following code does not produce optimum output when there are 2-byte - long sequences between longer ones: the 2-byte run in between does not get - packed, using up 3 bytes where 2 would do. -----------------------------------------------------------------------------*/ - int charcount, packcount; - enum {EQUAL, UNEQUAL} status; - bit * count; - - for (packcount = 0, charcount = 0, status = EQUAL; - charcount < imageColCharCt; - ) { - bit const save = sourceBits[charcount++]; - - int newcount; - - newcount = 1; - while (charcount < imageColCharCt && sourceBits[charcount] == save) { - ++charcount; - ++newcount; - } - if (newcount > 2) { - count = (unsigned char *) &packedBits[packcount++]; - *count = 257 - newcount; - packedBits[packcount++] = save; - status = EQUAL; - } else { - if (status == EQUAL) { - count = (unsigned char *) &packedBits[packcount++]; - *count = newcount - 1; - } else - *count += newcount; - - for( ; newcount > 0; --newcount) { - packedBits[packcount++] = save; - } - status = UNEQUAL; - } - } - *packedImageLengthP = packcount; -} - - - static void writeMacpRow(bit * const imageBits, unsigned int const leftMarginCharCt, @@ -352,31 +299,30 @@ writeMacpRow(bit * const imageBits, else { unsigned int const rightMarginCharCt = MACP_COLCHARS - leftMarginCharCt - imageColCharCt; - unsigned char * const packedBits = malloc(MACP_COLCHARS+1); - - unsigned int packedImageLength; + unsigned char packedBits[MACP_COLCHARS+1]; + size_t packedImageLength; - if (packedBits == NULL) - pm_error("Failed to get memory for a %u-column row buffer", - MACP_COLCHARS); + if (pm_rlenc_maxbytes(MACP_COLCHARS, PM_RLE_PACKBITS) + > MACP_COLCHARS + 1) + pm_error("INTERNAL ERROR: RLE buffer too small"); - packit(imageBits, imageColCharCt, packedBits, &packedImageLength); + pm_rlenc_compressbyte(imageBits, packedBits, PM_RLE_PACKBITS, + imageColCharCt, &packedImageLength); - /* Check if we are we better off with compression. If not, send row - unpacked. See note at top of file. - */ - if (packedImageLength + (leftMarginCharCt > 0 ? 1 : 0) * 2 + (rightMarginCharCt > 0 ? 1 : 0) * 2 - < MACP_COLCHARS) + < MACP_COLCHARS) { + /* It's smaller compressed, so do that */ writeMacpRowPacked(packedBits, leftMarginCharCt, packedImageLength, rightMarginCharCt, ofP); - else /* Extremely rare */ + } else { /* Extremely rare */ + /* It's larger compressed, so do it uncompressed. See note + at top of file. + */ writeMacpRowUnpacked(imageBits, leftMarginCharCt, imageColCharCt, ofP); - - free(packedBits); + } } } diff --git a/converter/ppm/ppmtoilbm.c b/converter/ppm/ppmtoilbm.c index 9f1aca46..595aa3f4 100644 --- a/converter/ppm/ppmtoilbm.c +++ b/converter/ppm/ppmtoilbm.c @@ -38,8 +38,11 @@ ** - added IFF text chunks ** ** Feb 2010: afu -** Added dimension check to prevent short int from overflowing. -** +** - added dimension check to prevent short int from overflowing. +** +** June 2015: afu +** - moved byterun1 (or Packbits) compression to lib/util/runlenth.c +** - fixed bug with HAM -nocompress ** ** TODO: ** - multipalette capability (PCHG chunk) for std and HAM @@ -64,6 +67,7 @@ ** implied warranty. */ +#include #include #include "pm_c_util.h" @@ -71,6 +75,7 @@ #include "ppm.h" #include "ppmfloyd.h" #include "pbm.h" +#include "runlength.h" #include "ilbm.h" #include "lum.h" @@ -418,46 +423,6 @@ writeBmhd(int const cols, /************ compression ************/ - - -static int -runbyte1(int const size) { - -/* runbyte1 algorithm by Robert A. Knop (rknop@mop.caltech.edu) */ - - int in,out,count,hold; - unsigned char *inbuf = coded_rowbuf; - unsigned char *outbuf = compr_rowbuf; - - - in=out=0; - while( in=size-2)&&(in=128 ) - break; - } - outbuf[hold]=count-1; - } - } - return(out); -} - - - static void storeBodyrow(unsigned char * const row, int const len) { @@ -480,22 +445,26 @@ storeBodyrow(unsigned char * const row, -static int -compressRow(int const bytes) { +static unsigned int +compressRow(unsigned int const bytes) { - int newbytes; + size_t compressedByteCt; - switch( compmethod ) { + switch (compmethod) { case cmpByteRun1: - newbytes = runbyte1(bytes); + pm_rlenc_compressbyte( + coded_rowbuf, compr_rowbuf, PM_RLE_PACKBITS, bytes, + &compressedByteCt); break; default: pm_error("compressRow(): unknown compression method %d", compmethod); } - storeBodyrow(compr_rowbuf, newbytes); + storeBodyrow(compr_rowbuf, compressedByteCt); + + assert((unsigned)compressedByteCt == compressedByteCt); - return newbytes; + return (unsigned)compressedByteCt; } @@ -2020,7 +1989,8 @@ main(int argc, char ** argv) { if( ++argn >= argc ) pm_error("-camg requires a value"); value = strtol(argv[argn], &tail, 16); - /* TODO: should do some error checking here */ + if(argv[argn] == tail) + pm_error("-camg requires a value"); viewportmodes |= value; gen_camg = 1; } @@ -2329,7 +2299,7 @@ main(int argc, char ** argv) { for (i = 0; i < RowBytes(cols); ++i) coded_rowbuf[i] = 0; if (DO_COMPRESS) - MALLOCARRAY_NOFAIL(compr_rowbuf, WORSTCOMPR(RowBytes(cols))); + pm_rlenc_allocoutbuf(&compr_rowbuf, RowBytes(cols), PM_RLE_PACKBITS); } switch (mode) { diff --git a/converter/ppm/ppmtopjxl.c b/converter/ppm/ppmtopjxl.c index ddf49638..90bcef0f 100644 --- a/converter/ppm/ppmtopjxl.c +++ b/converter/ppm/ppmtopjxl.c @@ -20,6 +20,7 @@ #include "pm_c_util.h" #include "nstring.h" #include "ppm.h" +#include "runlength.h" #define MAXCOLORS 1024 @@ -153,52 +154,12 @@ putbits(int const bArg, /* remove trailing zeros */ printf("\033*b"); if (num && !nopack) { /* TIFF 4.0 packbits encoding */ - unsigned int i; - int start = 0; - int next; - runcnt[start] = 0; - for (i = 1; i < num; ++i) { - if (inrow[i] == inrow[i-1]) { - if (runcnt[start] <= 0 && runcnt[start] > -127) - runcnt[start]--; - else - runcnt[start = i] = 0; - } else { - if (runcnt[start] >= 0 && runcnt[start] < 127) - runcnt[start]++; - else - runcnt[start = i] = 0; - } - } - for (i = 0, start = 0; i < num; i = next) { - int count = runcnt[i]; - int from = i; - if (count >= 0) { /* merge two-byte runs */ - for (;;) { - next = i+1+runcnt[i]; - if(next >= num || runcnt[next] < 0 || - count+runcnt[next]+1 > 127) - break; - count += runcnt[next]+1; - i = next; - } - } - next = i + 1 + ((runcnt[i] < 0) ? -runcnt[i] : runcnt[i]); - if (next < num && count > 0 && - runcnt[next] < 0 && runcnt[next] > -127) { - --count; - --next; - runcnt[next] = runcnt[next+1]-1; - } - outrow[start++] = count; - if (count >= 0) { - while (count-- >= 0) - outrow[start++] = inrow[from++]; - } else - outrow[start++] = inrow[from]; - } - if (start < num) { - num = start; + size_t outSize; + pm_rlenc_compressbyte( + (unsigned char *)inrow, (unsigned char *)outrow, + PM_RLE_PACKBITS, num, &outSize); + if (outSize < num) { + num = outSize; if (!pack) { printf("2m"); pack = true; diff --git a/lib/Makefile b/lib/Makefile index fa73d194..0097a04e 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -46,6 +46,7 @@ LIBOBJECTS_X = \ util/matrix.o \ util/nsleep.o \ util/nstring.o \ + util/runlength.o \ util/shhopt.o \ util/token.o \ util/vasprintf.o \ @@ -57,7 +58,7 @@ INTERFACE_HEADERS = colorname.h \ pam.h pamdraw.h pammap.h pbm.h pbmfont.h \ pgm.h pm.h pm_gamma.h pm_system.h pnm.h \ ppm.h ppmcmap.h ppmdfont.h ppmdraw.h ppmfloyd.h \ - util/mallocvar.h util/shhopt.h \ + util/mallocvar.h util/runlength.h util/shhopt.h \ DATAFILES = rgb.txt diff --git a/lib/util/Makefile b/lib/util/Makefile index c8522a04..02119edf 100644 --- a/lib/util/Makefile +++ b/lib/util/Makefile @@ -19,6 +19,7 @@ UTILOBJECTS = \ matrix.o \ nsleep.o \ nstring.o \ + runlength.o \ shhopt.o \ token.o \ vasprintf.o \ diff --git a/lib/util/runlength.c b/lib/util/runlength.c new file mode 100644 index 00000000..e5c60db0 --- /dev/null +++ b/lib/util/runlength.c @@ -0,0 +1,374 @@ +/*============================================================================= + runlength.c +=============================================================================== + "Packbits" run-length encoding and variants. + + Copyright (C) 2015 by Akira Urushibata (afu@wta.att.ne.jp). + + Permission to use, copy, modify, and distribute this software and its + documentation for any purpose and without fee is hereby granted, provided + that the above copyright notice appear in all copies and that both that + copyright notice and this permission notice appear in supporting + documentation. This software is provided "as is" without express or implied + warranty. + + Functions pm_rlenc_byte() and pm_rlenc_word() are based on algorithm + originally by Robert A. Knop (rknop@mop.caltech.edu). + + Those who wish to incorporate the code herein into their own programs are + strongly discouraged from removing the comments within borders consisting + of "+" marks. This is a practical consideration based on code inspection + and bug fixes of various programs which use run-length compression. + +=============================================================================== + + Packbits is a simple yet efficient simple run-length compression method. It + has a provision for uncompressed sequences which allows efficient encoding + of noisy input. + + A survey of netpbm source code in 2015 found Packbits encoding in the + following Netpbm programs: pbmtoescp2, pbmtomacp, pnmtopalm, pnmtopclxl, + pnmtops, ppmtoilbm and ppmtopjxl. + + Packbits is an option in the TIFF standard; pamtotiff can generate TIFF + images that use Packbits compression, but does so via code in the TIFF + library (not part of Netpbm). + + Variants of Packbits are employed in pnmtosgi, pamtopdbimg, pbmtoppa + and pbmtogo. + + All the above programs formerly did the same compression through different + code. This redundancy bloated the Netpbm package and made maintenance + difficult. This maintenance difficulty surfaced as an issue when tests with + valgrind revealed multiple memory-related problems in the above programs. + Indeed, just determining which programs do this compression by this method + was not simple because of differences in terminology. "Packbits" is often + called "run length encoding." In ppmtoilbm the name is "byterun1." + + Today, all Netpbm programs that do Packbits compression use the facilities + in this file for it. +=============================================================================*/ + +#include + +#include "pm.h" +#include "pm_c_util.h" +#include "runlength.h" +#include "mallocvar.h" + + + +static const char * const errorUndefinedMode = + "Internal error: compression mode %u not supported"; + + + +/*----------------------------------------------------------------------------- + Run length encoding + + In this simple run-length encoding scheme, compressed and uncompressed + strings follow a single index or "flag" byte N. There are several + variations, differing in the format of the flag byte, maximum string length + and element size (byte or word). + + In the most widely used version, Packbits, the meaning of the flag byte N + is defined as follows: + + 0-127 means the next N+1 bytes are uncompressed. + 129-255 means the next byte is to be repeated 257-N times. + 128 is not used. + + The name "Packbits" is misleading: it packs bytes, not bits. +-----------------------------------------------------------------------------*/ + + + +void +pm_rlenc_compressbyte(const unsigned char * const inbuf, + unsigned char * const outbuf, + enum pm_RleMode const mode, + size_t const inSize, + size_t * const outputSizeP) { +/*----------------------------------------------------------------------------- + Compress the contents of input buffer 'inbuf' with Packbits encoding into + output buffer 'outbuf'. 'inSize' is the number of bytes of data in 'inbuf'. + Return as *outputSizeP the number of bytes we put in 'outbuf'. + + 'outbuf' should be allocated with pm_rlenc_allocoutbuf(). + + Always encode 3-byte repeat sequences. + + Encode 2-byte repeat sequences only when they are at the start of the block. + This ensures that the output is never unnecessary bloated. + + Original algorithm by Robert A. Knop (rknop@mop.caltech.edu) +-----------------------------------------------------------------------------*/ + unsigned int const maxRun = 128; + + size_t inCurs, outCurs; + + if (mode != PM_RLE_PACKBITS) + pm_error(errorUndefinedMode, mode); + + for (inCurs = 0, outCurs = 0; inCurs < inSize; ) { + if ((inCurs < inSize - 1) && (inbuf[inCurs] == inbuf[inCurs+1])) { + /*Begin replicate run*/ + size_t const hold = inCurs; + size_t count; + for (count = 0; + inCurs < inSize && + inbuf[inCurs] == inbuf[hold] && + count < maxRun; + ++inCurs, ++count) + ; + + outbuf[outCurs++] = (unsigned char)(257 - count); + outbuf[outCurs++] = inbuf[hold]; + } else { + /*Do a non-run*/ + size_t const hold = outCurs; + size_t count; + ++outCurs; + count = 0; + while(((inCurs + 2 >= inSize) && (inCurs < inSize) ) || + ((inCurs + 2 < inSize) && + ((inbuf[inCurs] != inbuf[inCurs+1] ) + || (inbuf[inCurs] != inbuf[inCurs+2] ) ) ) ) { + outbuf[outCurs++] = inbuf[inCurs++]; + if (++count >= 128) + break; + } + outbuf[hold] = (unsigned char)(count - 1); + } + } + *outputSizeP = outCurs; +} + + + +static void +setFlagElement(void * const outP, + enum pm_RleMode const mode, + bool const isRepeatRun, + size_t const count) { +/*--------------------------------------------------------------------------- + Write the flag byte or word at specified point in the output buffer. +-----------------------------------------------------------------------------*/ + switch (mode) { + case PM_RLE_SGI16: + * (uint16_t *) outP = (isRepeatRun ? 0x00 : 0x80 ) | count; + break; + case PM_RLE_PALM16: + * (unsigned char *) outP = isRepeatRun ? + (unsigned char)(257 - count) : (unsigned char) (count - 1); + break; + default: + pm_error(errorUndefinedMode, mode); + } +} + + + +void +pm_rlenc_compressword(const uint16_t * const inbuf, + unsigned char * const outbuf, + enum pm_RleMode const mode, + size_t const inSize, + size_t * const outputSizeP) { +/*--------------------------------------------------------------------------- + Similar to pm_rlenc_byte(), but this works with two-byte elements. The + difference between SGI16 and PALM16 is the size of the flag. SGI16 : 16 + bits; PALM16 : 8 bits + + 'inSize' is a number of words,but *outputSizeP is a number of bytes. +-----------------------------------------------------------------------------*/ + size_t inCurs, outCurs; + size_t flagSz; + unsigned int maxRunSz; + + switch (mode) { + case PM_RLE_SGI16: + flagSz = 2; + maxRunSz = 127; + break; + case PM_RLE_PALM16: + flagSz = 1; + maxRunSz = 128; + break; + default: + pm_error(errorUndefinedMode, mode); + } + + for (inCurs = 0, outCurs = 0; inCurs < inSize; ) { + if ((inCurs < inSize - 1) && (inbuf[inCurs] == inbuf[inCurs+1])) { + uint16_t const runValue = inbuf[inCurs]; + size_t count; + /* Do a run of 'runValue' values */ + for (count = 0; + count < maxRunSz && + inCurs < inSize && + inbuf[inCurs] == runValue; + ++inCurs, ++count) + ; + setFlagElement(&outbuf[outCurs], mode, TRUE, count); + outCurs += flagSz; + * (uint16_t *) &outbuf[outCurs] = runValue; + outCurs += 2; + } else { + /*Do a non run*/ + size_t const nonrunStart = inCurs; + size_t count; + count = 0; + while (count < maxRunSz && + ((inCurs + 2 >= inSize && inCurs < inSize) || + (inCurs + 2 < inSize && + (inbuf[inCurs] != inbuf[inCurs+1] + || inbuf[inCurs] != inbuf[inCurs+2])))) { + ++inCurs; + ++count; + } + setFlagElement(&outbuf[outCurs], mode, FALSE, count); + outCurs += flagSz; + memcpy(&outbuf[outCurs], &inbuf[nonrunStart], count * 2); + outCurs += count * 2; + } + } + + if (mode == PM_RLE_SGI16) { + * (uint16_t *) &outbuf[outCurs] = 0; /* terminator */ + outCurs += 2; + } + + *outputSizeP = outCurs; +} + + + +/*+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + Worst case output size + + The term "worst-case output size" can mean one of two things depending on + whether the encoder is efficient or not. + + Sub-optimal encoder: the output size can be up to twice the input size. + This happens (or one may rather say "is achieved by a determined encoder") + when input is split into one-byte blocks. + + Efficient encoder: the output is larger than the input by the minimum + number of flag bytes. The worst case happens when there are no repeat + sequences in the input. + + The key to an efficient encoder is correct handling of short, inefficient + sequences. The following algorithm (not actually used in this file) + describes the idea. + + A run is a maximal set of two or more consecutive identical values in the + input. A nonrun is a maximal set of values in which every value is + different from its neighbors. + + A compressed block is one that encodes a sequence of identical values + (which could be a run or just part of a run) as a value and a count. + count. An uncompressed block is one that encodes a sequence of any values + by listing the values individually. + + Start by encoding the entire input as uncompressed blocks. Seek runs that + can be encoded with compressed blocks, but only if doing so doesn't make + the output larger. + + Criteria to avoid bloat: + + - Overhead for a single uncompressed block: 1 byte. + + - Overhead for one uncompressed block and a compressed block: 2 bytes. + + - Overhead for two uncompressed blocks and a compressed block: 3 bytes. + + - New blocks at the edge of any existing block add 1 byte of overhead. + New blocks in the middle of existing blocks add 2 bytes of overhead. + + - Whenever savings are larger than the added overhead, encode the run + as a compressed block. ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ + + + +size_t +pm_rlenc_maxbytes(size_t const inSize, /* number of elements */ + enum pm_RleMode const mode) { +/*--------------------------------------------------------------------------- + Calculate worst case output size from input size and encoding mode. + + 'inSize' counts the number of elements, not bytes: input size is (2 * + inSize) bytes if input is an array of 16-bit words. + + Return value is the maximum possible output size in bytes regardless of + type of input. + + Abort the program if the maximum possible output size is greater than + INT_MAX. +-----------------------------------------------------------------------------*/ + /* The upper limit could be raised above INT_MAX, but no program needs that + today. + */ + size_t blockSize; + size_t flagSize; + size_t itemSize; + size_t miscSize; + size_t overhead; + + switch (mode) { + case PM_RLE_PACKBITS: + blockSize = 128; flagSize = 1; itemSize = 1; miscSize = 0; break; + case PM_RLE_SGI8: + blockSize = 127; flagSize = 1; itemSize = 1; miscSize = 0; break; + case PM_RLE_GRAPHON: case PM_RLE_PPA: + blockSize = 64; flagSize = 1; itemSize = 1; miscSize = 0; break; + case PM_RLE_SGI16: + blockSize = 127; flagSize = 2; itemSize = 2; miscSize = 2; break; + case PM_RLE_PALM16: + blockSize = 128; flagSize = 1; itemSize = 2; miscSize = 0; break; + default: + pm_error(errorUndefinedMode, mode); + } + + overhead = miscSize + + (inSize / blockSize + (inSize % blockSize > 0 ? 1 : 0) ) * flagSize; + + if (inSize > INT_MAX / itemSize || + inSize * itemSize > INT_MAX - overhead) + pm_error("Cannot do RLE compression. Input too large."); + + return inSize * itemSize + overhead; +} + + + +void +pm_rlenc_allocoutbuf(unsigned char ** const outbufP, + size_t const inSize, /* element count */ + enum pm_RleMode const mode) { +/*--------------------------------------------------------------------------- + Allocate an output buffer sufficient for input with inSize elements, using + compression mode 'mode'. Element may be byte or word, whichever 'mode' + implies. +-----------------------------------------------------------------------------*/ + size_t const size = pm_rlenc_maxbytes(inSize, mode); + + unsigned char * outbuf; + + MALLOCARRAY(outbuf, size); + if (outbuf == NULL) + pm_error("Out of memory trying to get %u bytes for RLE output buffer", + (unsigned)size); + + *outbufP = outbuf; +} + + + +void +pm_rlenc_freebuf(void * const buf) { + free(buf); +} + + diff --git a/lib/util/runlength.h b/lib/util/runlength.h new file mode 100644 index 00000000..4857ae61 --- /dev/null +++ b/lib/util/runlength.h @@ -0,0 +1,51 @@ +#ifndef RUNLENGTH_INCLUDED +#define RUNLENGTH_INCLUDED + +#include "pm_config.h" + +#include + +#ifdef __cplusplus +extern "C" { +#endif +#if 0 +} /* to fake out automatic code indenters */ +#endif + + +enum pm_RleMode { PM_RLE_PACKBITS, /* most common mode */ + PM_RLE_GRAPHON, /* reserved */ + PM_RLE_PPA, /* reserved */ + PM_RLE_SGI8, /* reserved */ + PM_RLE_SGI16, + PM_RLE_PALM16 + }; + +size_t +pm_rlenc_maxbytes (size_t const inSize, + enum pm_RleMode const mode); + +void +pm_rlenc_allocoutbuf(unsigned char ** const outbufP, + size_t const inSize, + enum pm_RleMode const mode); + + +void +pm_rlenc_freebuf(void * const buf); + +void +pm_rlenc_compressbyte(const unsigned char * const inbuf, + unsigned char * const outbuf, + enum pm_RleMode const mode, + size_t const inSize, + size_t * const outputSizeP); + +void +pm_rlenc_compressword(const uint16_t * const inbuf, + unsigned char * const outbuf, + enum pm_RleMode const mode, + size_t const itemCnt, + size_t * const outputSizeP); + +#endif diff --git a/test/ilbm-roundtrip.ok b/test/ilbm-roundtrip.ok index cd357916..54574a18 100644 --- a/test/ilbm-roundtrip.ok +++ b/test/ilbm-roundtrip.ok @@ -1,2 +1,10 @@ +829921912 685 +829921912 685 +829921912 685 +829921912 685 +1926073387 101484 +1926073387 101484 1926073387 101484 984199586 101484 +2059976475 661 +2059976475 661 diff --git a/test/ilbm-roundtrip.test b/test/ilbm-roundtrip.test index a13fc8be..f62368ff 100755 --- a/test/ilbm-roundtrip.test +++ b/test/ilbm-roundtrip.test @@ -2,10 +2,27 @@ # This script tests: ppmtoilbm ilbmtoppm # Also requires: pamseq pamdepth pamtopnm pnmremap +#Test. 1 Should produce 829921912 685 four times +#Output is PPM raw, 14 by 16 maxval 255 +ppmtoilbm testgrid.pbm | ilbmtoppm | cksum +ppmtoilbm -aga testgrid.pbm | ilbmtoppm | cksum +ppmtoilbm -ham6 testgrid.pbm | ilbmtoppm | cksum +ppmtoilbm -ham8 testgrid.pbm | ilbmtoppm | cksum -#Test. Should produce 1926073387 101484 + +#Test. 2 Should produce 1926073387 101484 three times ppmtoilbm testimg.ppm | ilbmtoppm | cksum +ppmtoilbm -24force testimg.ppm | ilbmtoppm | cksum +ppmtoilbm -dcbits 8 8 8 -nocompress testimg.ppm | ilbmtoppm | cksum + -#Test. Should print 984199586 101484 +#Test. 3 Should print 984199586 101484 pamseq 3 5 -tupletype=RGB | pamdepth 255 | pamtopnm | \ pnmremap -mapfile=- testimg.ppm | ppmtoilbm | ilbmtoppm | cksum + + +#Test. 4 Should print 2059976475 661 twice +pamseq 3 5 -tupletype=RGB | pamtopnm | \ + ppmtoilbm -compress | ilbmtoppm | cksum +pamseq 3 5 -tupletype=RGB | pamtopnm | \ + ppmtoilbm -nocompress | ilbmtoppm | cksum diff --git a/test/sgi-roundtrip.ok b/test/sgi-roundtrip.ok index 541d59b1..b1bd5ba6 100644 --- a/test/sgi-roundtrip.ok +++ b/test/sgi-roundtrip.ok @@ -4,3 +4,5 @@ 1926073387 101484 538848130 235 538848130 235 +2394972481 463 +2394972481 463 diff --git a/test/sgi-roundtrip.test b/test/sgi-roundtrip.test index 5012aab7..88efb75e 100755 --- a/test/sgi-roundtrip.test +++ b/test/sgi-roundtrip.test @@ -32,3 +32,12 @@ rm ${b_sgi} ${b_red} ${b_grn} ${b_blu} # Test 3. Should produce 2425386270 41 twice pnmtosgi testgrid.pbm | sgitopnm | cksum # Defaults to -rle pnmtosgi -verbatim testgrid.pbm | sgitopnm | cksum + + +testgrid_pgm=${tmpdir}/testgrid.pgm + +# Test 4. Should produce 2394972481 463 twice +pamdepth 65535 testgrid.pbm | pamtopnm | tee ${testgrid_pgm} | \ + pnmtosgi -rle | sgitopnm | cksum +pnmtosgi -verbatim ${testgrid_pgm} | sgitopnm | cksum +rm ${testgrid_pgm} -- cgit 1.4.1