From 6fd77330f779af71eb4bcf330290947d181d29f7 Mon Sep 17 00:00:00 2001 From: giraffedata Date: Fri, 12 Mar 2010 03:00:39 +0000 Subject: Speedup from afu git-svn-id: http://svn.code.sf.net/p/netpbm/code/trunk@1147 9d0c8265-081b-0410-96cb-a4ca84ce46f8 --- editor/pbmpscale.c | 467 ++++++++++++++++++++++++++++++++--------------------- 1 file changed, 286 insertions(+), 181 deletions(-) (limited to 'editor') diff --git a/editor/pbmpscale.c b/editor/pbmpscale.c index 6b9ffe8d..4a81486c 100644 --- a/editor/pbmpscale.c +++ b/editor/pbmpscale.c @@ -3,20 +3,61 @@ */ #include - -#include "pm_c_util.h" -#include "mallocvar.h" #include "pbm.h" +#include "mallocvar.h" +#include "bitarith.h" + +#define LEFTBITS pm_byteLeftBits +#define RIGHTBITS pm_byteRightBits + +/* Table for translating bit pattern into "corners" flag element */ + +unsigned char const +transTable[512] = { + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x04, + 0x00, 0x00, 0x04, 0x04, 0xaa, 0xa2, 0x82, 0x82, 0x8a, 0x82, 0x82, 0x82, + 0xa0, 0xa0, 0x40, 0x40, 0xc0, 0xc0, 0xc0, 0xc0, 0x00, 0x00, 0x10, 0x10, + 0x00, 0x00, 0x10, 0x10, 0x00, 0x00, 0x28, 0x28, 0x00, 0x00, 0x28, 0x28, + 0x0a, 0x03, 0x01, 0x03, 0x0a, 0x03, 0x01, 0x03, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x04, 0x04, 0x00, 0x00, 0x04, 0x04, 0xa8, 0xa0, 0xc0, 0xc0, + 0x88, 0x80, 0x80, 0x80, 0xa0, 0xa0, 0xc0, 0xc0, 0x80, 0x80, 0x80, 0x80, + 0x00, 0x00, 0x10, 0x10, 0x00, 0x00, 0x10, 0x10, 0x00, 0x00, 0x28, 0x28, + 0x00, 0x00, 0x28, 0x28, 0x0c, 0xff, 0xff, 0xff, 0x08, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0x01, 0x01, 0x0a, 0x0a, 0x01, 0x01, 0x0a, 0x0a, + 0x28, 0x30, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x10, 0x30, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x40, 0x40, 0xa0, 0xa0, 0x40, 0x40, 0xa0, 0xa0, + 0x82, 0x82, 0xff, 0xff, 0x82, 0x82, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00, + 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x01, 0x01, 0x0a, 0x0a, + 0x01, 0x01, 0x0a, 0x0a, 0x28, 0x30, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, + 0x10, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x40, 0xa0, 0xa0, + 0x40, 0x40, 0xa0, 0xa0, 0x82, 0x82, 0xff, 0xff, 0x82, 0x82, 0xff, 0xff, + 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x04, 0x04, 0x00, 0x00, 0x04, 0x04, 0x2a, 0x22, 0x03, 0x02, + 0x0a, 0x02, 0x03, 0x02, 0x30, 0x20, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x00, 0x00, 0x10, 0x10, 0x00, 0x00, 0x10, 0x10, 0x00, 0x00, 0x28, 0x28, + 0x00, 0x00, 0x28, 0x28, 0x0a, 0x02, 0x03, 0x02, 0x0a, 0x02, 0x03, 0x02, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x04, 0x00, 0x00, 0x04, 0x04, + 0x28, 0x20, 0xff, 0xff, 0x08, 0xff, 0xff, 0xff, 0x30, 0x20, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x10, 0x10, 0x00, 0x00, 0x10, 0x10, + 0x00, 0x00, 0x28, 0x28, 0x00, 0x00, 0x28, 0x28, 0x0c, 0xff, 0xff, 0xff, + 0x08, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x01, 0x01, 0x0a, 0x0a, + 0x01, 0x01, 0x0a, 0x0a, 0x28, 0x20, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, + 0x30, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x40, 0xa0, 0xa0, + 0x40, 0x40, 0xa0, 0xa0, 0x82, 0x82, 0xff, 0xff, 0x82, 0x82, 0xff, 0xff, + 0x04, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x01, 0x01, 0x0a, 0x0a, 0x01, 0x01, 0x0a, 0x0a, 0x28, 0x20, 0x00, 0x00, + 0x08, 0x00, 0x00, 0x00, 0x30, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x40, 0x40, 0xa0, 0xa0, 0x40, 0x40, 0xa0, 0xa0, 0x82, 0x82, 0xff, 0xff, + 0x82, 0x82, 0xff, 0xff, 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }; -/* input bitmap size and storage */ -static int rows, cols, format; -static bit * inrow[3]; - -#define THISROW (1) - -/* compass directions from west clockwise */ -static int const xd_pscale[] = { -1, -1, 0, 1, 1, 1, 0, -1 }; -static int const yd_pscale[] = { 0, -1, -1, -1, 0, 1, 1, 1 }; /* starting positions for corners */ #define NE(f) ((f) & 3) @@ -24,7 +65,7 @@ static int const yd_pscale[] = { 0, -1, -1, -1, 0, 1, 1, 1 }; #define SW(f) (((f) >> 4) & 3) #define NW(f) (((f) >> 6) & 3) - +#define MAX(x,y) ((x) > (y) ? (x) : (y)) static void validateComputableDimensions(unsigned int const width, @@ -47,222 +88,286 @@ validateComputableDimensions(unsigned int const width, -static int -corner(uint16_t const pat) { -/* list of corner patterns; bit 7 is current color, bits 0-6 are squares - * around (excluding square behind), going clockwise. - * The high byte of the patterns is a mask, which determines which bits are - * not ignored. - */ +static void +writeBitSpan(unsigned char * const packedBitrow, + int const cols, + int const offset, + int const color) { +/*---------------------------------------------------------------------------- + Write white (color="0") or black (="1") bits into packedBitrow[], + starting at 'offset', length 'cols'. +-----------------------------------------------------------------------------*/ + unsigned char * const dest = & packedBitrow[offset/8]; + unsigned int const rs = offset % 8; + unsigned int const trs = (cols + rs) % 8; + unsigned int const colBytes = pbm_packed_bytes(cols + rs); + unsigned int const last = colBytes - 1; - uint16_t const patterns[] = { - 0x0000, 0xd555, /* no corner */ - 0x0001, 0xffc1, 0xd514, /* normal corner */ - 0x0002, 0xd554, 0xd515, 0xbea2, 0xdfc0, 0xfd81, 0xfd80, 0xdf80, - /* reduced corners */ - 0x0003, 0xbfa1, 0xfec2 /* reduced if > 1 */ - }; - - /* search for corner patterns, return type of corner found: - * 0 = no corner, - * 1 = normal corner, - * 2 = reduced corner, - * 3 = reduced if cutoff > 1 - */ + unsigned char const origHead = dest[0]; + unsigned char const origEnd = dest[last]; unsigned int i; - unsigned int r; - r = 0; /* initial value */ + for( i = 0; i < colBytes; ++i) + dest[i] = color * 0xff; - for (i = 0; i < sizeof(patterns)/sizeof(uint16_t); ++i) { - if (patterns[i] < 0x100) - r = patterns[i]; - else if ((pat & (patterns[i] >> 8)) == - (patterns[i] & (patterns[i] >> 8))) - return r; - } - return 0; -} + if (rs > 0) + dest[0] = LEFTBITS(origHead, rs) | RIGHTBITS(dest[0], 8-rs); + if (trs > 0) + dest[last] = LEFTBITS(dest[last], trs) | RIGHTBITS(origEnd, 8-trs); +} static void -nextrow_pscale(FILE * const ifP, - int const row) { +setFlags( const bit * const prevrow, + const bit * const thisrow, + const bit * const nextrow, + unsigned char * const flags, + unsigned int const cols ) { /*---------------------------------------------------------------------------- - Get a new row. + Scan one row, examining the row above and row below, and determine + whether there are "corners" for each pixel. Feed a 9 bit sample into + pre-calculated array transTable[512] to calculate all four corner statuses + at once. + + Bits in the 9 bit sample represent the current pixel and neighbors: + NW : N : NE : W: Current : E : SW : S : SE + + Bits in flag are divided into 4 fields of width 2 each: + NW : SW : SE : NE + + Code 0xff is an exception. It is a variation of 0x00. + 0x00 : no corners, no color change from above row (Current == N) + 0xff : no corners, but color changed (Current != N) + + Most transTable[] entries are "no corners". + 0x00 appears 180 times, 0xff 109 times. + + The following code is from the previous version, which examined + the corners one by one: + +// list of corner patterns; bit 7 is current color, bits 0-6 are squares +// around (excluding square behind), going clockwise. +// The high byte of the patterns is a mask, which determines which bits are +// not ignored. + +uint16_t const patterns[] + = { 0x0000, 0xd555, // no corner + 0x0001, 0xffc1, 0xd514, // normal corner + 0x0002, 0xd554, 0xd515, 0xbea2, 0xdfc0, 0xfd81, 0xfd80, 0xdf80, + // reduced corners + 0x0003, 0xbfa1, 0xfec2 }; // reduced if cutoff > 1 + + For example, the NE corner is examined with the following 8 bit sample: + Current : W : NW : N : NE : E : SE : S + (SW is the "square behind") + -----------------------------------------------------------------------------*/ - bit * shuffle; - - shuffle = inrow[0]; - inrow[0] = inrow[1]; - inrow[1] = inrow[2]; - inrow[2] = shuffle ; - - if (row < rows) { - if (shuffle == NULL) - inrow[2] = shuffle = pbm_allocrow(cols); - pbm_readpbmrow(ifP, inrow[2], cols, format); - } else - inrow[2] = NULL; /* discard storage */ + uint32_t prevrow24 = prevrow[0]; /* higher bits are set to 0 */ + uint32_t thisrow24 = thisrow[0]; + uint32_t nextrow24 = nextrow[0]; + unsigned int col; + + for (col = 0; col < cols; ++col) { + unsigned int sample; + unsigned int const col8=col/8; + unsigned int const offset=col%8; + + if(offset ==0){ + prevrow24 = prevrow24 << 8 | prevrow[ col8 +1 ]; + thisrow24 = thisrow24 << 8 | thisrow[ col8 +1 ]; + nextrow24 = nextrow24 << 8 | nextrow[ col8 +1 ]; + } + + sample= ( ( prevrow24 >> ( 8 -offset) ) & 0x01c0 ) + | ( ( thisrow24 >> (11 -offset) ) & 0x0038 ) + | ( ( nextrow24 >> (14 -offset) ) & 0x0007 ); + + flags[col] = transTable[sample]; + } } - static void -setFlags(unsigned char * flags) { - - unsigned int col; - - for (col = 0; col < cols; ++col) { - uint16_t const thispoint = (inrow[THISROW][col] != PBM_WHITE) << 7; - - unsigned int i, k; - unsigned char vec; - - vec = 0; /* initial value */ - - for (k = 0; k < 8; ++k) { - int const x = col + xd_pscale[k]; - int const y = THISROW + yd_pscale[k]; - vec <<= 1; - if (x >= 0 && x < cols && inrow[y]) - vec |= (inrow[y][x] != PBM_WHITE) ; - } - - vec = (vec >> 1 | vec << 7); - - flags[col] = 0 ; - for (i = 0; i != 8; i += 2) { - flags[col] |= corner(thispoint | (vec & 0x7f) ) << i ; - vec = (vec >> 6 | vec << 2); +expandRow(const bit * const thisrow, + const bit * const prevrow, + bit * const outrow, + unsigned char * const flags, + unsigned int const cols, + int const scale, + int const cutoff, + int const ucutoff) { +/*---------------------------------------------------------------------------- + Process one row, using flags array as reference. If pixel has no corners + output a NxN square of the given color, otherwise output with the + specified corner area(s) clipped off. +-----------------------------------------------------------------------------*/ + unsigned int i, col; + unsigned int const outcols = cols * scale; + + for (i = 0; i < scale; i++) { + unsigned int outcol=0; + + int const zone = (i > ucutoff) - (i < cutoff) ; + int const cut1 = (zone < 0) ? (cutoff - i) : + (zone > 0) ? (i - ucutoff) : 0 ; + int cut[4]; + cut[0] = 0; + cut[1] = cut1; + cut[2] = cut1 ? cut1-1 : 0; + cut[3] = (cut1 && cutoff > 1) ? cut1-1 : cut1 ; + + for (col = 0; col < cols; ++col) { + unsigned int const col8=col/8; + unsigned int const offset=col%8; + int const pix = ( thisrow[col8] >> (7-offset) ) &0x01 ; + int const flag = flags[col] ; + int cutl, cutr; + + if( flag == 0x00 ) { + /* There are no corners, no color change */ + outcol += scale; + } else { + + switch (zone) { + case -1: + if( i==0 && flag == 0xff ) { /* No corners, color changed */ + cutl = cutr = 0; + flags[col]=0x00; /* Use above skip procedure next cycle */ + } else { + cutl = cut[ NW(flag) ]; + cutr = cut[ NE(flag) ]; + } + break; + case 0: + cutl = cutr = 0; + break ; + case 1: + cutl = cut[ SW(flag) ]; + cutr = cut[ SE(flag) ]; + break; + } + + if (cutl>0){ + writeBitSpan(outrow, cutl, outcol, !pix); + outcol +=cutl; + } + { + unsigned int const center = scale-cutl-cutr; + if (center > 0){ + writeBitSpan(outrow, center, outcol, pix); + outcol +=center; + } + } + if (cutr>0){ + writeBitSpan(outrow, cutr, outcol, !pix); + outcol +=cutr; + } + } } + pbm_writepbmrow_packed(stdout, outrow, outcols, 0) ; } } - int main(int argc, char ** argv) { - FILE * ifP; + bit ** buffer; + bit * prevrow; + bit * thisrow; + bit * nextrow; + bit * edgerow; bit * outrow; - unsigned int row; - int scale; - unsigned int outcols; - unsigned int outrows; - int cutoff; - int ucutoff; - unsigned char * flags; + unsigned int row, i; + int scale, cols, rows, format, outcols, outrows, cutoff, ucutoff ; + unsigned char *flags; - pbm_init(&argc, argv); + pbm_init( &argc, argv ); - if (argc-1 < 1) - pm_error("You must specify the scale factor as an argument"); + if (argc < 2) + pm_usage("scale [pbmfile]"); scale = atoi(argv[1]); if (scale < 1) pm_error("Scale argument must be at least one. You specified '%s'", argv[1]); - if (argc-1 == 2) + if (argc == 3) ifP = pm_openr(argv[2]); else ifP = stdin ; - inrow[0] = inrow[1] = inrow[2] = NULL; - pbm_readpbminit(ifP, &cols, &rows, &format) ; validateComputableDimensions(cols, rows, scale); + outcols= cols * scale; outrows= rows * scale; - outcols = cols * scale; - outrows = rows * scale; + /* Initialize input buffers. + We add a margin of 8 bits on the right of the three rows. - outrow = pbm_allocrow(outcols); - MALLOCARRAY(flags, cols); - if (flags == NULL) - pm_error("out of memory") ; + On the top and bottom of the image we place an imaginary blank row + ("edgerow") to facilitate the process. + */ + + buffer = pbm_allocarray_packed(cols+8, 3); + edgerow = pbm_allocrow_packed(cols+8); + + for(i=0; i < pbm_packed_bytes(cols+8); ++i) + edgerow[i] = 0x00; + + for(i=0; i < 3; ++i) /* Add blank bytes at right edges */ + buffer[i][ pbm_packed_bytes( cols +8 ) -1 ] = 0x00; + + thisrow = edgerow; + nextrow = buffer[0]; + + /* Read the top line into nextrow and clean the right end. */ + + pbm_readpbmrow_packed(ifP, nextrow, cols, format); + if (cols%8>0){ + nextrow[pbm_packed_bytes(cols) -1 ] >>= (8 - cols%8); + nextrow[pbm_packed_bytes(cols) -1 ] <<= (8 - cols%8); + } + + outrow = pbm_allocrow_packed(outcols); + for(i=0; i < pbm_packed_bytes(outcols); ++i) + outrow[i] = 0x00; + + MALLOCARRAY_NOFAIL(flags, cols); pbm_writepbminit(stdout, outcols, outrows, 0) ; cutoff = scale / 2; ucutoff = scale - 1 - cutoff; - nextrow_pscale(ifP, 0); + for (row = 0; row < rows; ++row) { - unsigned int i; - nextrow_pscale(ifP, row + 1); - - setFlags(flags); - - for (i = 0; i < scale; ++i) { - int const zone = (i > ucutoff) - (i < cutoff) ; - int const cut = (zone < 0) ? (cutoff - i) : - (zone > 0) ? (i - ucutoff) : 0 ; - - unsigned int col; - unsigned int outcol; - - outcol = 0; /* initial value */ - - for (col = 0; col < cols; ++col) { - int const pix = inrow[THISROW][col] ; - int const flag = flags[col] ; - int cutl, cutr; - - switch (zone) { - case -1: - switch (NW(flag)) { - case 0: cutl = 0; break; - case 1: cutl = cut; break; - case 2: cutl = cut ? cut-1 : 0; break; - case 3: cutl = (cut && cutoff > 1) ? cut-1 : cut; break; - default: cutl = 0; /* Should never reach here */ - } - switch (NE(flag)) { - case 0: cutr = 0; break; - case 1: cutr = cut; break; - case 2: cutr = cut ? cut-1 : 0; break; - case 3: cutr = (cut && cutoff > 1) ? cut-1 : cut; break; - default: cutr = 0; /* Should never reach here */ - } - break; - case 0: - cutl = cutr = 0; - break ; - case 1: - switch (SW(flag)) { - case 0: cutl = 0; break; - case 1: cutl = cut; break; - case 2: cutl = cut ? cut-1 : 0; break; - case 3: cutl = (cut && cutoff > 1) ? cut-1 : cut; break; - default: cutl = 0; /* should never reach here */ - } - switch (SE(flag)) { - case 0: cutr = 0; break; - case 1: cutr = cut; break; - case 2: cutr = cut ? cut-1 : 0; break; - case 3: cutr = (cut && cutoff > 1) ? cut-1 : cut; break; - default: cutr = 0; /* should never reach here */ - } - break; - default: cutl = 0; cutr = 0; /* Should never reach here */ - } - - { - unsigned int k; - for (k = 0; k < cutl; ++k) /* left part */ - outrow[outcol++] = !pix ; - for (k = 0; k < scale-cutl-cutr; ++k) /* center part */ - outrow[outcol++] = pix ; - for (k = 0; k < cutr; ++k) /* right part */ - outrow[outcol++] = !pix ; - } - } - pbm_writepbmrow(stdout, outrow, scale * cols, 0) ; - } + prevrow = thisrow; /* Slide up the input row window */ + thisrow = nextrow; + if( row < rows -1){ + nextrow = buffer[(row+1)%3]; + /* We take the address directly instead of shuffling the rows. + This provision is for proper handling of the initial edgerow. */ + + pbm_readpbmrow_packed(ifP, nextrow, cols, format); + if (cols%8>0){ + nextrow[pbm_packed_bytes(cols) -1 ] >>= (8 - cols%8); + nextrow[pbm_packed_bytes(cols) -1 ] <<= (8 - cols%8); + } + } + else /* Bottom of image. */ + nextrow = edgerow; + + setFlags( prevrow, thisrow, nextrow, flags, cols); + + expandRow(thisrow, prevrow, outrow, flags, cols, scale, + cutoff, ucutoff); } + pbm_freearray(buffer,3); + pbm_freerow(edgerow); + pbm_freerow(outrow); + free (flags); pm_close(ifP); return 0; } -- cgit 1.4.1