diff options
Diffstat (limited to 'editor')
-rw-r--r-- | editor/pamflip/Makefile | 6 | ||||
-rw-r--r-- | editor/pamflip/pamflip_sse.c | 42 | ||||
-rw-r--r-- | editor/pamfunc.c | 128 | ||||
-rw-r--r-- | editor/pnmshear.c | 37 |
4 files changed, 155 insertions, 58 deletions
diff --git a/editor/pamflip/Makefile b/editor/pamflip/Makefile index 497c5379..83e961a7 100644 --- a/editor/pamflip/Makefile +++ b/editor/pamflip/Makefile @@ -5,6 +5,8 @@ endif SUBDIR = editor/pamflip VPATH=.:$(SRCDIR)/$(SUBDIR) +default: all + include $(BUILDDIR)/config.mk SUBDIRS = @@ -21,10 +23,12 @@ OBJECTS = $(PAMFLIP_OBJECTS) MERGE_OBJECTS = $(OBJECTS:%.o=%.o2) +include $(SRCDIR)/common.mk + .PHONY: all all: $(BINARIES) $(SUBDIRS:%=%/all) -include $(SRCDIR)/common.mk +pamflip_sse.o pamflip_sse.o2: CFLAGS_TARGET = $(CFLAGS_SSE) pamflip: $(PAMFLIP_OBJECTS) $(NETPBMLIB) $(LIBOPT) $(LD) -o $@ $(PAMFLIP_OBJECTS) \ diff --git a/editor/pamflip/pamflip_sse.c b/editor/pamflip/pamflip_sse.c index 5a256f9d..eccbe965 100644 --- a/editor/pamflip/pamflip_sse.c +++ b/editor/pamflip/pamflip_sse.c @@ -28,7 +28,11 @@ #include "pamflip_sse.h" -#if HAVE_GCC_SSE2 && defined(__SSE2__) +/* Note that WANT_SSE implies the user expects SSE to be available + (i.e. <emmintrin.h> exists). +*/ + +#if WANT_SSE && defined(__SSE2__) /*---------------------------------------------------------------------------- This is a specialized routine for row-for-column PBM transformations. @@ -68,6 +72,8 @@ possibility. -----------------------------------------------------------------------------*/ +#include <emmintrin.h> + typedef char v16qi __attribute__ ((vector_size (16))); typedef int v4di __attribute__ ((vector_size (16))); @@ -82,10 +88,23 @@ typedef int v4di __attribute__ ((vector_size (16))); variable must be vector from the beginning. Changes for your local system are okay, but if you intend to - publish the them, please specify the compiler version you used. + publish them, please specify the compiler version you used. + + This code has been tested on gcc versions 4.2.0, 4.2.4, 4.3.2, + 4.4.3, 4.4.4, 4.5.0, 4.5.2, 4.6.0 and 4.6.1 clang versions + 3.0, 3.2, 3.3. + + We use SSE instructions in "_mm_" form in favor of "__builtin_". + In GCC the "__builtin_" form is documented but "_mm_" is not. + Former versions of this source file used "__builtin_". This was + changed to make possible compilation with clang. + + _mm_slli_epi32 : __builtin_ia32_pslldi128 + _mm_cmpeq_epi8 : __builtin_ia32_pcmpeqb128 + _mm_movemask_epi8 : __builtin_ia32_pmovmskb128 + + The conversion requires <emmintrin.h> . - This code has been tested on gcc versions: 4.2.0, 4.2.4, 4.3.2, - 4.4.3, 4.4.4 and 4.5.0 . */ @@ -133,9 +152,10 @@ transpose16Bitrows(unsigned int const cols, block[12][col8], block[13][col8], block[14][col8], block[15][col8] }; - register v16qi const compare =__builtin_ia32_pcmpeqb128(vReg,zero128); + register __m128i const compare = + _mm_cmpeq_epi8((__m128i)vReg, (__m128i)zero128); - if (__builtin_ia32_pmovmskb128(compare) != 0xffff) { + if (_mm_movemask_epi8(compare) != 0xffff) { /* There is some black content in this block; write to outplane */ @@ -147,10 +167,10 @@ transpose16Bitrows(unsigned int const cols, for (i = 0; i < 7; ++i) { /* GCC (>=4.2) automatically unrolls this loop */ outplane[outrow++][outcol16] = - __builtin_ia32_pmovmskb128(vReg); - vReg = (v16qi)__builtin_ia32_pslldi128 ((v4di)vReg, 1); + _mm_movemask_epi8((__m128i)vReg); + vReg = (v16qi)_mm_slli_epi32((__m128i)vReg, 1); } - outplane[outrow][outcol16] = __builtin_ia32_pmovmskb128(vReg); + outplane[outrow][outcol16] = _mm_movemask_epi8((__m128i)vReg); } else { /* The block is completely white; skip. */ } @@ -391,12 +411,12 @@ pamflip_transformRowsToColumnsPbmSse(const struct pam * const inpamP, pbm_freearray(outplane, outpamP->height + 7); pbm_freearray(inrow, 16); } -#else /* SSE functions exist */ +#else /* WANT_SSE */ void pamflip_transformRowsToColumnsPbmSse(const struct pam * const inpamP, const struct pam * const outpamP, - struct xformCore const xformCore) { + struct xformCore const xformCore) { /* Nobody is supposed to call this */ assert(false); diff --git a/editor/pamfunc.c b/editor/pamfunc.c index 2f6ac682..5945b82d 100644 --- a/editor/pamfunc.c +++ b/editor/pamfunc.c @@ -13,8 +13,8 @@ multiply/divide where possible. Especially when multiplying by an integer. - 2) For multiply/divide, give option of simply changing the maxval and - leaving the raster alone. + 2) speed up by not transforming the raster in the idempotent cases + (e.g. multiply by one). ******************************************************************************/ @@ -23,7 +23,7 @@ #include "shhopt.h" #include "pam.h" -enum function { +enum Function { FN_MULTIPLY, FN_DIVIDE, FN_ADD, @@ -42,22 +42,23 @@ enum function { a "max" function. */ -struct cmdlineInfo { +struct CmdlineInfo { /* All the information the user supplied in the command line, in a form easy for the program to use. */ - const char *inputFilespec; /* Filespec of input file */ - enum function function; + const char * inputFileName; + enum Function function; union { - float multiplier; - float divisor; - int adder; - int subtractor; + float multiplier; + float divisor; + int adder; + int subtractor; unsigned int max; unsigned int min; unsigned int mask; unsigned int shiftCount; } u; + unsigned int changemaxval; unsigned int verbose; }; @@ -80,8 +81,8 @@ parseHex(const char * const hexString) { static void -parseCommandLine(int argc, char ** const argv, - struct cmdlineInfo * const cmdlineP) { +parseCommandLine(int argc, const char ** const argv, + struct CmdlineInfo * const cmdlineP) { /*---------------------------------------------------------------------------- Note that the file spec array we return is stored in the storage that was passed to us as the argv array. @@ -126,13 +127,16 @@ parseCommandLine(int argc, char ** const argv, &shiftleftSpec, 0); OPTENT3(0, "shiftright", OPT_UINT, &cmdlineP->u.shiftCount, &shiftrightSpec, 0); - OPTENT3(0, "verbose", OPT_FLAG, NULL, &cmdlineP->verbose, 0); + OPTENT3(0, "verbose", OPT_FLAG, NULL, &cmdlineP->verbose, + 0); + OPTENT3(0, "changemaxval", OPT_FLAG, NULL, &cmdlineP->changemaxval, + 0); opt.opt_table = option_def; opt.short_allowed = FALSE; /* We have no short (old-fashioned) options */ opt.allowNegNum = FALSE; /* We have no parms that are negative numbers */ - pm_optParseOptions3(&argc, argv, opt, sizeof(opt), 0); + pm_optParseOptions3(&argc, (char **)argv, opt, sizeof(opt), 0); /* Uses and sets argc, argv, and some of *cmdlineP and others. */ if (multiplierSpec + divisorSpec + adderSpec + subtractorSpec + @@ -186,16 +190,17 @@ parseCommandLine(int argc, char ** const argv, argc-1); if (argc-1 < 1) - cmdlineP->inputFilespec = "-"; + cmdlineP->inputFileName = "-"; else - cmdlineP->inputFilespec = argv[1]; + cmdlineP->inputFileName = argv[1]; + free(option_def); } static bool -isDyadicMaskFunction(enum function const fn) { +isDyadicMaskFunction(enum Function const fn) { return (fn == FN_AND || fn == FN_OR || fn == FN_XOR); } @@ -203,7 +208,7 @@ isDyadicMaskFunction(enum function const fn) { static bool -isMaskFunction(enum function const fn) { +isMaskFunction(enum Function const fn) { return (isDyadicMaskFunction(fn) || fn == FN_NOT); } @@ -211,7 +216,7 @@ isMaskFunction(enum function const fn) { static bool -isShiftFunction(enum function const fn) { +isShiftFunction(enum Function const fn) { return (fn == FN_SHIFTLEFT || fn == FN_SHIFTRIGHT); } @@ -219,7 +224,7 @@ isShiftFunction(enum function const fn) { static bool -isBitstringFunction(enum function const fn) { +isBitstringFunction(enum Function const fn) { return isMaskFunction(fn) || isShiftFunction(fn); } @@ -227,7 +232,7 @@ isBitstringFunction(enum function const fn) { static void -validateFunction(struct cmdlineInfo const cmdline, +validateFunction(struct CmdlineInfo const cmdline, const struct pam * const pamP) { if (isBitstringFunction(cmdline.function)) { @@ -259,7 +264,58 @@ validateFunction(struct cmdlineInfo const cmdline, static void -applyFunction(struct cmdlineInfo const cmdline, +planTransform(struct CmdlineInfo const cmdline, + sample const inputMaxval, + sample * const outputMaxvalP, + bool * const mustChangeRasterP) { +/*---------------------------------------------------------------------------- + Plan the transform described by 'cmdline', given the maxval of the input + image is 'inputMaxval. + + The plan just consists of whether to change the maxval or the raster. + Some multiplications and divisions can be achieved just by changing the + maxval and leaving the samples in the raster alone. +-----------------------------------------------------------------------------*/ + if (cmdline.changemaxval) { + /* User allows us to change the maxval, if that makes it easier */ + if (cmdline.function == FN_MULTIPLY || cmdline.function == FN_DIVIDE) { + float const multiplier = + cmdline.function == FN_MULTIPLY ? cmdline.u.multiplier : + (1/cmdline.u.divisor); + + float const neededMaxval = inputMaxval / multiplier; + + if (neededMaxval + 0.5 < inputMaxval) { + /* Lowering the maxval might make some of the sample values + higher than the maxval, so we'd have to modify the raster + to clip them. + */ + *outputMaxvalP = inputMaxval; + *mustChangeRasterP = true; + } else if (neededMaxval > PAM_OVERALL_MAXVAL) { + *outputMaxvalP = inputMaxval; + *mustChangeRasterP = true; + } else { + *outputMaxvalP = ROUNDU(neededMaxval); + *mustChangeRasterP = false; + } + } else { + *outputMaxvalP = inputMaxval; + *mustChangeRasterP = true; + } + } else { + *outputMaxvalP = inputMaxval; + *mustChangeRasterP = true; + } + if (*outputMaxvalP != inputMaxval) + pm_message("Changing maxval to %u because of -changemaxval", + (unsigned)*outputMaxvalP); +} + + + +static void +applyFunction(struct CmdlineInfo const cmdline, struct pam const inpam, struct pam const outpam, tuple * const inputRow, @@ -275,10 +331,10 @@ applyFunction(struct cmdlineInfo const cmdline, divide, both cmdline.u.divisor and oneOverDivisor are meaningless. */ - int col; + unsigned int col; for (col = 0; col < inpam.width; ++col) { - int plane; + unsigned int plane; for (plane = 0; plane < inpam.depth; ++plane) { sample const inSample = inputRow[col][plane]; sample outSample; /* Could be > maxval */ @@ -330,21 +386,22 @@ applyFunction(struct cmdlineInfo const cmdline, int -main(int argc, char *argv[]) { +main(int argc, const char *argv[]) { FILE * ifP; tuple * inputRow; /* Row from input image */ tuple * outputRow; /* Row of output image */ - int row; - struct cmdlineInfo cmdline; + unsigned int row; + struct CmdlineInfo cmdline; struct pam inpam; /* Input PAM image */ struct pam outpam; /* Output PAM image */ + bool mustChangeRaster; - pnm_init(&argc, argv); + pm_proginit(&argc, argv); parseCommandLine(argc, argv, &cmdline); - ifP = pm_openr(cmdline.inputFilespec); + ifP = pm_openr(cmdline.inputFileName); pnm_readpaminit(ifP, &inpam, PAM_STRUCT_SIZE(tuple_type)); @@ -355,16 +412,21 @@ main(int argc, char *argv[]) { outpam = inpam; /* Initial value -- most fields should be same */ outpam.file = stdout; + planTransform(cmdline, inpam.maxval, &outpam.maxval, &mustChangeRaster); + pnm_writepaminit(&outpam); outputRow = pnm_allocpamrow(&outpam); - for (row = 0; row < inpam.height; row++) { + for (row = 0; row < inpam.height; ++row) { pnm_readpamrow(&inpam, inputRow); - applyFunction(cmdline, inpam, outpam, inputRow, outputRow); + if (mustChangeRaster) { + applyFunction(cmdline, inpam, outpam, inputRow, outputRow); - pnm_writepamrow(&outpam, outputRow); + pnm_writepamrow(&outpam, outputRow); + } else + pnm_writepamrow(&outpam, inputRow); } pnm_freepamrow(outputRow); pnm_freepamrow(inputRow); @@ -374,3 +436,5 @@ main(int argc, char *argv[]) { return 0; } + + diff --git a/editor/pnmshear.c b/editor/pnmshear.c index 657f265d..99fa3026 100644 --- a/editor/pnmshear.c +++ b/editor/pnmshear.c @@ -1,4 +1,4 @@ -/* pnmshear.c - read a portable anymap and shear it by some angle + /* pnmshear.c - read a portable anymap and shear it by some angle ** ** Copyright (C) 1989, 1991 by Jef Poskanzer. ** @@ -17,6 +17,7 @@ #include <string.h> #include "pm_c_util.h" +#include "mallocvar.h" #include "ppm.h" #include "pnm.h" #include "shhopt.h" @@ -24,11 +25,13 @@ #define SCALE 4096 #define HALFSCALE 2048 -struct cmdline_info { + + +struct CmdlineInfo { /* All the information the user supplied in the command line, in a form easy for the program to use. */ - const char * input_filespec; /* Filespec of input file */ + const char * inputFileName; /* Name of input file */ double angle; /* requested shear angle, in radians */ unsigned int noantialias; /* -noantialias option */ const char * background; /* NULL if none */ @@ -37,15 +40,17 @@ struct cmdline_info { static void -parseCommandLine(int argc, char ** argv, - struct cmdline_info *cmdlineP) { +parseCommandLine(int argc, const char ** argv, + struct CmdlineInfo *cmdlineP) { optStruct3 opt; unsigned int option_def_index = 0; - optEntry *option_def = malloc(100*sizeof(optEntry)); + optEntry * option_def; unsigned int backgroundSpec; + MALLOCARRAY(option_def, 100); + OPTENT3(0, "noantialias", OPT_FLAG, NULL, &cmdlineP->noantialias, 0); OPTENT3(0, "background", OPT_STRING, &cmdlineP->background, &backgroundSpec, 0); @@ -54,7 +59,7 @@ parseCommandLine(int argc, char ** argv, opt.short_allowed = FALSE; opt.allowNegNum = TRUE; - pm_optParseOptions3(&argc, argv, opt, sizeof(opt), 0); + pm_optParseOptions3(&argc, (char **)argv, opt, sizeof(opt), 0); if (!backgroundSpec) cmdlineP->background = NULL; @@ -68,15 +73,16 @@ parseCommandLine(int argc, char ** argv, pm_error("Angle argument is not a valid floating point number: " "'%s'", argv[1]); if (argc-1 < 2) - cmdlineP->input_filespec = "-"; + cmdlineP->inputFileName = "-"; else { - cmdlineP->input_filespec = argv[2]; + cmdlineP->inputFileName = argv[2]; if (argc-1 > 2) pm_error("too many arguments (%d). " "The only arguments are shear angle and filespec.", argc-1); } } + free(option_def); } @@ -200,7 +206,7 @@ backgroundColor(const char * const backgroundColorName, int -main(int argc, char * argv[]) { +main(int argc, const char * argv[]) { FILE * ifP; xel * xelrow; @@ -212,13 +218,13 @@ main(int argc, char * argv[]) { xelval maxval, newmaxval; double shearfac; - struct cmdline_info cmdline; + struct CmdlineInfo cmdline; - pnm_init(&argc, argv); + pm_proginit(&argc, argv); parseCommandLine(argc, argv, &cmdline); - ifP = pm_openr(cmdline.input_filespec); + ifP = pm_openr(cmdline.inputFileName); pnm_readpnminit(ifP, &cols, &rows, &maxval, &format); xelrow = pnm_allocrow(cols); @@ -256,7 +262,7 @@ main(int argc, char * argv[]) { shearCols = (rows - row) * shearfac; shearRow(xelrow, cols, newxelrow, newcols, - shearCols, format, bgxel, !cmdline.noantialias); + shearCols, format, bgxel, !cmdline.noantialias); pnm_writepnmrow(stdout, newxelrow, newcols, newmaxval, newformat, 0); } @@ -266,3 +272,6 @@ main(int argc, char * argv[]) { return 0; } + + + |