4 files changed, 155 insertions, 58 deletions
diff --git a/editor/pamflip/Makefile b/editor/pamflip/Makefile
index 497c5379..83e961a7 100644
--- a/editor/pamflip/Makefile
+++ b/editor/pamflip/Makefile
@@ -5,6 +5,8 @@ endif
 SUBDIR = editor/pamflip
 VPATH=.:$(SRCDIR)/$(SUBDIR)
 
+default: all
+
 include $(BUILDDIR)/config.mk
 
 SUBDIRS =
@@ -21,10 +23,12 @@ OBJECTS = $(PAMFLIP_OBJECTS)
 
 MERGE_OBJECTS = $(OBJECTS:%.o=%.o2)
 
+include $(SRCDIR)/common.mk
+
 .PHONY: all
 all: $(BINARIES) $(SUBDIRS:%=%/all)
 
-include $(SRCDIR)/common.mk
+pamflip_sse.o pamflip_sse.o2: CFLAGS_TARGET = $(CFLAGS_SSE)
 
 pamflip: $(PAMFLIP_OBJECTS) $(NETPBMLIB) $(LIBOPT)
 	$(LD) -o $@ $(PAMFLIP_OBJECTS) \
diff --git a/editor/pamflip/pamflip_sse.c b/editor/pamflip/pamflip_sse.c
index 5a256f9d..eccbe965 100644
--- a/editor/pamflip/pamflip_sse.c
+++ b/editor/pamflip/pamflip_sse.c
@@ -28,7 +28,11 @@
 
 #include "pamflip_sse.h"
 
-#if HAVE_GCC_SSE2 && defined(__SSE2__)
+/* Note that WANT_SSE implies the user expects SSE to be available
+   (i.e. <emmintrin.h> exists).
+*/
+
+#if WANT_SSE && defined(__SSE2__)
 
 /*----------------------------------------------------------------------------
    This is a specialized routine for row-for-column PBM transformations.
@@ -68,6 +72,8 @@
    possibility.
 -----------------------------------------------------------------------------*/
 
+#include <emmintrin.h>
+
 typedef char v16qi __attribute__ ((vector_size (16)));
 typedef int  v4di  __attribute__ ((vector_size (16)));
 
@@ -82,10 +88,23 @@ typedef int  v4di  __attribute__ ((vector_size (16)));
    variable must be vector from the beginning.
 
    Changes for your local system are okay, but if you intend to
-   publish the them, please specify the compiler version you used.
+   publish them, please specify the compiler version you used.
+
+   This code has been tested on gcc versions 4.2.0, 4.2.4, 4.3.2,
+   4.4.3, 4.4.4, 4.5.0, 4.5.2, 4.6.0 and 4.6.1, and on clang versions
+   3.0, 3.2 and 3.3.
+
+   We use SSE instructions in "_mm_" form in preference to "__builtin_".
+   In GCC the "__builtin_" form is documented but "_mm_" is not.
+   Former versions of this source file used "__builtin_".  This was
+   changed to make compilation with clang possible.
+
+   _mm_slli_epi32 : __builtin_ia32_pslldi128
+   _mm_cmpeq_epi8 : __builtin_ia32_pcmpeqb128
+   _mm_movemask_epi8 : __builtin_ia32_pmovmskb128
+
+   The conversion requires <emmintrin.h> .
 
-   This code has been tested on gcc versions: 4.2.0, 4.2.4, 4.3.2,
-   4.4.3, 4.4.4 and 4.5.0 .
 */
 
 
@@ -133,9 +152,10 @@ transpose16Bitrows(unsigned int const cols,
             block[12][col8], block[13][col8],
             block[14][col8], block[15][col8] };
 
-        register v16qi const compare =__builtin_ia32_pcmpeqb128(vReg,zero128);
+        register __m128i const compare =
+            _mm_cmpeq_epi8((__m128i)vReg, (__m128i)zero128);
 
-        if (__builtin_ia32_pmovmskb128(compare) != 0xffff) {
+        if (_mm_movemask_epi8(compare) != 0xffff) {
 
             /* There is some black content in this block; write to outplane */
             
@@ -147,10 +167,10 @@ transpose16Bitrows(unsigned int const cols,
             for (i = 0; i < 7; ++i) {
                 /* GCC (>=4.2) automatically unrolls this loop */  
                 outplane[outrow++][outcol16] =
-                    __builtin_ia32_pmovmskb128(vReg);
-                vReg = (v16qi)__builtin_ia32_pslldi128 ((v4di)vReg, 1);
+                    _mm_movemask_epi8((__m128i)vReg);
+                vReg = (v16qi)_mm_slli_epi32((__m128i)vReg, 1);
             }
-            outplane[outrow][outcol16] = __builtin_ia32_pmovmskb128(vReg);
+            outplane[outrow][outcol16] = _mm_movemask_epi8((__m128i)vReg);
         } else {
             /* The block is completely white; skip. */
         }
@@ -391,12 +411,12 @@ pamflip_transformRowsToColumnsPbmSse(const struct pam * const inpamP,
     pbm_freearray(outplane, outpamP->height + 7);
     pbm_freearray(inrow, 16);
 }
-#else  /* SSE functions exist */
+#else  /* WANT_SSE */
 
 void
 pamflip_transformRowsToColumnsPbmSse(const struct pam * const inpamP,
                                      const struct pam * const outpamP,
-                                     struct xformCore const xformCore) { 
+                                     struct xformCore   const xformCore) { 
 
     /* Nobody is supposed to call this */
     assert(false);
diff --git a/editor/pamfunc.c b/editor/pamfunc.c
index 2f6ac682..5945b82d 100644
--- a/editor/pamfunc.c
+++ b/editor/pamfunc.c
@@ -13,8 +13,8 @@
   multiply/divide where possible.  Especially when multiplying by an 
   integer.
 
-  2) For multiply/divide, give option of simply changing the maxval and
-  leaving the raster alone.
+  2) Speed up by not transforming the raster in the identity (no-op)
+  cases (e.g. multiply by one).
 
 ******************************************************************************/
 
@@ -23,7 +23,7 @@
 #include "shhopt.h"
 #include "pam.h"
 
-enum function {
+enum Function {
     FN_MULTIPLY,
     FN_DIVIDE,
     FN_ADD,
@@ -42,22 +42,23 @@ enum function {
    a "max" function.
 */
 
-struct cmdlineInfo {
+struct CmdlineInfo {
     /* All the information the user supplied in the command line,
        in a form easy for the program to use.
     */
-    const char *inputFilespec;  /* Filespec of input file */
-    enum function function;
+    const char * inputFileName;  /* Name of input file */
+    enum Function function;
     union {
-        float multiplier;
-        float divisor;
-        int adder;
-        int subtractor;
+        float        multiplier;
+        float        divisor;
+        int          adder;
+        int          subtractor;
         unsigned int max;
         unsigned int min;
         unsigned int mask;
         unsigned int shiftCount;
     } u;
+    unsigned int changemaxval;  /* -changemaxval option */
     unsigned int verbose;
 };
 
@@ -80,8 +81,8 @@ parseHex(const char * const hexString) {
          
 
 static void
-parseCommandLine(int argc, char ** const argv,
-                 struct cmdlineInfo * const cmdlineP) {
+parseCommandLine(int argc, const char ** const argv,
+                 struct CmdlineInfo * const cmdlineP) {
 /*----------------------------------------------------------------------------
    Note that the file spec array we return is stored in the storage that
    was passed to us as the argv array.
@@ -126,13 +127,16 @@ parseCommandLine(int argc, char ** const argv,
             &shiftleftSpec,  0);
     OPTENT3(0,   "shiftright", OPT_UINT,   &cmdlineP->u.shiftCount,
             &shiftrightSpec, 0);
-    OPTENT3(0,   "verbose",    OPT_FLAG,   NULL, &cmdlineP->verbose,       0);
+    OPTENT3(0,   "verbose",      OPT_FLAG,   NULL, &cmdlineP->verbose,
+            0);
+    OPTENT3(0,   "changemaxval", OPT_FLAG,   NULL, &cmdlineP->changemaxval,
+            0);
 
     opt.opt_table = option_def;
     opt.short_allowed = FALSE;  /* We have no short (old-fashioned) options */
     opt.allowNegNum = FALSE;  /* We have no parms that are negative numbers */
 
-    pm_optParseOptions3(&argc, argv, opt, sizeof(opt), 0);
+    pm_optParseOptions3(&argc, (char **)argv, opt, sizeof(opt), 0);
         /* Uses and sets argc, argv, and some of *cmdlineP and others. */
 
     if (multiplierSpec + divisorSpec + adderSpec + subtractorSpec +
@@ -186,16 +190,17 @@ parseCommandLine(int argc, char ** const argv,
                  argc-1);
 
     if (argc-1 < 1)
-        cmdlineP->inputFilespec = "-";
+        cmdlineP->inputFileName = "-";
     else 
-        cmdlineP->inputFilespec = argv[1];
+        cmdlineP->inputFileName = argv[1];
     
+    free(option_def);
 }
 
 
 
 static bool
-isDyadicMaskFunction(enum function const fn) {
+isDyadicMaskFunction(enum Function const fn) {
 
     return (fn == FN_AND || fn == FN_OR || fn == FN_XOR);
 }
@@ -203,7 +208,7 @@ isDyadicMaskFunction(enum function const fn) {
 
 
 static bool
-isMaskFunction(enum function const fn) {
+isMaskFunction(enum Function const fn) {
 
     return (isDyadicMaskFunction(fn) || fn == FN_NOT);
 }
@@ -211,7 +216,7 @@ isMaskFunction(enum function const fn) {
 
 
 static bool
-isShiftFunction(enum function const fn) {
+isShiftFunction(enum Function const fn) {
 
     return (fn == FN_SHIFTLEFT || fn == FN_SHIFTRIGHT);
 }
@@ -219,7 +224,7 @@ isShiftFunction(enum function const fn) {
 
 
 static bool
-isBitstringFunction(enum function const fn) {
+isBitstringFunction(enum Function const fn) {
 
     return isMaskFunction(fn) || isShiftFunction(fn);
 }
@@ -227,7 +232,7 @@ isBitstringFunction(enum function const fn) {
 
 
 static void
-validateFunction(struct cmdlineInfo const cmdline,
+validateFunction(struct CmdlineInfo const cmdline,
                  const struct pam * const pamP) {
 
     if (isBitstringFunction(cmdline.function)) {
@@ -259,7 +264,58 @@ validateFunction(struct cmdlineInfo const cmdline,
 
 
 static void
-applyFunction(struct cmdlineInfo const cmdline,
+planTransform(struct CmdlineInfo const cmdline,
+              sample             const inputMaxval,
+              sample *           const outputMaxvalP,
+              bool *             const mustChangeRasterP) {
+/*----------------------------------------------------------------------------
+   Plan the transform described by 'cmdline', given the maxval of the input
+   image is 'inputMaxval'.  Return the output maxval as *outputMaxvalP and
+   whether the raster must be transformed as *mustChangeRasterP.
+
+   Only a multiply or divide under -changemaxval can be done by changing
+   the maxval and leaving the samples in the raster alone.
+-----------------------------------------------------------------------------*/
+    if (cmdline.changemaxval) {
+        /* User allows us to change the maxval, if that makes it easier */
+        if (cmdline.function == FN_MULTIPLY || cmdline.function == FN_DIVIDE) {
+            float const multiplier =
+                cmdline.function == FN_MULTIPLY ? cmdline.u.multiplier :
+                (1/cmdline.u.divisor);
+            /* Maxval that makes the unmodified samples the result */
+            float const neededMaxval = inputMaxval / multiplier;
+
+            if (neededMaxval + 0.5 < inputMaxval) {
+                /* Lowering the maxval might make some of the sample values
+                   higher than the maxval, so we'd have to modify the raster
+                   to clip them.
+                */
+                *outputMaxvalP     = inputMaxval;
+                *mustChangeRasterP = true;
+            } else if (neededMaxval > PAM_OVERALL_MAXVAL) {
+                *outputMaxvalP     = inputMaxval;  /* needed maxval too big */
+                *mustChangeRasterP = true;
+            } else {
+                *outputMaxvalP     = ROUNDU(neededMaxval);
+                *mustChangeRasterP = false;
+            }
+        } else {
+            *outputMaxvalP     = inputMaxval;
+            *mustChangeRasterP = true;
+        }
+    } else {
+        *outputMaxvalP     = inputMaxval;
+        *mustChangeRasterP = true;
+    }
+    if (*outputMaxvalP != inputMaxval)
+        pm_message("Changing maxval to %u because of -changemaxval",
+                   (unsigned)*outputMaxvalP);
+}
+
+
+
+static void
+applyFunction(struct CmdlineInfo const cmdline,
               struct pam         const inpam,
               struct pam         const outpam,
               tuple *            const inputRow,
@@ -275,10 +331,10 @@ applyFunction(struct cmdlineInfo const cmdline,
            divide, both cmdline.u.divisor and oneOverDivisor are
            meaningless.  
         */
-    int col;
+    unsigned int col;
 
     for (col = 0; col < inpam.width; ++col) {
-        int plane;
+        unsigned int plane;
         for (plane = 0; plane < inpam.depth; ++plane) {
             sample const inSample = inputRow[col][plane];
             sample outSample;  /* Could be > maxval  */
@@ -330,21 +386,22 @@ applyFunction(struct cmdlineInfo const cmdline,
 
 
 int
-main(int argc, char *argv[]) {
+main(int argc, const char *argv[]) {
 
     FILE * ifP;
     tuple * inputRow;   /* Row from input image */
     tuple * outputRow;  /* Row of output image */
-    int row;
-    struct cmdlineInfo cmdline;
+    unsigned int row;
+    struct CmdlineInfo cmdline;
     struct pam inpam;   /* Input PAM image */
     struct pam outpam;  /* Output PAM image */
+    bool mustChangeRaster;
 
-    pnm_init(&argc, argv);
+    pm_proginit(&argc, argv);
 
     parseCommandLine(argc, argv, &cmdline);
 
-    ifP = pm_openr(cmdline.inputFilespec);
+    ifP = pm_openr(cmdline.inputFileName);
 
     pnm_readpaminit(ifP, &inpam, PAM_STRUCT_SIZE(tuple_type));
 
@@ -355,16 +412,21 @@ main(int argc, char *argv[]) {
     outpam = inpam;    /* Initial value -- most fields should be same */
     outpam.file = stdout;
 
+    planTransform(cmdline, inpam.maxval, &outpam.maxval, &mustChangeRaster);
+
     pnm_writepaminit(&outpam);
 
     outputRow = pnm_allocpamrow(&outpam);
 
-    for (row = 0; row < inpam.height; row++) {
+    for (row = 0; row < inpam.height; ++row) {
         pnm_readpamrow(&inpam, inputRow);
 
-        applyFunction(cmdline, inpam, outpam, inputRow, outputRow);
+        if (mustChangeRaster) {
+            applyFunction(cmdline, inpam, outpam, inputRow, outputRow);
 
-        pnm_writepamrow(&outpam, outputRow);
+            pnm_writepamrow(&outpam, outputRow);
+        } else
+            pnm_writepamrow(&outpam, inputRow);
     }
     pnm_freepamrow(outputRow);
     pnm_freepamrow(inputRow);
@@ -374,3 +436,5 @@ main(int argc, char *argv[]) {
     return 0;
 }
 
+
+
diff --git a/editor/pnmshear.c b/editor/pnmshear.c
index 657f265d..99fa3026 100644
--- a/editor/pnmshear.c
+++ b/editor/pnmshear.c
@@ -1,4 +1,4 @@
-/* pnmshear.c - read a portable anymap and shear it by some angle
+/* pnmshear.c - read a portable anymap and shear it by some angle
 **
 ** Copyright (C) 1989, 1991 by Jef Poskanzer.
 **
@@ -17,6 +17,7 @@
 #include <string.h>
 
 #include "pm_c_util.h"
+#include "mallocvar.h"
 #include "ppm.h"
 #include "pnm.h"
 #include "shhopt.h"
@@ -24,11 +25,13 @@
 #define SCALE 4096
 #define HALFSCALE 2048
 
-struct cmdline_info {
+
+
+struct CmdlineInfo {
     /* All the information the user supplied in the command line,
        in a form easy for the program to use.
     */
-    const char *       input_filespec;  /* Filespec of input file */
+    const char * inputFileName;   /* Name of input file */
     double       angle;           /* requested shear angle, in radians */
     unsigned int noantialias;     /* -noantialias option */
     const char * background;      /* NULL if none */
@@ -37,15 +40,17 @@ struct cmdline_info {
 
 
 static void
-parseCommandLine(int argc, char ** argv,
-                 struct cmdline_info *cmdlineP) {
+parseCommandLine(int argc, const char ** argv,
+                 struct CmdlineInfo *cmdlineP) {
 
     optStruct3 opt;
     unsigned int option_def_index = 0;
-    optEntry *option_def = malloc(100*sizeof(optEntry));
+    optEntry * option_def;
 
     unsigned int backgroundSpec;
 
+    MALLOCARRAY(option_def, 100);
+
     OPTENT3(0, "noantialias",      OPT_FLAG,  NULL, &cmdlineP->noantialias, 0);
     OPTENT3(0, "background",       OPT_STRING, &cmdlineP->background,
             &backgroundSpec, 0);
@@ -54,7 +59,7 @@ parseCommandLine(int argc, char ** argv,
     opt.short_allowed = FALSE;
     opt.allowNegNum = TRUE;
 
-    pm_optParseOptions3(&argc, argv, opt, sizeof(opt), 0);
+    pm_optParseOptions3(&argc, (char **)argv, opt, sizeof(opt), 0);
 
     if (!backgroundSpec)
         cmdlineP->background = NULL;
@@ -68,15 +73,16 @@ parseCommandLine(int argc, char ** argv,
             pm_error("Angle argument is not a valid floating point number: "
                      "'%s'", argv[1]);
         if (argc-1 < 2)
-            cmdlineP->input_filespec = "-";
+            cmdlineP->inputFileName = "-";
         else {
-            cmdlineP->input_filespec = argv[2];
+            cmdlineP->inputFileName = argv[2];
             if (argc-1 > 2)
                 pm_error("too many arguments (%d).  "
                          "The only arguments are shear angle and filespec.",
                          argc-1);
         }
     }
+    free(option_def);
 }
 
 
@@ -200,7 +206,7 @@ backgroundColor(const char * const backgroundColorName,
 
 
 int
-main(int argc, char * argv[]) {
+main(int argc, const char * argv[]) {
 
     FILE * ifP;
     xel * xelrow;
@@ -212,13 +218,13 @@ main(int argc, char * argv[]) {
     xelval maxval, newmaxval;
     double shearfac;
 
-    struct cmdline_info cmdline;
+    struct CmdlineInfo cmdline;
 
-    pnm_init(&argc, argv);
+    pm_proginit(&argc, argv);
 
     parseCommandLine(argc, argv, &cmdline);
 
-    ifP = pm_openr(cmdline.input_filespec);
+    ifP = pm_openr(cmdline.inputFileName);
 
     pnm_readpnminit(ifP, &cols, &rows, &maxval, &format);
     xelrow = pnm_allocrow(cols);
@@ -256,7 +262,7 @@ main(int argc, char * argv[]) {
             shearCols = (rows - row) * shearfac;
 
         shearRow(xelrow, cols, newxelrow, newcols, 
-                  shearCols, format, bgxel, !cmdline.noantialias);
+                 shearCols, format, bgxel, !cmdline.noantialias);
 
         pnm_writepnmrow(stdout, newxelrow, newcols, newmaxval, newformat, 0);
     }
@@ -266,3 +272,6 @@ main(int argc, char * argv[]) {
 
     return 0;
 }
+
+
+