about summary refs log tree commit diff
diff options
context:
space:
mode:
author giraffedata <giraffedata@9d0c8265-081b-0410-96cb-a4ca84ce46f8> 2010-05-18 02:37:18 +0000
committer giraffedata <giraffedata@9d0c8265-081b-0410-96cb-a4ca84ce46f8> 2010-05-18 02:37:18 +0000
commit 60a0d64307394bad62684c32bbccf89552d15090 (patch)
tree ba89fd811ea731b9a0cc0e64ff4fe9c05e92f0b2
parent 559136d4679d53037e4b74dd3f833b2ea89ccc05 (diff)
download netpbm-mirror-60a0d64307394bad62684c32bbccf89552d15090.tar.gz
netpbm-mirror-60a0d64307394bad62684c32bbccf89552d15090.tar.xz
netpbm-mirror-60a0d64307394bad62684c32bbccf89552d15090.zip
Pamflip PBM speedup with SSE et al
git-svn-id: http://svn.code.sf.net/p/netpbm/code/trunk@1214 9d0c8265-081b-0410-96cb-a4ca84ce46f8
-rw-r--r-- doc/HISTORY 5
-rw-r--r-- editor/Makefile 5
-rw-r--r-- editor/pamflip/Makefile 32
-rw-r--r-- editor/pamflip/flip.h 16
-rw-r--r-- editor/pamflip/pamflip.c (renamed from editor/pamflip.c) 82
-rw-r--r-- editor/pamflip/pamflip.test (renamed from editor/pamflip.test) 0
-rw-r--r-- editor/pamflip/pamflip_sse.c 345
-rw-r--r-- editor/pamflip/pamflip_sse.h 12
8 files changed, 464 insertions, 33 deletions
diff --git a/doc/HISTORY b/doc/HISTORY
index ed422a16..493b04a2 100644
--- a/doc/HISTORY
+++ b/doc/HISTORY
@@ -19,9 +19,14 @@ not yet  BJH  Release 10.51.00
               pnmsmooth: Don't display pnmconvol messages (i.e. run
               pnmconvol with -quiet).
 
+              pamflip: speedup for PBM.  Use SSE2 and skip some idempotent
+              pixel movement.  Thanks Prophet of the Way <afu@wta.att.ne.jp>.
+
               libnetpbm, various PBM programs: Use SSE insted of MMX.  Thanks
               Prophet of the Way <afu@wta.att.ne.jp>.
 
+              libnetpbm: remove bogus assertion.
+
               pnmtops: fix bug: 12 bits per sample output when 8 would do.
               Introduced in 10.40.
 
diff --git a/editor/Makefile b/editor/Makefile
index 29cdcab1..52b2b0bc 100644
--- a/editor/Makefile
+++ b/editor/Makefile
@@ -7,7 +7,7 @@ VPATH=.:$(SRCDIR)/$(SUBDIR)
 
 include $(BUILDDIR)/config.mk
 
-SUBDIRS = specialty
+SUBDIRS = pamflip specialty
 
 # We tend to separate out the build targets so that we don't have
 # any more dependencies for a given target than it really needs.
@@ -19,7 +19,7 @@ SUBDIRS = specialty
 PORTBINARIES = pamaddnoise pambackground pamcomp pamcut \
 	       pamdice pamditherbw pamedge \
 	       pamenlarge \
-	       pamflip pamfunc pammasksharpen \
+	       pamfunc pammasksharpen \
 	       pampaintspill pamperspective \
 	       pamscale pamsistoaglyph pamstretch pamthreshold pamundice \
 	       pbmclean pbmmask pbmpscale pbmreduce \
@@ -42,7 +42,6 @@ PORTBINARIES = pamaddnoise pambackground pamcomp pamcut \
 NOMERGEBINARIES = 
 MERGEBINARIES = $(PORTBINARIES)
 
-
 BINARIES = $(MERGEBINARIES) $(NOMERGEBINARIES)
 SCRIPTS = pnmflip ppmfade ppmquant ppmquantall ppmshadow \
 	  pamstretch-gen pnmmargin pnmquant 
diff --git a/editor/pamflip/Makefile b/editor/pamflip/Makefile
new file mode 100644
index 00000000..497c5379
--- /dev/null
+++ b/editor/pamflip/Makefile
@@ -0,0 +1,32 @@
+ifeq ($(SRCDIR)x,x)
+  SRCDIR = $(CURDIR)/../..
+  BUILDDIR = $(SRCDIR)
+endif
+SUBDIR = editor/pamflip
+VPATH=.:$(SRCDIR)/$(SUBDIR)
+
+include $(BUILDDIR)/config.mk
+
+SUBDIRS =
+
+MERGEBINARIES = pamflip
+
+BINARIES = pamflip
+
+SCRIPTS =
+
+PAMFLIP_OBJECTS = pamflip.o pamflip_sse.o
+
+OBJECTS = $(PAMFLIP_OBJECTS)
+
+MERGE_OBJECTS = $(OBJECTS:%.o=%.o2)
+
+.PHONY: all
+all: $(BINARIES) $(SUBDIRS:%=%/all)
+
+include $(SRCDIR)/common.mk
+
+pamflip: $(PAMFLIP_OBJECTS) $(NETPBMLIB) $(LIBOPT)
+	$(LD) -o $@ $(PAMFLIP_OBJECTS) \
+	  $(shell $(LIBOPT) $(NETPBMLIB)) \
+	  $(MATHLIB) $(LDFLAGS) $(LDLIBS) $(RPATH) $(LADD)
diff --git a/editor/pamflip/flip.h b/editor/pamflip/flip.h
new file mode 100644
index 00000000..612a7f84
--- /dev/null
+++ b/editor/pamflip/flip.h
@@ -0,0 +1,16 @@
+#ifndef FLIP_H_INCLUDED
+#define FLIP_H_INCLUDED
+
+struct xformCore {
+    /* a b
+       c d
+    */
+    int a;  /* -1, 0, or 1 */
+    int b;  /* -1, 0, or 1 */
+    int c;  /* -1, 0, or 1 */
+    int d;  /* -1, 0, or 1 */
+};
+
+
+
+#endif
diff --git a/editor/pamflip.c b/editor/pamflip/pamflip.c
index b8b079f0..1c758d67 100644
--- a/editor/pamflip.c
+++ b/editor/pamflip/pamflip.c
@@ -26,6 +26,10 @@
      transformRowsBottomTopNonPbm()
        non-PBM image with bottom-top transformation
        (also works for PBM, but we don't use it)
+     transformRowsToColumnsPbmSse()
+       PBM image with column-for-row transformation
+       requires Intel/AMD x86 SSE2
+       (can only do 90 degree/xy flips)
      transformPbm()
        PBM image with column-for-row transformation
        (also works for all other transformations, but we don't use it)
@@ -68,8 +72,19 @@
 #include "nstring.h"
 #include "bitreverse.h"
 
+#include "flip.h"
+#include "pamflip_sse.h"
+
 enum xformType {LEFTRIGHT, TOPBOTTOM, TRANSPOSE};
 
+#ifndef SIMD_PBM_TRANSPOSITION
+  #if HAVE_GCC_SSE2 && defined(__SSE2__)
+    #define SIMD_PBM_TRANSPOSITION 1
+  #else
+    #define SIMD_PBM_TRANSPOSITION 0
+  #endif
+#endif
+
 static void
 parseXformOpt(const char *     const xformOpt,
               unsigned int  *  const xformCountP,
@@ -115,25 +130,13 @@ parseXformOpt(const char *     const xformOpt,
 
 
 
-/* See transformPoint() for an explanation of the transform matrix types.
-   The difference between the two types is that 'xformCore' is particular
-   to the source image dimensions and can be used to do the transformation,
-   while 'xformCore' is independent of the source image and just
-   tells what kind of transformation.
+/* See transformPoint() for an explanation of the transform matrix types.  The
+   difference between xformCore and xformMatrix is that 'xformMatrix' is
+   particular to the source image dimensions and can be used to do the
+   transformation, while 'xformCore' is independent of the source image and
+   just tells what kind of transformation.
 */
 
-struct xformCore {
-    /* a b
-       c d
-    */
-    int a;  /* -1, 0, or 1 */
-    int b;  /* -1, 0, or 1 */
-    int c;  /* -1, 0, or 1 */
-    int d;  /* -1, 0, or 1 */
-};
-
-
-
 struct xformMatrix {
     /* a b 0
        c d 0
@@ -397,6 +400,9 @@ bitOrderReverse(unsigned char * const bitrow,
 /*----------------------------------------------------------------------------
   Reverse the bits in a packed pbm row (1 bit per pixel).  I.e. the leftmost
   bit becomes the rightmost, etc.
+
+  Exchange pixels in units of eight.  If both are zero, skip instead of
+  exchanging zeros.
 -----------------------------------------------------------------------------*/
     unsigned int const lastfullByteIdx = cols/8 - 1;
 
@@ -407,11 +413,14 @@ bitOrderReverse(unsigned char * const bitrow,
         bitrow[0] = bitreverse[bitrow[0]] << (8-cols);
     else if (cols % 8 == 0) {
         unsigned int i, j;
-        for (i = 0, j = lastfullByteIdx; i <= j; ++i, --j) {
-            unsigned char const t = bitreverse[bitrow[j]]; 
-            bitrow[j] = bitreverse[bitrow[i]];
-            bitrow[i] = t;
-        }
+        for (i = 0, j = lastfullByteIdx; i <= j; ++i, --j)
+            if ((bitrow[j] | bitrow[i]) == 0) {
+                /* Both are 0x00 - skip */
+            } else {
+                unsigned char const t = bitreverse[bitrow[j]]; 
+                bitrow[j] = bitreverse[bitrow[i]];
+                bitrow[i] = t;
+            }
     } else {
         unsigned int const m = cols % 8; 
 
@@ -422,18 +431,23 @@ bitOrderReverse(unsigned char * const bitrow,
         unsigned char th, tl;  /* 16 bit temp ( th << 8 | tl ) */
         tl = 0;
         for (i = 0, j = lastfullByteIdx+1; i <= lastfullByteIdx/2; ++i, --j) {
-            th = bitreverse[bitrow[i]];
-            bitrow[i] =
-                bitreverse[0xff & ((bitrow[j-1] << 8 | bitrow[j]) >> (8-m))];
-            bitrow[j] = 0xff & ((th << 8 | tl) >> m);
-            tl = th;
+            if( (tl | bitrow[i] | bitrow[j] | bitrow[j-1]) != 0) {
+                /* Exchange only when some byte is nonzero; else skip */
+                th = bitreverse[bitrow[i]];
+                bitrow[i] =
+                    bitreverse[0xff & ((bitrow[j-1] << 8 
+                                        | bitrow[j]) >> (8-m))];
+                bitrow[j] = 0xff & ((th << 8 | tl) >> m);
+                tl = th;
+            }
         }
-        if (i == j) 
+        if (i == j && (bitrow[i] | tl) != 0) {
             /* bitrow[] has an odd number of bytes (an even number of
                full bytes; lastfullByteIdx is odd), so we did all but
                the center byte above.  We do the center byte now.
             */
             bitrow[j] = 0xff & ((bitreverse[bitrow[i]] << 8 | tl) >> m);
+        }
     }
 }
 
@@ -636,6 +650,10 @@ writeRaster(struct pam *    const pamP,
 
 
 
+
+
+
+
 static void
 transformPbmGen(struct pam *     const inpamP,
                 struct pam *     const outpamP,
@@ -1125,11 +1143,15 @@ transformPbm(struct pam *     const inpamP,
                through them only twice, so there is no page thrashing concern.
             */
             transformRowsBottomTopPbm(inpamP, outpamP, xform.a == -1);
-    } else
+    } else {
         /* This is a column-for-row type of transformation, which requires
            complex traversal of an in-memory image.
         */
-        transformPbmGen(inpamP, outpamP, xform);
+        if (SIMD_PBM_TRANSPOSITION == 1)
+            pamflip_transformRowsToColumnsPbmSse(inpamP, outpamP, xform);
+        else
+            transformPbmGen(inpamP, outpamP, xform);
+    }
 }
 
 
diff --git a/editor/pamflip.test b/editor/pamflip/pamflip.test
index 96e889ea..96e889ea 100644
--- a/editor/pamflip.test
+++ b/editor/pamflip/pamflip.test
diff --git a/editor/pamflip/pamflip_sse.c b/editor/pamflip/pamflip_sse.c
new file mode 100644
index 00000000..3a2d28d7
--- /dev/null
+++ b/editor/pamflip/pamflip_sse.c
@@ -0,0 +1,345 @@
+#include <assert.h>
+
+#include "pm_config.h"
+#include "pm_c_util.h"
+#include "pam.h"
+
+#include "flip.h"
+
+#include "pamflip_sse.h"
+
+#if HAVE_GCC_SSE2 && defined(__SSE2__)
+
+/*----------------------------------------------------------------------------
+   This is a specialized routine for row-for-column PBM transformations.
+   (-cw, -ccw, -xy).  It requires GCC (>= v. 4.0.4) and SSE2. 
+
+   In each cycle, we read sixteen rows from the input.  We process this band
+   left to right in blocks 8 pixels wide.  We use the SSE2 instruction
+   pmovmskb128, which reports the MSB of each byte in a 16 byte array, for
+   fast processing.  We place the 8x16 block into a 16 byte array, and
+   pmovmskb128 reports the 16 pixels on the left edge in one instruction
+   execution.  pslldi128 shifts the array contents leftward.
+
+   The following routines can write in both directions (left and right)
+   into the output rows.  They do this by controlling the vertical stacking
+   order when they make the 8x16 blocks.
+ 
+   Function transpose1to15Bitrows() is for handling the partial bits of each
+   output row.  They can come from either the top or bottom of the vertical
+   input column, but they always go to the right end of the output rows.
+
+   pamflip_transformRowsToColumnsPbmSse() itself does not have any
+   instructions unique to GCC or SSE.  It is possible to write a non-SSE
+   version by providing generic versions of transpose16Bitrows() and
+   transpose1to15Bitrows().  This is just a matter of replacing the V16 union
+   with a plain uchar array and emulating __builtin_ia32_pmovmskb128().
+ 
+   Further enhancement should be possible by employing wider bands,
+   larger blocks as wider SIMD registers become available.  Another
+   method is checking for white blocks and recording them in a small
+   array and condensing writes into the output raster array.
+-----------------------------------------------------------------------------*/
+
+typedef char v16qi __attribute__ ((vector_size (16)));
+typedef int  v4di  __attribute__ ((vector_size (16)));
+
+union V16 {
+    v16qi v;
+    v4di d;
+    unsigned char i[16];
+};
+
+/* Beware when making modifications to code which involve v16qi, v4di, V16.
+   Certain versions of GCC get stuck with the following:
+
+   (1) Type mismatches between v16qi and v4di.  Avoid them with casts.
+
+   (2) Conversions from a 16 byte array to v16qi (or union V16) by cast.
+       __vector__ variables have to be vector from the start. 
+
+   (3) union V16 as a register variable.
+
+   Some GCC versions emit warnings, others abort with error.
+*/
+
+
+
+static void
+transpose1to15Bitrows(unsigned int const cols,
+                      unsigned int const rows,
+                      bit **       const inrow,
+                      uint16_t **  const outplane,
+                      int          const xdir) {
+/*--------------------------------------------------------------------------
+  Convert input rows to output columns.  For handling partial rows.
+  Note that output from this always goes to the right edge of the image.
+----------------------------------------------------------------------------*/
+    unsigned int const outcol16 = (rows-1)/16;
+
+    unsigned int col;
+
+    union V16 v16;
+    v16.v = v16.v ^ v16.v;  /* clear to zero */
+
+    for (col = 0; col < cols; ++col) {
+        unsigned int const outrow = col;
+
+        if (col % 8 == 0) {
+            unsigned int i;
+            for (i = 0; i < rows % 16; ++i) {
+                int const idx = (xdir > 0) ?
+                    (i&8) + 7-(i&7) :       /* output left to right */
+                    (24- rows%16 +i) %16;  /*        right to left */
+                v16.i [idx] = inrow[i][col/8];
+            }
+        }
+        /* read 16 bits from left edge of block; write to output column  */
+        outplane[outrow][outcol16] = __builtin_ia32_pmovmskb128(v16.v);
+        v16.d = __builtin_ia32_pslldi128(v16.d, 1);
+    }
+}
+
+
+
+static void
+transpose16Bitrows(unsigned int const cols,
+      	           unsigned int const rows,
+                   const bit *  const block[16],
+                   uint16_t **  const outplane,
+                   unsigned int const outcol16) {
+/*--------------------------------------------------------------------------
+  Convert input rows to output columns.  Works in units of 8x16.
+
+  Uses pre-calculated pointers ( block[i][col8] ) instead of
+  (xdir > 0) ? (i & 0x08) + 7 - (i & 0x07) : (24 - rows % 16 +i) % 16
+  for efficiency.
+
+  We avoid using union V16 to keep the value in a register.  (When we do so,
+  GCC (4.2, 4.4) sees the suffix x of v16.i[x] and apparently decides that
+  the variable has to be addressable and therefore needs to be placed into
+  memory.)
+  ----------------------------------------------------------------------------*/
+    unsigned int col;
+
+    for (col = 0; col + 7 < cols; col += 8) {    /* Unrolled loop */
+        unsigned int const col8 = col / 8;
+
+        unsigned int outrow;
+        unsigned int i;
+
+        register v16qi vReg = {
+            block[0][col8],  block[1][col8],
+            block[2][col8],  block[3][col8],  
+            block[4][col8],  block[5][col8],
+            block[6][col8],  block[7][col8],
+            block[8][col8],  block[9][col8],
+            block[10][col8], block[11][col8],
+            block[12][col8], block[13][col8],
+            block[14][col8], block[15][col8] };
+
+        outrow = col;  /* initial value */
+
+        for (i = 0; i < 7; ++i) {
+            /* GCC (>=4.2) automatically unrolls this loop */  
+            outplane[outrow++][outcol16] = __builtin_ia32_pmovmskb128(vReg);
+            vReg = (v16qi)__builtin_ia32_pslldi128 ((v4di)vReg, 1);
+        }
+        outplane[outrow][outcol16] = __builtin_ia32_pmovmskb128(vReg);
+    }
+
+    if (col < cols) {  /* Transpose remaining pixels at end of input rows. */
+        unsigned int const col8 = col / 8;
+        register v16qi vReg = {
+            block[0][col8],  block[1][col8],
+            block[2][col8],  block[3][col8],  
+            block[4][col8],  block[5][col8],
+            block[6][col8],  block[7][col8],
+            block[8][col8],  block[9][col8],
+            block[10][col8], block[11][col8],
+            block[12][col8], block[13][col8],
+            block[14][col8], block[15][col8] };
+
+        for ( ; col < cols; ++col) { 
+            unsigned int const outrow = col;
+
+            outplane[outrow][outcol16] = __builtin_ia32_pmovmskb128(vReg);
+            vReg = (v16qi)__builtin_ia32_pslldi128((v4di)vReg, 1);
+        }
+    }
+}
+
+
+
+static void
+analyzeBlock(struct pam *   const inpamP,
+             bit **         const inrow,
+             int            const xdir,
+             const bit **   const block,
+             unsigned int * const topOfFullBlockP,
+             unsigned int * const outcol16P) {
+
+    if (xdir > 0){
+        /* Write output columns left to right */
+        unsigned int i;
+        for (i = 0; i < 16; ++i)
+            block[i] = inrow[(i & 0x8) + 7 - (i & 0x7)];
+        *topOfFullBlockP = 0;
+        *outcol16P = 0;
+    } else {
+        /* Write output columns right to left */
+        *topOfFullBlockP = inpamP->height % 16;
+
+        if (inpamP->height >= 16) {
+            unsigned int i;
+            for (i = 0; i < 16; ++i)
+                block[i]= inrow[((i & 0x8) ^ 0x8) + (i & 0x7)];
+            *outcol16P = inpamP->height/16 - 1;
+        } else
+            *outcol16P = 0;
+    }
+}
+
+
+
+static void
+doPartialBlockTop(struct pam * const inpamP,
+                  bit **       const inrow,
+                  int          const xdir,
+                  unsigned int const topOfFullBlock,
+                  uint16_t **  const outplane) {
+    
+    if (topOfFullBlock > 0) {
+        unsigned int row;
+        for (row = 0; row < topOfFullBlock; ++row)
+            pbm_readpbmrow_packed(inpamP->file, inrow[row],
+                                  inpamP->width, inpamP->format);
+
+        transpose1to15Bitrows(inpamP->width, inpamP->height,
+                              inrow, outplane, xdir);
+            /* Transpose partial rows on top of input.  Place on right edge of
+               output.
+            */ 
+    }
+}
+
+
+
+static void
+doFullBlocks(struct pam * const inpamP,
+             bit **       const inrow,
+             int          const xdir,
+             const bit *  const block[16],
+             unsigned int const topOfFullBlock,
+             unsigned int const initOutcol16,
+             uint16_t **  const outplane) {
+
+    unsigned int row;
+    unsigned int outcol16;
+    unsigned int modrow;
+        /* Number of current row within buffer */
+
+    for (row = topOfFullBlock, outcol16 = initOutcol16, modrow = 0;
+         row < inpamP->height;
+         ++row) {
+
+        pbm_readpbmrow_packed(inpamP->file, inrow[modrow],
+                              inpamP->width, inpamP->format);
+        ++modrow;
+        if (modrow == 16) {
+            /* 16 row buffer is full.  Transpose. */
+            modrow = 0; 
+
+            transpose16Bitrows(inpamP->width, inpamP->height,
+                               block, outplane, outcol16);
+            outcol16 += xdir;
+        }
+    }
+}
+
+
+
+static void
+doPartialBlockBottom(struct pam * const inpamP,
+                     bit **       const inrow,
+                     int          const xdir,
+                     uint16_t **  const outplane) {
+    
+    if (xdir > 0 && inpamP->height % 16 > 0)
+        transpose1to15Bitrows(inpamP->width, inpamP->height, inrow,
+                              outplane, xdir);
+        /* Transpose partial rows on bottom of input.  Place on right edge of
+           output.
+        */ 
+}
+
+
+
+static void
+writeOut(struct pam * const outpamP,
+         uint16_t **  const outplane,
+         int          const ydir) {
+             
+    unsigned int row;
+
+    for (row = 0; row < outpamP->height; ++row) {
+        unsigned int const outrow = (ydir > 0) ?
+            row :
+            outpamP->height - row - 1;  /* reverse order */
+  
+        pbm_writepbmrow_packed(stdout, (bit *)outplane[outrow],
+                               outpamP->width, 0);
+    }
+}
+
+
+
+void
+pamflip_transformRowsToColumnsPbmSse(struct pam *     const inpamP,
+                                     struct pam *     const outpamP,
+                                     struct xformCore const xformCore) { 
+/*----------------------------------------------------------------------------
+  This is a specialized routine for row-for-column PBM transformations.
+  (-cw, -ccw, -xy).
+-----------------------------------------------------------------------------*/
+    int const xdir = xformCore.c;
+        /* Input top:  output left (+1)/ right (-1)  */
+    int const ydir = xformCore.b;
+        /* Input left: output top  (+1)/ bottom (-1) */
+
+    bit ** inrow;
+    uint16_t ** outplane;
+    const bit * block[16];
+    unsigned int topOfFullBlock;
+    unsigned int outcol16;
+
+    inrow = pbm_allocarray_packed( inpamP->width, 16);
+    outplane = (uint16_t **)pbm_allocarray_packed(outpamP->width + 15,
+                                                  outpamP->height);
+        /* We write to the output array in 16 bit units.  Add margin (15). */  
+
+    analyzeBlock(inpamP, inrow, xdir, block, &topOfFullBlock, &outcol16);
+
+    doPartialBlockTop(inpamP, inrow, xdir, topOfFullBlock, outplane);
+
+    doFullBlocks(inpamP, inrow, xdir, block,
+                 topOfFullBlock, outcol16, outplane);
+
+    doPartialBlockBottom(inpamP, inrow, xdir, outplane);
+
+    writeOut(outpamP, outplane, ydir);
+
+    pbm_freearray(outplane, outpamP->height);
+    pbm_freearray(inrow, 16);
+}
+#else  /* SSE functions do not exist */
+
+void
+pamflip_transformRowsToColumnsPbmSse(struct pam *     const inpamP,
+                                     struct pam *     const outpamP,
+                                     struct xformCore const xformCore) { 
+
+    /* Nobody is supposed to call this */
+    assert(false);
+}
+#endif 
diff --git a/editor/pamflip/pamflip_sse.h b/editor/pamflip/pamflip_sse.h
new file mode 100644
index 00000000..1e70b765
--- /dev/null
+++ b/editor/pamflip/pamflip_sse.h
@@ -0,0 +1,12 @@
+#ifndef PAMFLIP_SSE_H_INCLUDED
+#define PAMFLIP_SSE_H_INCLUDED
+
+struct pam;
+#include "flip.h"
+
+void
+pamflip_transformRowsToColumnsPbmSse(struct pam *     const inpamP,
+                                     struct pam *     const outpamP,
+                                     struct xformCore const xformCore);
+
+#endif