4 files changed, 97 insertions, 108 deletions
diff --git a/converter/other/bmptopnm.c b/converter/other/bmptopnm.c
index 30b1012f..647ae3c6 100644
--- a/converter/other/bmptopnm.c
+++ b/converter/other/bmptopnm.c
@@ -421,7 +421,7 @@ lsbZeroCount(unsigned int const mask)
    Use GCC built-in when available.
 -----------------------------------------------------------------------------*/
 
-#if ( defined(__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__ >= 304) )
+#if HAVE_GCC_BITCOUNT 
 {
       return ( mask==0 ? sizeof(mask)*8 : __builtin_ctz(mask) );
 }
diff --git a/converter/other/jpeg2000/libjasper/jpc/jpc_math.c b/converter/other/jpeg2000/libjasper/jpc/jpc_math.c
index d860847d..72e3ac37 100644
--- a/converter/other/jpeg2000/libjasper/jpc/jpc_math.c
+++ b/converter/other/jpeg2000/libjasper/jpc/jpc_math.c
@@ -1,3 +1,72 @@
+#include <assert.h>
+#include <stdio.h>
+#include <string.h>
+#include <math.h>
+#include <stdlib.h>
+#include <stdarg.h>
+
+#include "jpc_math.h"
+
+
+
+/* Calculate the integer quantity floor(log2(arg)), where 'arg' is a
+  positive integer (asserted).  E.g. 1 -> 0, 2 -> 1, 3 -> 1, 4 -> 2. */
+int
+jpc_floorlog2(int const arg) {
+
+	int y;  /* number of halvings so far; the result */
+    int x;  /* working copy of 'arg' ('arg' itself is const) */
+
+	assert(arg > 0);
+
+	y = 0;
+    x = arg;
+	while (x > 1) {
+		x >>= 1;
+		++y;
+	}
+	return y;
+}
+
+
+
+/*
+  jpc_floorlog2() and jpc_firstone() do the same thing.
+  The only difference is how input 0 is handled.
+
+n                  : 0 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 
+ceil(log2(n))      : x 0  1  2  2  3  3  3  3  4  4  4  4  4  4  4  4  5  5  5 
+floor(log2(n))     : x 0  1  1  2  2  2  2  3  3  3  3  3  3  3  3  4  4  4  4 
+31-__builtin_clz(n): x 0  1  1  2  2  2  2  3  3  3  3  3  3  3  3  4  4  4  4 
+jpc_floorlog2(n)   : x 0  1  1  2  2  2  2  3  3  3  3  3  3  3  3  4  4  4  4 
+jpc_firstone(n)    :-1 0  1  1  2  2  2  2  3  3  3  3  3  3  3  3  4  4  4  4 
+
+*/
+
+
+
+int
+jpc_firstone(int const arg) {
+/*---------------------------------------------------------------------------- 
+  Return the zero-based position of the most significant one bit of 'arg',
+  which must be nonnegative (asserted).  Return -1 if 'arg' is zero.
+-----------------------------------------------------------------------------*/
+	int n;  /* position of the highest one bit seen so far; -1 if none */
+    int x;  /* working copy of 'arg' ('arg' itself is const) */
+
+	assert(arg >= 0);
+
+	n = -1;
+    x = arg;
+	while (x > 0) {
+		x >>= 1;
+		++n;
+	}
+	return n;
+}
+
+
+
 /*
  * Copyright (c) 1999-2000 Image Power, Inc. and the University of
  *   British Columbia.
@@ -109,62 +178,3 @@
  * 
  * __END_OF_JASPER_LICENSE__
  */
-
-/*
- * Math Library
- *
- * $Id$
- */
-
-/******************************************************************************\
-* Includes
-\******************************************************************************/
-
-#include <assert.h>
-#include <stdio.h>
-#include <string.h>
-#include <math.h>
-#include <stdlib.h>
-#include <stdarg.h>
-
-#include "jpc_math.h"
-
-/******************************************************************************\
-* Miscellaneous Functions
-\******************************************************************************/
-
-/* Calculate the integer quantity floor(log2(x)), where x is a positive
-  integer. */
-int jpc_floorlog2(int x)
-{
-	int y;
-
-	/* The argument must be positive. */
-	assert(x > 0);
-
-	y = 0;
-	while (x > 1) {
-		x >>= 1;
-		++y;
-	}
-	return y;
-}
-
-/* Calculate the bit position of the first leading one in a nonnegative
-  integer. */
-/* This function is the basically the same as ceillog2(x), except that the
-  allowable range for x is slightly different. */
-int jpc_firstone(int x)
-{
-	int n;
-
-	/* The argument must be nonnegative. */
-	assert(x >= 0);
-
-	n = -1;
-	while (x > 0) {
-		x >>= 1;
-		++n;
-	}
-	return n;
-}
diff --git a/converter/other/pamtogif.c b/converter/other/pamtogif.c
index b8df9626..0c8c0f9e 100644
--- a/converter/other/pamtogif.c
+++ b/converter/other/pamtogif.c
@@ -864,7 +864,7 @@ typedef struct {
 static unsigned int
 nSignificantBits( unsigned int const arg ){
 
-#if defined(__GNUC__)  && (__GNUC__ * 100 + __GNUC_MINOR__ >= 304)
+#if HAVE_GCC_BITCOUNT
 
     return (arg == 0) ? 0 : 8 * sizeof(unsigned int) - __builtin_clz(arg);
 
diff --git a/lib/libpbm3.c b/lib/libpbm3.c
index 29b89e47..9200d30e 100644
--- a/lib/libpbm3.c
+++ b/lib/libpbm3.c
@@ -13,31 +13,20 @@
 #include <assert.h>
 
 #include "pm_c_util.h"
-#include "bitreverse.h"
 #include "pbm.h"
 
-/* HAVE_MMX_SSE means we have the means to use MMX and SSE CPU facilities
-   to make PBM raster processing faster.
+#if HAVE_GCC_MMXSSE
+#include "bitreverse.h"
+#endif
+
+/* HAVE_GCC_MMXSSE means we have the means to use MMX and SSE CPU facilities
+   to make PBM raster processing faster.  GCC only.
 
    The GNU Compiler -msse option makes SSE available.
+   For x86-32 with MMX/SSE, "-msse" must be explicitly given.
+   For x86-64 and AMD64, "-msse" is on by default.
 */
 
-#if defined(__GNUC__) && \
-  (__GNUC__ * 100 + __GNUC_MINOR__ >= 301) && \
-  (__GNUC__ * 100 + __GNUC_MINOR__ < 403) && \
-  defined (__SSE__)
-/* GCC 4.3 does have the facility, but it is different from what this
-   code knows how to use.  In particular, the calls to
-   __builtin_ia32_pcmpeqb() and __builtin_ia32_pmovmskb() fail to
-   compile, with complaints of improper argument types.
-*/
-
-#define HAVE_MMX_SSE 1
-#else
-#define HAVE_MMX_SSE 0
-#endif
-
-
 void
 pbm_writepbminit(FILE * const fileP, 
                  int    const cols, 
@@ -67,7 +56,7 @@ writePackedRawRow(FILE *                const fileP,
 } 
 
 
-
+#if HAVE_GCC_MMXSSE
 static void
 packBitsWithMmxSse(FILE *          const fileP,
                    const bit *     const bitrow,
@@ -83,7 +72,6 @@ packBitsWithMmxSse(FILE *          const fileP,
    Use the Pentium MMX and SSE facilities to pack the bits quickly, but
    perform the exact same function as the simpler packBitsGeneric().
 -----------------------------------------------------------------------------*/
-#if HAVE_MMX_SSE
     /*
       We use MMX/SSE facilities that operate on 8 bytes at once to pack
       the bits quickly.
@@ -93,49 +81,37 @@ packBitsWithMmxSse(FILE *          const fileP,
       The key machine instructions are:
     
     
-      PCMPEQB  Packed CoMPare EQual Byte
+      PCMPGTB  Packed CoMPare Greater Than Byte
     
         Compares 8 bytes in parallel
-        Result is x00 if equal, xFF if unequal for each byte       
+        Result is xFF if greater than, x00 if not, for each byte
     
       PMOVMSKB Packed MOVe MaSK Byte 
     
         Result is a byte of the MSBs of 8 bytes
-        x00 xFF x00 xFF xFF xFF x00 x00 --> 01011100B = 0x5C     
-    
+        x00 xFF x00 xFF xFF xFF x00 x00 --> 01011100B = 0x5C
+        
+        The result is actually a 32 bit int, but the higher bits are
+        always 0.  (0x0000005C in the above case)
     
       EMMS     Empty MMx State
     
         Free MMX registers  
     
-    
-      Here's a one-statement version of the code in our foor loop.  It's harder 
-      to read, but if we find out this generates more efficient code, we could 
-      use this.
-    
-        packedBits[col/8] 
-          = bitreverse [ ~ (unsigned char) __builtin_ia32_pmovmskb (
-            __builtin_ia32_pcmpeqb ( *(v8qi*) (&bitrow[col]), *(v8qi*) &zero64)
-            ) ];
     */
 
-#if (__GNUC__ * 100 + __GNUC_MINOR__ >= 403)
+
     typedef char v8qi __attribute__ ((vector_size(8)));
-#else
-    typedef int v8qi __attribute__ ((mode(V8QI)));
-#endif
     typedef int di __attribute__ ((mode(DI)));
 
-    di const zero64 = 0;        /* to clear with PXOR */
-
     unsigned int col;
+    v8qi const zero64 =(v8qi)((di)0);  /* clear to zero */
 
     for (col = 0; col + 7 < cols; col += 8) {
+
         v8qi const compare =
-            __builtin_ia32_pcmpeqb(*(v8qi*) (&bitrow[col]), *(v8qi*) &zero64);
-        unsigned char const backwardWhiteMask = (unsigned char)
-            __builtin_ia32_pmovmskb(compare);
-        unsigned char const backwardBlackMask = ~backwardWhiteMask;
+            __builtin_ia32_pcmpgtb(*(v8qi*) (&bitrow[col]), (v8qi) zero64);
+        uint32_t const backwardBlackMask =  __builtin_ia32_pmovmskb(compare);
         unsigned char const blackMask = bitreverse[backwardBlackMask];
 
         packedBits[col/8] = blackMask;
@@ -144,10 +120,13 @@ packBitsWithMmxSse(FILE *          const fileP,
 
     __builtin_ia32_emms();
 
+}
 #else
-    if (bitreverse == bitreverse) {}; /* avoid unused vbl compiler warning */
+/* Avoid undefined function warning; never actually called */
+
+#define packBitsWithMmxSse(a,b,c,d,e) packBitsGeneric(a,b,c,d,e)
 #endif
-}
+
 
 
 
@@ -237,7 +216,7 @@ writePbmRowRaw(FILE *      const fileP,
 
         pm_setjmpbufsave(&jmpbuf, &origJmpbufP);
 
-        if (HAVE_MMX_SSE)
+        if (HAVE_GCC_MMXSSE)
             packBitsWithMmxSse(fileP, bitrow, packedBits, cols, &nextCol);
         else 
             packBitsGeneric(fileP, bitrow, packedBits, cols, &nextCol);