about summary refs log tree commit diff
diff options
context:
space:
mode:
author giraffedata <giraffedata@9d0c8265-081b-0410-96cb-a4ca84ce46f8> 2015-04-09 01:38:43 +0000
committer giraffedata <giraffedata@9d0c8265-081b-0410-96cb-a4ca84ce46f8> 2015-04-09 01:38:43 +0000
commit 946e0515bbae993f3ee091c848fdae2bda51e97a (patch)
tree df6c7303fa91df474857633e083b5fa0bf5db8a1
parent d4fe9378cb8b94e847f7295d4b984b5135692b18 (diff)
download netpbm-mirror-946e0515bbae993f3ee091c848fdae2bda51e97a.tar.gz
netpbm-mirror-946e0515bbae993f3ee091c848fdae2bda51e97a.tar.xz
netpbm-mirror-946e0515bbae993f3ee091c848fdae2bda51e97a.zip
Work around GCC < 4.2 bug that causes compile failure
git-svn-id: http://svn.code.sf.net/p/netpbm/code/trunk@2455 9d0c8265-081b-0410-96cb-a4ca84ce46f8
-rw-r--r-- doc/HISTORY 3
-rw-r--r-- editor/pamflip/config.h 7
-rw-r--r-- editor/pamflip/pamflip.c 11
-rw-r--r-- editor/pamflip/pamflip_sse.c 9
-rw-r--r-- pm_config.in.h 14
5 files changed, 33 insertions, 11 deletions
diff --git a/doc/HISTORY b/doc/HISTORY
index a739197c..d9316206 100644
--- a/doc/HISTORY
+++ b/doc/HISTORY
@@ -17,6 +17,9 @@ not yet  BJH  Release 10.71.00
               makeman: deal properly with backlash in source.  Thanks Willem
               van Schaik <willem@schaik.com>.
 
+              Build: work around bug in GCC < 4.2 related to SSE2 builtins
+              that causes compile of pamflip to fail.
+
 15.03.29 BJH  Release 10.70.00
 
               Add st4topgm, pgmtost4.
diff --git a/editor/pamflip/config.h b/editor/pamflip/config.h
new file mode 100644
index 00000000..42aefb6e
--- /dev/null
+++ b/editor/pamflip/config.h
@@ -0,0 +1,7 @@
+#ifndef SSE_PBM_XY_FLIP
+  #if WANT_SSE && HAVE_WORKING_SSE2
+    #define SSE_PBM_XY_FLIP 1
+  #else
+    #define SSE_PBM_XY_FLIP 0
+  #endif
+#endif
diff --git a/editor/pamflip/pamflip.c b/editor/pamflip/pamflip.c
index ade05601..149ab310 100644
--- a/editor/pamflip/pamflip.c
+++ b/editor/pamflip/pamflip.c
@@ -72,19 +72,12 @@
 #include "nstring.h"
 #include "bitreverse.h"
 
+#include "config.h"  /* Defines SSE_PBM_XY_FLIP */
 #include "flip.h"
 #include "pamflip_sse.h"
 
 enum xformType {LEFTRIGHT, TOPBOTTOM, TRANSPOSE};
 
-#ifndef SIMD_PBM_TRANSPOSITION
-  #if WANT_SSE && defined(__SSE2__)
-    #define SIMD_PBM_TRANSPOSITION 1
-  #else
-    #define SIMD_PBM_TRANSPOSITION 0
-  #endif
-#endif
-
 static void
 parseXformOpt(const char *     const xformOpt,
               unsigned int  *  const xformCountP,
@@ -1149,7 +1142,7 @@ transformPbm(struct pam *     const inpamP,
         /* This is a column-for-row type of transformation, which requires
            complex traversal of an in-memory image.
         */
-        if (SIMD_PBM_TRANSPOSITION == 1)
+        if (SSE_PBM_XY_FLIP)
             pamflip_transformRowsToColumnsPbmSse(inpamP, outpamP, xform);
         else
             transformPbmGen(inpamP, outpamP, xform);
diff --git a/editor/pamflip/pamflip_sse.c b/editor/pamflip/pamflip_sse.c
index eccbe965..e0929f65 100644
--- a/editor/pamflip/pamflip_sse.c
+++ b/editor/pamflip/pamflip_sse.c
@@ -24,6 +24,7 @@
 #include "mallocvar.h"
 #include "pam.h"
 
+#include "config.h"  /* Defines SSE_PBM_XY_FLIP */
 #include "flip.h"
 
 #include "pamflip_sse.h"
@@ -32,7 +33,7 @@
    (i.e. <emmintrin.h> exists).
 */
 
-#if WANT_SSE && defined(__SSE2__)
+#if SSE_PBM_XY_FLIP
 
 /*----------------------------------------------------------------------------
    This is a specialized routine for row-for-column PBM transformations.
@@ -59,7 +60,11 @@
    As an enhancement, we clear the output raster to zero (=white) in the
    beginning and flip only the 8x16 blocks that contain non-zero bits (=any
    amount of black pixels).  When we add padding to the edges, we initialize
-   it all to zero to prevent unnecessary transpositions.
+   it all to zero to prevent unnecessary transpositions.  Because most
+   real-world documents are largely white, this saves much execution time.  If
+   you are porting this code to an environment in which non-zero bits are the
+   majority, for example, BMP where zero means black, you should seriously
+   consider modifying this.
 
    All instructions unique to GCC/SSE are in transpose16Bitrows().
    It is possible to write a non-SSE version by providing a generic
diff --git a/pm_config.in.h b/pm_config.in.h
index c64fc093..9d0dc303 100644
--- a/pm_config.in.h
+++ b/pm_config.in.h
@@ -240,6 +240,20 @@
 #endif
 #endif
 
+#ifndef HAVE_WORKING_SSE2
+#if defined(__SSE2__) && ( GCCVERSION >=402 || defined(__clang__) )
+  #define HAVE_WORKING_SSE2 1
+  /* We can use SSE2 builtin functions to exploit SSE2 instructions.  GCC
+     version 4.2 or newer is required; older GCC ostensibly has these SSE2
+     builtins, but the compiler aborts with an error.  Note that __SSE2__
+     means not only that the compiler has the capability, but that the user
+     has not disabled it via compiler options.
+  */
+#else
+  #define HAVE_WORKING_SSE2 0
+#endif
+#endif
+
 /* UNALIGNED_OK means it's OK to do unaligned memory access, e.g.
    loading an 8-byte word from an address that is not a multiple of 8.
    On some systems, such an access causes a trap and a signal.