about summary refs log tree commit diff
diff options
context:
space:
mode:
authorgiraffedata <giraffedata@9d0c8265-081b-0410-96cb-a4ca84ce46f8>2013-10-20 20:00:51 +0000
committergiraffedata <giraffedata@9d0c8265-081b-0410-96cb-a4ca84ce46f8>2013-10-20 20:00:51 +0000
commit59498fa94e044604ace91a0f7dc06e8740e6cf9b (patch)
treef0d990b1f9ebed7308f71697c7bac6a0b868f015
parentcbba7cc2c587c5187eb9c07232af21eeeb4b0e4a (diff)
downloadnetpbm-mirror-59498fa94e044604ace91a0f7dc06e8740e6cf9b.tar.gz
netpbm-mirror-59498fa94e044604ace91a0f7dc06e8740e6cf9b.tar.xz
netpbm-mirror-59498fa94e044604ace91a0f7dc06e8740e6cf9b.zip
Use <emmintrin.h> interface for MMX SSE intrinsics instead of GCC-specific versions
git-svn-id: http://svn.code.sf.net/p/netpbm/code/trunk@2022 9d0c8265-081b-0410-96cb-a4ca84ce46f8
-rw-r--r--GNUmakefile5
-rwxr-xr-xbuildtools/configure.pl62
-rw-r--r--config.mk.in8
-rw-r--r--doc/HISTORY8
-rw-r--r--editor/pamflip/pamflip_sse.c42
-rw-r--r--pm_config.in.h32
6 files changed, 124 insertions, 33 deletions
diff --git a/GNUmakefile b/GNUmakefile
index fec301d6..eaf81976 100644
--- a/GNUmakefile
+++ b/GNUmakefile
@@ -161,6 +161,11 @@ ifeq ($(HAVE_INT64),Y)
 else
 	echo "#define HAVE_INT64 0" >>$@
 endif	
+ifeq ($(WANT_MMX),Y)
+	echo "#define WANT_MMX 1" >>$@
+else
+	echo "#define WANT_MMX 0" >>$@
+endif	
 ifeq ($(DONT_HAVE_PROCESS_MGMT),Y)
 	echo "#define HAVE_FORK 0" >>$@
 else
diff --git a/buildtools/configure.pl b/buildtools/configure.pl
index 2179454b..8f1c46ba 100755
--- a/buildtools/configure.pl
+++ b/buildtools/configure.pl
@@ -99,7 +99,7 @@ sub prompt($$) {
 
 
 sub promptYesNo($) {
-    my ($default) = $@;
+    my ($default) = @_;
 
     my $retval;
 
@@ -947,6 +947,7 @@ sub getInttypes($) {
 }
 
 
+
 sub getInt64($$) {
 
     my ($inttypes_h, $haveInt64R) = @_;
@@ -982,6 +983,59 @@ sub getInt64($$) {
 
 
 
+sub determineMmxCapability($) {
+    # Set $$haveEmmintrinR to 'Y' if a test compile that includes
+    # <emmintrin.h> succeeds; otherwise, or if $testCc is undefined, 'N'.
+    my ($haveEmmintrinR) = @_;
+
+    if (defined($testCc)) {
+
+        print("(Doing test compiles to determine if your compiler has MMX " .
+              "intrinsics -- ignore errors)\n");
+
+        my $cflags = testCflags();
+
+        # The test program consists solely of the #include of the header.
+        my @cSourceCode = (
+                           "#include <emmintrin.h>\n",
+                           );
+
+        testCompile($cflags, \@cSourceCode, \my $success);
+
+        if ($success) {
+            print("It does.\n");
+            $$haveEmmintrinR = 'Y';
+        } else {
+            print("It does not.  Programs will not exploit fast MMX " .
+                  "instructions.\n");
+            $$haveEmmintrinR = 'N';
+        }
+        print("\n");
+    } else {
+        $$haveEmmintrinR = 'N';
+    }
+}
+
+
+
+sub getMmx($) {
+    # Ask whether to use MMX instructions; set $$wantMmxR to the answer.
+    my ($wantMmxR) = @_;
+
+    determineMmxCapability(\my $haveEmmintrin);
+
+    print("Use MMX instructions?\n");
+    print("\n");
+
+    # $haveEmmintrin is the string 'Y' or 'N'.  Both are true in Perl, so
+    # compare explicitly; a bare truth test would always default to "y".
+    my $default = $haveEmmintrin eq 'Y' ? "y" : "n";
+
+    $$wantMmxR = promptYesNo($default);
+}
+
+
+
 # TODO: These should do test compiles to see if the headers are in the
 # default search path, both to create a default to offer and to issue a
 # warning after user has chosen.  Also test links to test the link library.
@@ -1959,6 +2013,8 @@ getInttypes(\my $inttypesHeaderFile);
 
 getInt64($inttypesHeaderFile, \my $haveInt64);
 
+getMmx(\my $wantMmx);
+
 findProcessManagement(\my $dontHaveProcessMgmt);
 
 #******************************************************************************
@@ -2385,6 +2441,10 @@ if ($haveInt64 ne 'Y') {
     push(@config_mk, "HAVE_INT64 = $haveInt64\n");
 }
 
+if ($wantMmx) {
+    push(@config_mk, "WANT_MMX = Y\n");
+}
+
 if ($dontHaveProcessMgmt) {
     push(@config_mk, "DONT_HAVE_PROCESS_MGMT = Y\n");
 }
diff --git a/config.mk.in b/config.mk.in
index 3873ba19..9197e956 100644
--- a/config.mk.in
+++ b/config.mk.in
@@ -89,6 +89,14 @@ INTTYPES_H = <inttypes.h>
 HAVE_INT64 = Y
 #HAVE_INT64 = N
 
+# WANT_MMX tells whether the build should use MMX instructions, via the
+# standard MMX intrinsics (operators such as '_mm_movemask_epi8').  MMX
+# instructions are faster than traditional instructions, but aren't available
+# on all CPUs.  Also, the standard intrinsics are not available in all
+# compilers.
+WANT_MMX = N
+#WANT_MMX = Y
+
 # CC and LD are for building the Netpbm programs, which are not necessarily
 # intended to run on the same system on which Make is running.  But when we 
 # build a build tool such as Libopt, it is meant to run only on the same 
diff --git a/doc/HISTORY b/doc/HISTORY
index 7d1c0327..d353f154 100644
--- a/doc/HISTORY
+++ b/doc/HISTORY
@@ -15,8 +15,12 @@ not yet  BJH  Release 10.65.00
               Fix compile failure on system such as OpenBSD that don't have
               SIGWINCH and SIGIO.  Broken since 10.49 (December 2009).
 
-              Build on system without vasprintf (not GNU libc): fix
-              compiler warning.
+              Build: Use <emmintrin.h> interface for MMX SSE intrinsics
+              instead of GCC-specific versions.  Thanks Prophet of the Way
+              <afu@wta.att.ne.jp>.
+
+              Build on system without vasprintf (not GNU libc): fix compiler
+              warning.
 
               Apple build: use vasprintf.
 
diff --git a/editor/pamflip/pamflip_sse.c b/editor/pamflip/pamflip_sse.c
index 5a256f9d..c73d2c06 100644
--- a/editor/pamflip/pamflip_sse.c
+++ b/editor/pamflip/pamflip_sse.c
@@ -28,7 +28,11 @@
 
 #include "pamflip_sse.h"
 
-#if HAVE_GCC_SSE2 && defined(__SSE2__)
+/* Note that WANT_MMX implies the user expects MMX to be available
+   (i.e. <emmintrin.h> exists).
+*/
+
+#if WANT_MMX
 
 /*----------------------------------------------------------------------------
    This is a specialized routine for row-for-column PBM transformations.
@@ -68,6 +72,8 @@
    possibility.
 -----------------------------------------------------------------------------*/
 
+#include <emmintrin.h>
+
 typedef char v16qi __attribute__ ((vector_size (16)));
 typedef int  v4di  __attribute__ ((vector_size (16)));
 
@@ -82,10 +88,23 @@ typedef int  v4di  __attribute__ ((vector_size (16)));
    variable must be vector from the beginning.
 
    Changes for your local system are okay, but if you intend to
-   publish the them, please specify the compiler version you used.
+   publish them, please specify the compiler version you used.
+
+   This code has been tested on gcc versions 4.2.0, 4.2.4, 4.3.2,
+   4.4.3, 4.4.4, 4.5.0, 4.5.2, 4.6.0 and 4.6.1, and on clang versions
+   3.0, 3.2 and 3.3.
+
+   We use SSE instructions in "_mm_" form in favor of "__builtin_".
+   In GCC the "__builtin_" form is documented but "_mm_" is not.
+   Former versions of this source file used "__builtin_".  This was
+   changed to make possible compilation with clang.
+
+   _mm_slli_epi32 : __builtin_ia32_pslldi128
+   _mm_cmpeq_epi8 : __builtin_ia32_pcmpeqb128
+   _mm_movemask_epi8 : __builtin_ia32_pmovmskb128
+
+   The conversion requires <emmintrin.h> .
 
-   This code has been tested on gcc versions: 4.2.0, 4.2.4, 4.3.2,
-   4.4.3, 4.4.4 and 4.5.0 .
 */
 
 
@@ -133,9 +152,10 @@ transpose16Bitrows(unsigned int const cols,
             block[12][col8], block[13][col8],
             block[14][col8], block[15][col8] };
 
-        register v16qi const compare =__builtin_ia32_pcmpeqb128(vReg,zero128);
+        register __m128i const compare =
+            _mm_cmpeq_epi8((__m128i)vReg, (__m128i)zero128);
 
-        if (__builtin_ia32_pmovmskb128(compare) != 0xffff) {
+        if (_mm_movemask_epi8(compare) != 0xffff) {
 
             /* There is some black content in this block; write to outplane */
             
@@ -147,10 +167,10 @@ transpose16Bitrows(unsigned int const cols,
             for (i = 0; i < 7; ++i) {
                 /* GCC (>=4.2) automatically unrolls this loop */  
                 outplane[outrow++][outcol16] =
-                    __builtin_ia32_pmovmskb128(vReg);
-                vReg = (v16qi)__builtin_ia32_pslldi128 ((v4di)vReg, 1);
+                    _mm_movemask_epi8((__m128i)vReg);
+                vReg = (v16qi)_mm_slli_epi32((__m128i)vReg, 1);
             }
-            outplane[outrow][outcol16] = __builtin_ia32_pmovmskb128(vReg);
+            outplane[outrow][outcol16] = _mm_movemask_epi8((__m128i)vReg);
         } else {
             /* The block is completely white; skip. */
         }
@@ -391,12 +411,12 @@ pamflip_transformRowsToColumnsPbmSse(const struct pam * const inpamP,
     pbm_freearray(outplane, outpamP->height + 7);
     pbm_freearray(inrow, 16);
 }
-#else  /* SSE functions exist */
+#else  /* WANT_MMX */
 
 void
 pamflip_transformRowsToColumnsPbmSse(const struct pam * const inpamP,
                                      const struct pam * const outpamP,
-                                     struct xformCore const xformCore) { 
+                                     struct xformCore   const xformCore) { 
 
     /* Nobody is supposed to call this */
     assert(false);
diff --git a/pm_config.in.h b/pm_config.in.h
index 9459be02..77e9e693 100644
--- a/pm_config.in.h
+++ b/pm_config.in.h
@@ -187,22 +187,14 @@
 /* CONFIGURE: GNU Compiler extensions are used in performance critical places
    when available.  Test whether they exist.
 
-   Turn off by defining NO_GCC_BUILTINS.
-
-   Note that though these influence the resulting Netpbm machine code, the
-   compiler setting ultimately decides what instruction set the compiler uses.
-   If you want a generic build, check the manual and adjust CFLAGS in
-   config.mk accordingly.
-
-   For example, if you want binaries that run on all Intel x86-32
-   family CPUs back to 80386, adding "-march=i386" to CFLAGS in
-   config.mk is much better than setting NO_GCC_BUILTINS to 1.
-   If you want to be extra sure use:
-   "-march=i386 -mno-mmx -mno-sse -DNO_GCC_BUILTINS"
-
-   Gcc uses SSE and SSE2 instructions by default for AMD/Intel x86-64.
-   Tinkering with "-mno-sse" is not recommended for these machines.  If you
-   don't want SSE code, set NO_GCC_BUILTINS to 1.
+   Prevent the build from exploiting these extensions by defining
+   NO_GCC_BUILTINS.
+
+   Before Netpbm 10.65 (December 2013), Netpbm used GCC compiler extensions
+   to generate MMX code in Pamflip.  Starting in 10.65, Netpbm instead uses
+   the more standard operators defined in <emmintrin.h>.  To prevent Netpbm
+   from explicitly using any MMX instructions, set WANT_MMX to N in
+   config.mk.
 */
 
 /*
@@ -231,9 +223,11 @@
   #define GCCVERSION 0
 #endif
 
-/* HAVE_GCC_SSE2 means the compiler has GCC builtins to directly access
-   SSE/SSE2 features.  This is different from whether the compiler generates
-   code that uses these features at all.
+/* HAVE_GCC_SSE2 means the compiler has GCC-specific builtins to directly
+   access SSE/SSE2 features.  This is different from whether the compiler
+   generates code that uses these features at all.  It is also different
+   from whether the compiler has the more standard operators defined in
+   <emmintrin.h>.
 */
 
 #ifndef HAVE_GCC_SSE2