diff options
-rw-r--r-- | GNUmakefile | 5 | ||||
-rwxr-xr-x | buildtools/configure.pl | 62 | ||||
-rw-r--r-- | config.mk.in | 8 | ||||
-rw-r--r-- | doc/HISTORY | 8 | ||||
-rw-r--r-- | editor/pamflip/pamflip_sse.c | 42 | ||||
-rw-r--r-- | pm_config.in.h | 32 |
6 files changed, 124 insertions, 33 deletions
diff --git a/GNUmakefile b/GNUmakefile index fec301d6..eaf81976 100644 --- a/GNUmakefile +++ b/GNUmakefile @@ -161,6 +161,11 @@ ifeq ($(HAVE_INT64),Y) else echo "#define HAVE_INT64 0" >>$@ endif +ifeq ($(WANT_MMX),Y) + echo "#define WANT_MMX 1" >>$@ +else + echo "#define WANT_MMX 0" >>$@ +endif ifeq ($(DONT_HAVE_PROCESS_MGMT),Y) echo "#define HAVE_FORK 0" >>$@ else diff --git a/buildtools/configure.pl b/buildtools/configure.pl index 2179454b..8f1c46ba 100755 --- a/buildtools/configure.pl +++ b/buildtools/configure.pl @@ -99,7 +99,7 @@ sub prompt($$) { sub promptYesNo($) { - my ($default) = $@; + my ($default) = @_; my $retval; @@ -947,6 +947,7 @@ sub getInttypes($) { } + sub getInt64($$) { my ($inttypes_h, $haveInt64R) = @_; @@ -982,6 +983,59 @@ sub getInt64($$) { +sub determineMmxCapability($) { + + my ($haveEmmintrinR) = @_; + + if (defined($testCc)) { + + print("(Doing test compiles to determine if your compiler has MMX " . + "intrinsics -- ignore errors)\n"); + + my $cflags = testCflags(); + + my $works; + + my @cSourceCode = ( + "#include <emmintrin.h>\n", + ); + + testCompile($cflags, \@cSourceCode, \my $success); + + if ($success) { + print("It does.\n"); + $$haveEmmintrinR = 'Y'; + } else { + print("It does not. Programs will not exploit fast MMX " . + "instructions.\n"); + $$haveEmmintrinR = 'N'; + } + print("\n"); + } else { + $$haveEmmintrinR = "N"; + } +} + + + +sub getMmx($) { + + my ($wantMmxR) = @_; + + determineMmxCapability(\my $haveEmmintrin); + + my $gotit; + + print("Use MMX instructions?\n"); + print("\n"); + + my $default = $haveEmmintrin eq 'Y' ? "y" : "n"; + + $$wantMmxR = promptYesNo($default); +} + + + # TODO: These should do test compiles to see if the headers are in the # default search path, both to create a default to offer and to issue a # warning after user has chosen. Also test links to test the link library. 
@@ -1959,6 +2013,8 @@ getInttypes(\my $inttypesHeaderFile); getInt64($inttypesHeaderFile, \my $haveInt64); +getMmx(\my $wantMmx); + findProcessManagement(\my $dontHaveProcessMgmt); #****************************************************************************** @@ -2385,6 +2441,10 @@ if ($haveInt64 ne 'Y') { push(@config_mk, "HAVE_INT64 = $haveInt64\n"); } +if ($wantMmx) { + push(@config_mk, "WANT_MMX = Y\n"); +} + if ($dontHaveProcessMgmt) { push(@config_mk, "DONT_HAVE_PROCESS_MGMT = Y\n"); } diff --git a/config.mk.in b/config.mk.in index 3873ba19..9197e956 100644 --- a/config.mk.in +++ b/config.mk.in @@ -89,6 +89,14 @@ INTTYPES_H = <inttypes.h> HAVE_INT64 = Y #HAVE_INT64 = N +# WANT_MMX tells whether the build should use MMX instructions, via the +# standard MMX intrinsics (operators such as '_mm_movemask_epi8'). MMX +# instructions are faster than traditional instructions, but aren't available +# on all CPUs. Also, the standard intrinsics are not available in all +# compilers. +WANT_MMX = N +#WANT_MMX = Y + # CC and LD are for building the Netpbm programs, which are not necessarily # intended to run on the same system on which Make is running. But when we # build a build tool such as Libopt, it is meant to run only on the same diff --git a/doc/HISTORY b/doc/HISTORY index 7d1c0327..d353f154 100644 --- a/doc/HISTORY +++ b/doc/HISTORY @@ -15,8 +15,12 @@ not yet BJH Release 10.65.00 Fix compile failure on system such as OpenBSD that don't have SIGWINCH and SIGIO. Broken since 10.49 (December 2009). - Build on system without vasprintf (not GNU libc): fix - compiler warning. + Build: Use <emmintrin.h> interface for MMX SSE intrinsics + instead of GCC-specific versions. Thanks Prophet of the Way + <afu@wta.att.ne.jp>. + + Build on system without vasprintf (not GNU libc): fix compiler + warning. Apple build: use vasprintf. 
diff --git a/editor/pamflip/pamflip_sse.c b/editor/pamflip/pamflip_sse.c index 5a256f9d..c73d2c06 100644 --- a/editor/pamflip/pamflip_sse.c +++ b/editor/pamflip/pamflip_sse.c @@ -28,7 +28,11 @@ #include "pamflip_sse.h" -#if HAVE_GCC_SSE2 && defined(__SSE2__) +/* Note that WANT_MMX implies the user expects MMX to be available + (i.e. <emmintrin.h> exists). +*/ + +#if WANT_MMX /*---------------------------------------------------------------------------- This is a specialized routine for row-for-column PBM transformations. @@ -68,6 +72,8 @@ possibility. -----------------------------------------------------------------------------*/ +#include <emmintrin.h> + typedef char v16qi __attribute__ ((vector_size (16))); typedef int v4di __attribute__ ((vector_size (16))); @@ -82,10 +88,23 @@ typedef int v4di __attribute__ ((vector_size (16))); variable must be vector from the beginning. Changes for your local system are okay, but if you intend to - publish the them, please specify the compiler version you used. + publish them, please specify the compiler version you used. + + This code has been tested on gcc versions 4.2.0, 4.2.4, 4.3.2, + 4.4.3, 4.4.4, 4.5.0, 4.5.2, 4.6.0 and 4.6.1, and clang versions + 3.0, 3.2, 3.3. + + We use SSE instructions in "_mm_" form in favor of "__builtin_". + In GCC the "__builtin_" form is documented but "_mm_" is not. + Former versions of this source file used "__builtin_". This was + changed to make possible compilation with clang. + + _mm_slli_epi32 : __builtin_ia32_pslldi128 + _mm_cmpeq_epi8 : __builtin_ia32_pcmpeqb128 + _mm_movemask_epi8 : __builtin_ia32_pmovmskb128 + + The conversion requires <emmintrin.h>. - This code has been tested on gcc versions: 4.2.0, 4.2.4, 4.3.2, - 4.4.3, 4.4.4 and 4.5.0 . 
*/ @@ -133,9 +152,10 @@ transpose16Bitrows(unsigned int const cols, block[12][col8], block[13][col8], block[14][col8], block[15][col8] }; - register v16qi const compare =__builtin_ia32_pcmpeqb128(vReg,zero128); + register __m128i const compare = + _mm_cmpeq_epi8((__m128i)vReg, (__m128i)zero128); - if (__builtin_ia32_pmovmskb128(compare) != 0xffff) { + if (_mm_movemask_epi8(compare) != 0xffff) { /* There is some black content in this block; write to outplane */ @@ -147,10 +167,10 @@ transpose16Bitrows(unsigned int const cols, for (i = 0; i < 7; ++i) { /* GCC (>=4.2) automatically unrolls this loop */ outplane[outrow++][outcol16] = - __builtin_ia32_pmovmskb128(vReg); - vReg = (v16qi)__builtin_ia32_pslldi128 ((v4di)vReg, 1); + _mm_movemask_epi8((__m128i)vReg); + vReg = (v16qi)_mm_slli_epi32((__m128i)vReg, 1); } - outplane[outrow][outcol16] = __builtin_ia32_pmovmskb128(vReg); + outplane[outrow][outcol16] = _mm_movemask_epi8((__m128i)vReg); } else { /* The block is completely white; skip. */ } @@ -391,12 +411,12 @@ pamflip_transformRowsToColumnsPbmSse(const struct pam * const inpamP, pbm_freearray(outplane, outpamP->height + 7); pbm_freearray(inrow, 16); } -#else /* SSE functions exist */ +#else /* WANT_MMX */ void pamflip_transformRowsToColumnsPbmSse(const struct pam * const inpamP, const struct pam * const outpamP, - struct xformCore const xformCore) { + struct xformCore const xformCore) { /* Nobody is supposed to call this */ assert(false); diff --git a/pm_config.in.h b/pm_config.in.h index 9459be02..77e9e693 100644 --- a/pm_config.in.h +++ b/pm_config.in.h @@ -187,22 +187,14 @@ /* CONFIGURE: GNU Compiler extensions are used in performance critical places when available. Test whether they exist. - Turn off by defining NO_GCC_BUILTINS. - - Note that though these influence the resulting Netpbm machine code, the - compiler setting ultimately decides what instruction set the compiler uses. 
- If you want a generic build, check the manual and adjust CFLAGS in - config.mk accordingly. - - For example, if you want binaries that run on all Intel x86-32 - family CPUs back to 80386, adding "-march=i386" to CFLAGS in - config.mk is much better than setting NO_GCC_BUILTINS to 1. - If you want to be extra sure use: - "-march=i386 -mno-mmx -mno-sse -DNO_GCC_BUILTINS" - - Gcc uses SSE and SSE2 instructions by default for AMD/Intel x86-64. - Tinkering with "-mno-sse" is not recommended for these machines. If you - don't want SSE code, set NO_GCC_BUILTINS to 1. + Prevent the build from exploiting these extensions by defining + NO_GCC_BUILTINS. + + Before Netpbm 10.65 (December 2013), Netpbm used GCC compiler extensions + to generate MMX code in Pamflip. Starting in 10.65, Netpbm instead uses + the more standard operators defined in <emmintrin.h>. To prevent Netpbm + from explicitly using any MMX instructions, set WANT_MMX to N in + config.mk. */ /* @@ -231,9 +223,11 @@ #define GCCVERSION 0 #endif -/* HAVE_GCC_SSE2 means the compiler has GCC builtins to directly access - SSE/SSE2 features. This is different from whether the compiler generates - code that uses these features at all. +/* HAVE_GCC_SSE2 means the compiler has GCC-specific builtins to directly + access SSE/SSE2 features. This is different from whether the compiler + generates code that uses these features at all. It is also different + from whether the compiler has the more standard operators defined in + <emmintrin.h>. */ #ifndef HAVE_GCC_SSE2 |