From 946e0515bbae993f3ee091c848fdae2bda51e97a Mon Sep 17 00:00:00 2001 From: giraffedata Date: Thu, 9 Apr 2015 01:38:43 +0000 Subject: Work around GCC < 4.2 bug that causes compile failure git-svn-id: http://svn.code.sf.net/p/netpbm/code/trunk@2455 9d0c8265-081b-0410-96cb-a4ca84ce46f8 --- doc/HISTORY | 3 +++ editor/pamflip/config.h | 7 +++++++ editor/pamflip/pamflip.c | 11 ++--------- editor/pamflip/pamflip_sse.c | 9 +++++++-- pm_config.in.h | 14 ++++++++++++++ 5 files changed, 33 insertions(+), 11 deletions(-) create mode 100644 editor/pamflip/config.h diff --git a/doc/HISTORY b/doc/HISTORY index a739197c..d9316206 100644 --- a/doc/HISTORY +++ b/doc/HISTORY @@ -17,6 +17,9 @@ not yet BJH Release 10.71.00 makeman: deal properly with backlash in source. Thanks Willem van Schaik . + Build: work around bug in GCC < 4.2 related to SSE2 builtins + that causes compile of pamflip to fail. + 15.03.29 BJH Release 10.70.00 Add st4topgm, pgmtost4. diff --git a/editor/pamflip/config.h b/editor/pamflip/config.h new file mode 100644 index 00000000..42aefb6e --- /dev/null +++ b/editor/pamflip/config.h @@ -0,0 +1,7 @@ +#ifndef SSE_PBM_XY_FLIP + #if WANT_SSE && HAVE_WORKING_SSE2 + #define SSE_PBM_XY_FLIP 1 + #else + #define SSE_PBM_XY_FLIP 0 + #endif +#endif diff --git a/editor/pamflip/pamflip.c b/editor/pamflip/pamflip.c index ade05601..149ab310 100644 --- a/editor/pamflip/pamflip.c +++ b/editor/pamflip/pamflip.c @@ -72,19 +72,12 @@ #include "nstring.h" #include "bitreverse.h" +#include "config.h" /* Defines SSE_PBM_XY_FLIP */ #include "flip.h" #include "pamflip_sse.h" enum xformType {LEFTRIGHT, TOPBOTTOM, TRANSPOSE}; -#ifndef SIMD_PBM_TRANSPOSITION - #if WANT_SSE && defined(__SSE2__) - #define SIMD_PBM_TRANSPOSITION 1 - #else - #define SIMD_PBM_TRANSPOSITION 0 - #endif -#endif - static void parseXformOpt(const char * const xformOpt, unsigned int * const xformCountP, @@ -1149,7 +1142,7 @@ transformPbm(struct pam * const inpamP, /* This is a column-for-row type of transformation, 
which requires complex traversal of an in-memory image. */ - if (SIMD_PBM_TRANSPOSITION == 1) + if (SSE_PBM_XY_FLIP) pamflip_transformRowsToColumnsPbmSse(inpamP, outpamP, xform); else transformPbmGen(inpamP, outpamP, xform); diff --git a/editor/pamflip/pamflip_sse.c b/editor/pamflip/pamflip_sse.c index eccbe965..e0929f65 100644 --- a/editor/pamflip/pamflip_sse.c +++ b/editor/pamflip/pamflip_sse.c @@ -24,6 +24,7 @@ #include "mallocvar.h" #include "pam.h" +#include "config.h" /* Defines SSE_PBM_XY_FLIP */ #include "flip.h" #include "pamflip_sse.h" @@ -32,7 +33,7 @@ (i.e. exists). */ -#if WANT_SSE && defined(__SSE2__) +#if SSE_PBM_XY_FLIP /*---------------------------------------------------------------------------- This is a specialized routine for row-for-column PBM transformations. @@ -59,7 +60,11 @@ As an enhancement, we clear the output raster to zero (=white) in the beginning and flip only the 8x16 blocks that contain non-zero bits (=any amount of black pixels). When we add padding to the edges, we initialize - it all to zero to prevent unnecessary transpositions. + it all to zero to prevent unnecessary transpositions. Because most + real-world documents are largely white, this saves much execution time. If + you are porting this code to an environment in which non-zero bits are the + majority, for example, BMP where zero means black, you should seriously + consider modifying this. All instructions unique to GCC/SSE are in transpose16Bitrows(). It is possible to write a non-SSE version by providing a generic diff --git a/pm_config.in.h b/pm_config.in.h index c64fc093..9d0dc303 100644 --- a/pm_config.in.h +++ b/pm_config.in.h @@ -240,6 +240,20 @@ #endif #endif +#ifndef HAVE_WORKING_SSE2 +#if defined(__SSE2__) && ( GCCVERSION >=402 || defined(__clang__) ) + #define HAVE_WORKING_SSE2 1 + /* We can use SSE2 builtin functions to exploit SSE2 instructions. 
GCC + version 4.2 or newer (or Clang) is required; older GCC ostensibly has these SSE2 + builtins, but the compiler aborts with an error. Note that __SSE2__ + means not only that the compiler has the capability, but also that the user + has not disabled it via compiler options. + */ +#else + #define HAVE_WORKING_SSE2 0 +#endif +#endif + /* UNALIGNED_OK means it's OK to do unaligned memory access, e.g. loading an 8-byte word from an address that is not a multiple of 8. On some systems, such an access causes a trap and a signal. -- cgit 1.4.1