diff options
author | giraffedata <giraffedata@9d0c8265-081b-0410-96cb-a4ca84ce46f8> | 2013-10-30 18:57:38 +0000 |
---|---|---|
committer | giraffedata <giraffedata@9d0c8265-081b-0410-96cb-a4ca84ce46f8> | 2013-10-30 18:57:38 +0000 |
commit | 9f8b7230b969fde154cf06de3ee36fe14636371b (patch) | |
tree | f1c74fed6644663218ea987c4bd4104100a13ec4 /lib | |
parent | 6229105f5ce31e34b08dc656a71a919f313a11d7 (diff) | |
download | netpbm-mirror-9f8b7230b969fde154cf06de3ee36fe14636371b.tar.gz netpbm-mirror-9f8b7230b969fde154cf06de3ee36fe14636371b.tar.xz netpbm-mirror-9f8b7230b969fde154cf06de3ee36fe14636371b.zip |
Use SSE stuff with Clang as for GCC
git-svn-id: http://svn.code.sf.net/p/netpbm/code/trunk@2027 9d0c8265-081b-0410-96cb-a4ca84ce46f8
Diffstat (limited to 'lib')
-rw-r--r-- | lib/Makefile | 9 | ||||
-rw-r--r-- | lib/libpbm3.c | 47 | ||||
-rw-r--r-- | lib/util/Makefile | 11 |
3 files changed, 43 insertions, 24 deletions
```diff
diff --git a/lib/Makefile b/lib/Makefile
index 8d9b3175..6512949f 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -84,13 +84,12 @@ extra_staticlib: $(EXTRA_STATICLIB)
 # type, but request a static library in addition.
 #----------------------------------------------------------------------------
 
-# Note that the user may have configured -I options into CPPFLAGS/CFLAGS.
-CFLAGS_ALL = $(INCLUDES) -DNDEBUG $(CPPFLAGS) $(CFLAGS) $(CFLAGS_SHLIB) \
-  $(CFLAGS_PERSONAL) $(CADD)
+$(LIBOBJECTS): CFLAGS_TARGET=$(CFLAGS_SHLIB)
+
+libpbm3.o: CFLAGS_TARGET+=$(CFLAGS_SSE)
 
 $(LIBOBJECTS): %.o: %.c importinc
-# We have to get the command all on one line to avoid messy make messages
-	$(CC) -c $(CFLAGS_ALL) -o $@ $<
+	$(CC) -c $(INCLUDES) $(CFLAGS_ALL) -o $@ $<
 
 MAJ = 11
 MIN = $(NETPBM_MINOR_RELEASE)
diff --git a/lib/libpbm3.c b/lib/libpbm3.c
index 3d846962..020e1558 100644
--- a/lib/libpbm3.c
+++ b/lib/libpbm3.c
@@ -16,21 +16,28 @@
 #include "pbm.h"
 
 #ifndef PACKBITS_SSE
-#if HAVE_GCC_SSE2 && HAVE_GCC_BSWAP && defined(__SSE2__)
+#if WANT_SSE && defined(__SSE2__) && HAVE_GCC_BSWAP
   #define PACKBITS_SSE 2
 #else
   #define PACKBITS_SSE 0
 #endif
 #endif
 
-/* HAVE_GCC_SSE2 means we have the means to use SSE CPU facilities
-   to make PBM raster processing faster.  GCC only.
+/* WANT_SSE means we want to use SSE CPU facilities to make PBM raster
+   processing faster.  This implies it's actually possible - i.e. the
+   build environment has <emmintrin.h>.
 
-   The GNU Compiler -msse2 option makes SSE/SSE2 available.
+   The GNU Compiler -msse2 option makes SSE/SSE2 available, and is
+   evidenced by __SSE2__.
    For x86-32 with SSE, "-msse2" must be explicitly given.
    For x86-64 and AMD64, "-msse2" is the default (from Gcc v.4.)
 */
 
+#if PACKBITS_SSE == 2
+  #include <emmintrin.h>
+#endif
+
+
 void
 pbm_writepbminit(FILE * const fileP,
                  int    const cols,
@@ -81,16 +88,28 @@ packBitsWithSse2( FILE * const fileP,
       PCMPGTB128 Packed CoMPare Greater Than Byte
 
         Compares 16 bytes in parallel
-        Result is x00 if greater than, xFF if not for each byte
+        Result is x00 if greater than, xFF if not for each byte
+
 
       PMOVMSKB128 Packed MOVe MaSK Byte
 
-        Result is a byte of the MSBs of 16 bytes
+        Result is 16 bits, the MSBs of 16 bytes
         x00 xFF x00 xFF xFF xFF x00 x00 xFF xFF xFF xFF x00 x00 x00 x00
         --> 0101110011110000B = 0x5CF0
 
         The result is actually a 64 bit int, but the higher bits are
         always 0.
+
+      We use SSE instructions in "_mm_" form in favor of "__builtin_".
+      In GCC the "__builtin_" form is documented but "_mm_" is not.
+      Former versions of this source file used "__builtin_".  This was
+      changed to make possible compilation with clang, which does not
+      implement some "__builtin_" forms.
+
+        __builtin_ia32_pcmpgtb128 : _mm_cmpgt_epi8
+        __builtin_ia32_pmovmskb128 : _mm_movemask_epi8
+
+      The conversion requires <emmintrin.h> .
     */
 
     typedef char v16qi __attribute__ ((vector_size(16)));
@@ -110,11 +129,10 @@ packBitsWithSse2( FILE * const fileP,
         bit128.i64[1]=__builtin_bswap64( *(uint64_t*) &bitrow[col+8]);
 
         {
-            v16qi const compare =
-                __builtin_ia32_pcmpgtb128(bit128.v16, zero128);
-            uint16_t const blackMask =
-                (uint16_t) __builtin_ia32_pmovmskb128(compare);
-
+            v16qi const compare = (v16qi)
+                _mm_cmpgt_epi8((__m128i)bit128.v16, (__m128i) zero128);
+            uint16_t const blackMask = _mm_movemask_epi8 ((__m128i)compare);
+
             *(uint16_t *) & packedBits[col/8] = blackMask;
         }
     }
@@ -128,10 +146,9 @@ packBitsWithSse2( FILE * const fileP,
             bit128.byte[ (i&8) + 7-(i&7) ] = bitrow[j];
 
         {
-            v16qi const compare =
-                __builtin_ia32_pcmpgtb128( bit128.v16, zero128 );
-            uint16_t const blackMask =
-                __builtin_ia32_pmovmskb128( compare );
+            v16qi const compare = (v16qi)
+                _mm_cmpgt_epi8((__m128i)bit128.v16, (__m128i) zero128);
+            uint16_t const blackMask = _mm_movemask_epi8 ((__m128i)compare);
 
             if ( cols%16 >8 ) /* Two partial bytes */
                 *(uint16_t *) & packedBits[col/8] = blackMask;
diff --git a/lib/util/Makefile b/lib/util/Makefile
index 5bf1995e..28dfddfe 100644
--- a/lib/util/Makefile
+++ b/lib/util/Makefile
@@ -5,6 +5,8 @@ endif
 SUBDIR = lib/util
 VPATH=.:$(SRCDIR)/$(SUBDIR)
 
+default:all
+
 include $(BUILDDIR)/config.mk
 
 # nstring is required for asprintf(), etc.  Also some systems don't have
@@ -22,13 +24,14 @@ UTILOBJECTS = \
 
 MERGE_OBJECTS =
 
+include $(SRCDIR)/common.mk
+
 all: $(UTILOBJECTS)
 
-include $(SRCDIR)/common.mk
+$(UTILOBJECTS): CFLAGS_TARGET=$(CFLAGS_SHLIB)
 
 $(UTILOBJECTS):%.o:%.c importinc
-	$(CC) -c $(INCLUDES) -DNDEBUG $(CPPFLAGS) $(CFLAGS) $(CFLAGS_SHLIB) \
-	  $(CFLAGS_PERSONAL) $(CADD) -o $@ $<
+	$(CC) -c $(INCLUDES) $(CFLAGS_ALL) -o $@ $<
 
 testnstring: test.c nstring.h nstring.o
-	$(CC) $(CFLAGS) $(CADD) -o $@ nstring.o $<
+	$(CC) $(CFLAGS_ALL) -o $@ nstring.o $<
```