diff options
-rw-r--r-- | converter/other/bmptopnm.c | 2 | ||||
-rw-r--r-- | converter/other/jpeg2000/libjasper/jpc/jpc_math.c | 128 | ||||
-rw-r--r-- | converter/other/pamtogif.c | 2 | ||||
-rw-r--r-- | lib/libpbm3.c | 73 |
4 files changed, 97 insertions, 108 deletions
diff --git a/converter/other/bmptopnm.c b/converter/other/bmptopnm.c index 30b1012f..647ae3c6 100644 --- a/converter/other/bmptopnm.c +++ b/converter/other/bmptopnm.c @@ -421,7 +421,7 @@ lsbZeroCount(unsigned int const mask) Use GCC built-in when available. -----------------------------------------------------------------------------*/ -#if ( defined(__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__ >= 304) ) +#if HAVE_GCC_BITCOUNT { return ( mask==0 ? sizeof(mask)*8 : __builtin_ctz(mask) ); } diff --git a/converter/other/jpeg2000/libjasper/jpc/jpc_math.c b/converter/other/jpeg2000/libjasper/jpc/jpc_math.c index d860847d..72e3ac37 100644 --- a/converter/other/jpeg2000/libjasper/jpc/jpc_math.c +++ b/converter/other/jpeg2000/libjasper/jpc/jpc_math.c @@ -1,3 +1,72 @@ +#include <assert.h> +#include <stdio.h> +#include <string.h> +#include <math.h> +#include <stdlib.h> +#include <stdarg.h> + +#include "jpc_math.h" + + + +/* Calculate the integer quantity floor(log2(x)), where x is a positive + integer. */ +int +jpc_floorlog2(int const arg) { + + int y; + int x; + + assert(arg > 0); + + y = 0; + x = arg; + while (x > 1) { + x >>= 1; + ++y; + } + return y; +} + + + +/* + jpc_floorlog2() and jpc_firstone() do the same thing. + The only difference is how input 0 is handled. + +n : 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 +ceil(log2(n)) : x 0 1 2 2 3 3 3 3 4 4 4 4 4 4 4 4 5 5 5 +floor(log2(n)) : x 0 1 1 2 2 2 2 3 3 3 3 3 3 3 3 4 4 4 4 +31-__builtin_clz(n): x 0 1 1 2 2 2 2 3 3 3 3 3 3 3 3 4 4 4 4 +jpc_floorlog2(n) : x 0 1 1 2 2 2 2 3 3 3 3 3 3 3 3 4 4 4 4 +jpc_firstone(n) :-1 0 1 1 2 2 2 2 3 3 3 3 3 3 3 3 4 4 4 4 + +*/ + + + +int +jpc_firstone(int const arg) { +/*---------------------------------------------------------------------------- + Calculate the bit position of the first leading one in a nonnegative + integer. 
+-----------------------------------------------------------------------------*/ + int n; + int x; + + assert(arg >= 0); + + n = -1; + x = arg; + while (x > 0) { + x >>= 1; + ++n; + } + return n; +} + + + /* * Copyright (c) 1999-2000 Image Power, Inc. and the University of * British Columbia. @@ -109,62 +178,3 @@ * * __END_OF_JASPER_LICENSE__ */ - -/* - * Math Library - * - * $Id$ - */ - -/******************************************************************************\ -* Includes -\******************************************************************************/ - -#include <assert.h> -#include <stdio.h> -#include <string.h> -#include <math.h> -#include <stdlib.h> -#include <stdarg.h> - -#include "jpc_math.h" - -/******************************************************************************\ -* Miscellaneous Functions -\******************************************************************************/ - -/* Calculate the integer quantity floor(log2(x)), where x is a positive - integer. */ -int jpc_floorlog2(int x) -{ - int y; - - /* The argument must be positive. */ - assert(x > 0); - - y = 0; - while (x > 1) { - x >>= 1; - ++y; - } - return y; -} - -/* Calculate the bit position of the first leading one in a nonnegative - integer. */ -/* This function is the basically the same as ceillog2(x), except that the - allowable range for x is slightly different. */ -int jpc_firstone(int x) -{ - int n; - - /* The argument must be nonnegative. */ - assert(x >= 0); - - n = -1; - while (x > 0) { - x >>= 1; - ++n; - } - return n; -} diff --git a/converter/other/pamtogif.c b/converter/other/pamtogif.c index b8df9626..0c8c0f9e 100644 --- a/converter/other/pamtogif.c +++ b/converter/other/pamtogif.c @@ -864,7 +864,7 @@ typedef struct { static unsigned int nSignificantBits( unsigned int const arg ){ -#if defined(__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__ >= 304) +#if HAVE_GCC_BITCOUNT return (arg == 0) ? 
0 : 8 * sizeof(unsigned int) - __builtin_clz(arg); diff --git a/lib/libpbm3.c b/lib/libpbm3.c index 29b89e47..9200d30e 100644 --- a/lib/libpbm3.c +++ b/lib/libpbm3.c @@ -13,31 +13,20 @@ #include <assert.h> #include "pm_c_util.h" -#include "bitreverse.h" #include "pbm.h" -/* HAVE_MMX_SSE means we have the means to use MMX and SSE CPU facilities - to make PBM raster processing faster. +#if HAVE_GCC_MMXSSE +#include "bitreverse.h" +#endif + +/* HAVE_GCC_MMXSSE means we have the means to use MMX and SSE CPU facilities + to make PBM raster processing faster. GCC only. The GNU Compiler -msse option makes SSE available. + For x86-32 with MMX/SSE, "-msse" must be explicitly given. + For x86-64 and AMD64, "-msse" is on by default. */ -#if defined(__GNUC__) && \ - (__GNUC__ * 100 + __GNUC_MINOR__ >= 301) && \ - (__GNUC__ * 100 + __GNUC_MINOR__ < 403) && \ - defined (__SSE__) -/* GCC 4.3 does have the facility, but it is different from what this - code knows how to use. In particular, the calls to - __builtin_ia32_pcmpeqb() and __builtin_ia32_pmovmskb() fail to - compile, with complaints of improper argument types. -*/ - -#define HAVE_MMX_SSE 1 -#else -#define HAVE_MMX_SSE 0 -#endif - - void pbm_writepbminit(FILE * const fileP, int const cols, @@ -67,7 +56,7 @@ writePackedRawRow(FILE * const fileP, } - +#if HAVE_GCC_MMXSSE static void packBitsWithMmxSse(FILE * const fileP, const bit * const bitrow, @@ -83,7 +72,6 @@ packBitsWithMmxSse(FILE * const fileP, Use the Pentium MMX and SSE facilities to pack the bits quickly, but perform the exact same function as the simpler packBitsGeneric(). -----------------------------------------------------------------------------*/ -#if HAVE_MMX_SSE /* We use MMX/SSE facilities that operate on 8 bytes at once to pack the bits quickly. 
@@ -93,49 +81,37 @@ packBitsWithMmxSse(FILE * const fileP, The key machine instructions are: - PCMPEQB Packed CoMPare EQual Byte + PCMPGTB Packed CoMPare Greater Than Byte Compares 8 bytes in parallel - Result is x00 if equal, xFF if unequal for each byte + Result is xFF if greater than, x00 if not, for each byte PMOVMSKB Packed MOVe MaSK Byte Result is a byte of the MSBs of 8 bytes - x00 xFF x00 xFF xFF xFF x00 x00 --> 01011100B = 0x5C - + x00 xFF x00 xFF xFF xFF x00 x00 --> 01011100B = 0x5C + + The result is actually a 32 bit int, but the higher bits are + always 0. (0x0000005C in the above case) EMMS Empty MMx State Free MMX registers - - Here's a one-statement version of the code in our foor loop. It's harder - to read, but if we find out this generates more efficient code, we could - use this. - - packedBits[col/8] - = bitreverse [ ~ (unsigned char) __builtin_ia32_pmovmskb ( - __builtin_ia32_pcmpeqb ( *(v8qi*) (&bitrow[col]), *(v8qi*) &zero64) - ) ]; */ -#if (__GNUC__ * 100 + __GNUC_MINOR__ >= 403) + typedef char v8qi __attribute__ ((vector_size(8))); -#else - typedef int v8qi __attribute__ ((mode(V8QI))); -#endif typedef int di __attribute__ ((mode(DI))); - di const zero64 = 0; /* to clear with PXOR */ - unsigned int col; + v8qi const zero64 =(v8qi)((di)0); /* clear to zero */ for (col = 0; col + 7 < cols; col += 8) { + v8qi const compare = - __builtin_ia32_pcmpeqb(*(v8qi*) (&bitrow[col]), *(v8qi*) &zero64); - unsigned char const backwardWhiteMask = (unsigned char) - __builtin_ia32_pmovmskb(compare); - unsigned char const backwardBlackMask = ~backwardWhiteMask; + __builtin_ia32_pcmpgtb(*(v8qi*) (&bitrow[col]), (v8qi) zero64); + uint32_t const backwardBlackMask = __builtin_ia32_pmovmskb(compare); unsigned char const blackMask = bitreverse[backwardBlackMask]; packedBits[col/8] = blackMask; @@ -144,10 +120,13 @@ packBitsWithMmxSse(FILE * const fileP, __builtin_ia32_emms(); +} #else - if (bitreverse == bitreverse) {}; /* avoid unused vbl compiler warning */ +/*
Avoid undefined function warning; never actually called */ + +#define packBitsWithMmxSse(a,b,c,d,e) packBitsGeneric(a,b,c,d,e) #endif -} + @@ -237,7 +216,7 @@ writePbmRowRaw(FILE * const fileP, pm_setjmpbufsave(&jmpbuf, &origJmpbufP); - if (HAVE_MMX_SSE) + if (HAVE_GCC_MMXSSE) packBitsWithMmxSse(fileP, bitrow, packedBits, cols, &nextCol); else packBitsGeneric(fileP, bitrow, packedBits, cols, &nextCol); |