about summary refs log tree commit diff
path: root/lib/libpbm3.c
diff options
context:
space:
mode:
Diffstat (limited to 'lib/libpbm3.c')
-rw-r--r-- lib/libpbm3.c 300
1 files changed, 202 insertions, 98 deletions
diff --git a/lib/libpbm3.c b/lib/libpbm3.c
index 9c9bbd25..9200d30e 100644
--- a/lib/libpbm3.c
+++ b/lib/libpbm3.c
@@ -10,32 +10,23 @@
 ** implied warranty.
 */
 
+#include <assert.h>
+
+#include "pm_c_util.h"
 #include "pbm.h"
-#include "libpbm.h"
+
+#if HAVE_GCC_MMXSSE
 #include "bitreverse.h"
+#endif
 
-/* HAVE_MMX_SSE means we have the means to use MMX and SSE CPU facilities
-   to make PBM raster processing faster.
+/* HAVE_GCC_MMXSSE means we have the means to use MMX and SSE CPU facilities
+   to make PBM raster processing faster.  GCC only.
 
    The GNU Compiler -msse option makes SSE available.
+   For x86-32 with MMX/SSE, "-msse" must be explicitly given.
+   For x86-64 and AMD64, "-msse" is on by default.
 */
 
-#if defined(__GNUC__) && \
-  (__GNUC__ * 100 + __GNUC_MINOR__ >= 301) && \
-  (__GNUC__ * 100 + __GNUC_MINOR__ < 403) && \
-  defined (__SSE__)
-/* GCC 4.3 does have the facility, but it is different from what this
-   code knows how to use.  In particular, the calls to
-   __builtin_ia32_pcmpeqb() and __builtin_ia32_pmovmskb() fail to
-   compile, with complaints of improper argument types.
-*/
-
-#define HAVE_MMX_SSE 1
-#else
-#define HAVE_MMX_SSE 0
-#endif
-
-
 void
 pbm_writepbminit(FILE * const fileP, 
                  int    const cols, 
@@ -65,13 +56,13 @@ writePackedRawRow(FILE *                const fileP,
 } 
 
 
-
+#if HAVE_GCC_MMXSSE
 static void
 packBitsWithMmxSse(FILE *          const fileP,
                    const bit *     const bitrow,
                    unsigned char * const packedBits,
-                   int             const cols,
-                   int *           const nextColP) {
+                   unsigned int    const cols,
+                   unsigned int *  const nextColP) {
 /*----------------------------------------------------------------------------
    Pack the bits of bitrow[] into bytes at 'packedBits'.  Going left to right,
    stop when there aren't enough bits left to fill a whole byte.  Return
@@ -81,7 +72,6 @@ packBitsWithMmxSse(FILE *          const fileP,
    Use the Pentium MMX and SSE facilities to pack the bits quickly, but
    perform the exact same function as the simpler packBitsGeneric().
 -----------------------------------------------------------------------------*/
-#if HAVE_MMX_SSE
     /*
       We use MMX/SSE facilities that operate on 8 bytes at once to pack
       the bits quickly.
@@ -91,45 +81,37 @@ packBitsWithMmxSse(FILE *          const fileP,
       The key machine instructions are:
     
     
-      PCMPEQB  Packed CoMPare EQual Byte
+      PCMPGTB  Packed CoMPare Greater Than Byte
     
         Compares 8 bytes in parallel
-        Result is x00 if equal, xFF if unequal for each byte       
+        Result is xFF if greater than, x00 if not for each byte
     
       PMOVMSKB Packed MOVe MaSK Byte 
     
         Result is a byte of the MSBs of 8 bytes
-        x00 xFF x00 xFF xFF xFF x00 x00 --> 01011100B = 0x5C     
-    
+        x00 xFF x00 xFF xFF xFF x00 x00 --> 01011100B = 0x5C
+        
+        The result is actually a 32 bit int, but the higher bits are
+        always 0.  (0x0000005C in the above case)
     
       EMMS     Empty MMx State
     
         Free MMX registers  
     
-    
-      Here's a one-statement version of the code in our foor loop.  It's harder 
-      to read, but if we find out this generates more efficient code, we could 
-      use this.
-    
-        packedBits[col/8] 
-          = bitreverse [ ~ (unsigned char) __builtin_ia32_pmovmskb (
-            __builtin_ia32_pcmpeqb ( *(v8qi*) (&bitrow[col]), *(v8qi*) &zero64)
-            ) ];
     */
 
-    typedef int v8qi __attribute__ ((mode(V8QI)));
-    typedef int di __attribute__ ((mode(DI)));
 
-    di const zero64 = 0;        /* to clear with PXOR */
+    typedef char v8qi __attribute__ ((vector_size(8)));
+    typedef int di __attribute__ ((mode(DI)));
 
     unsigned int col;
+    v8qi const zero64 =(v8qi)((di)0);  /* clear to zero */
 
     for (col = 0; col + 7 < cols; col += 8) {
+
         v8qi const compare =
-            __builtin_ia32_pcmpeqb(*(v8qi*) (&bitrow[col]), *(v8qi*) &zero64);
-        unsigned char const backwardWhiteMask = (unsigned char)
-            __builtin_ia32_pmovmskb(compare);
-        unsigned char const backwardBlackMask = ~backwardWhiteMask;
+            __builtin_ia32_pcmpgtb(*(v8qi*) (&bitrow[col]), (v8qi) zero64);
+        uint32_t const backwardBlackMask =  __builtin_ia32_pmovmskb(compare);
         unsigned char const blackMask = bitreverse[backwardBlackMask];
 
         packedBits[col/8] = blackMask;
@@ -138,9 +120,20 @@ packBitsWithMmxSse(FILE *          const fileP,
 
     __builtin_ia32_emms();
 
+}
 #else
-    if (bitreverse == bitreverse) {}; /* avoid unused vbl compiler warning */
+/* Avoid undefined function warning; never actually called */
+
+#define packBitsWithMmxSse(a,b,c,d,e) packBitsGeneric(a,b,c,d,e)
 #endif
+
+
+
+
+static unsigned int
+bitValue(unsigned char const byteValue) {
+
+    return byteValue == 0 ? 0 : 1;
 }
 
 
@@ -149,10 +142,10 @@ static void
 packBitsGeneric(FILE *          const fileP,
                 const bit *     const bitrow,
                 unsigned char * const packedBits,
-                int             const cols,
-                int *           const nextColP) {
+                unsigned int    const cols,
+                unsigned int *  const nextColP) {
 /*----------------------------------------------------------------------------
-   Pack the bits of bitrow[] into byts at 'packedBits'.  Going left to right,
+   Pack the bits of bitrow[] into bytes at 'packedBits'.  Going left to right,
    stop when there aren't enough bits left to fill a whole byte.  Return
    as *nextColP the number of the next column after the rightmost one we
    packed.
@@ -161,18 +154,16 @@ packBitsGeneric(FILE *          const fileP,
 -----------------------------------------------------------------------------*/
     unsigned int col;
 
-    #define iszero(x) ((x) == 0 ? 0 : 1)
-
     for (col = 0; col + 7 < cols; col += 8)
         packedBits[col/8] = (
-            iszero(bitrow[col+0]) << 7 |
-            iszero(bitrow[col+1]) << 6 |
-            iszero(bitrow[col+2]) << 5 |
-            iszero(bitrow[col+3]) << 4 |
-            iszero(bitrow[col+4]) << 3 |
-            iszero(bitrow[col+5]) << 2 |
-            iszero(bitrow[col+6]) << 1 |
-            iszero(bitrow[col+7]) << 0
+            bitValue(bitrow[col+0]) << 7 |
+            bitValue(bitrow[col+1]) << 6 |
+            bitValue(bitrow[col+2]) << 5 |
+            bitValue(bitrow[col+3]) << 4 |
+            bitValue(bitrow[col+4]) << 3 |
+            bitValue(bitrow[col+5]) << 2 |
+            bitValue(bitrow[col+6]) << 1 |
+            bitValue(bitrow[col+7]) << 0
             );
     *nextColP = col;
 }
@@ -180,50 +171,72 @@ packBitsGeneric(FILE *          const fileP,
 
 
 static void
+packPartialBytes(const bit *     const bitrow,
+                 unsigned int    const cols,
+                 unsigned int    const nextCol,
+                 unsigned char * const packedBits) {
+              
+    /* routine for partial byte at the end of packedBits[]
+       Prior to addition of the above enhancement,
+       this method was used for the entire process
+    */                   
+    
+    unsigned int col;
+    int bitshift;
+    unsigned char item;
+    
+    bitshift = 7;  /* initial value */
+    item = 0;      /* initial value */
+    for (col = nextCol; col < cols; ++col, --bitshift)
+        if (bitrow[col] != 0)
+            item |= 1 << bitshift;
+    
+    packedBits[col/8] = item;
+}
+
+
+
+static void
 writePbmRowRaw(FILE *      const fileP,
                const bit * const bitrow,
                int         const cols) {
 
-    int nextCol;
+    jmp_buf jmpbuf;
+    jmp_buf * origJmpbufP;
+    unsigned char * packedBits;
 
-    unsigned char * const packedBits = pbm_allocrow_packed(cols);
+    packedBits = pbm_allocrow_packed(cols);
 
-    if (HAVE_MMX_SSE)
-        packBitsWithMmxSse(fileP, bitrow, packedBits, cols, &nextCol);
-    else 
-        packBitsGeneric(fileP, bitrow, packedBits, cols, &nextCol);
+    if (setjmp(jmpbuf) != 0) {
+        pbm_freerow_packed(packedBits);
+        pm_setjmpbuf(origJmpbufP);
+        pm_longjmp();
+    } else {
+        unsigned int nextCol;
 
-    /* routine for partial byte at the end of packed_bits[]
-       Prior to addition of the above enhancement,
-       this method was used for the entire process
-     */                   
-
-    if (cols % 8 > 0) {
-        int col;
-        int bitshift;
-        unsigned char item;
-
-        bitshift = 7;  /* initial value */
-        item = 0;      /* initial value */
-        for (col = nextCol; col < cols; ++col, --bitshift )
-            if (bitrow[col] !=0)
-                item |= 1 << bitshift
-                ;
+        pm_setjmpbufsave(&jmpbuf, &origJmpbufP);
+
+        if (HAVE_GCC_MMXSSE)
+            packBitsWithMmxSse(fileP, bitrow, packedBits, cols, &nextCol);
+        else 
+            packBitsGeneric(fileP, bitrow, packedBits, cols, &nextCol);
+
+        if (cols % 8 > 0)
+            packPartialBytes(bitrow, cols, nextCol, packedBits);
         
-        packedBits[col/8] = item;
+        writePackedRawRow(fileP, packedBits, cols);
+
+        pm_setjmpbuf(origJmpbufP);
     }
-    
-    writePackedRawRow(fileP, packedBits, cols);
-    
     pbm_freerow_packed(packedBits);
 }
 
 
 
 static void
-writePbmRowPlain(FILE * const fileP,
-                 bit *  const bitrow, 
-                 int    const cols) {
+writePbmRowPlain(FILE *      const fileP,
+                 const bit * const bitrow, 
+                 int         const cols) {
     
     int col, charcount;
 
@@ -242,10 +255,10 @@ writePbmRowPlain(FILE * const fileP,
 
 
 void
-pbm_writepbmrow(FILE * const fileP, 
-                bit *  const bitrow, 
-                int    const cols, 
-                int    const forceplain) {
+pbm_writepbmrow(FILE *       const fileP, 
+                const bit *  const bitrow, 
+                int          const cols, 
+                int          const forceplain) {
 
     if (!forceplain && !pm_plain_output)
         writePbmRowRaw(fileP, bitrow, cols);
@@ -257,28 +270,119 @@ pbm_writepbmrow(FILE * const fileP,
 
 void
 pbm_writepbmrow_packed(FILE *                const fileP, 
-                       const unsigned char * const packed_bits,
+                       const unsigned char * const packedBits,
                        int                   const cols, 
                        int                   const forceplain) {
 
     if (!forceplain && !pm_plain_output)
-        writePackedRawRow(fileP, packed_bits, cols);
+        writePackedRawRow(fileP, packedBits, cols);
     else {
-        bit *bitrow;
-        int col;
+        jmp_buf jmpbuf;
+        jmp_buf * origJmpbufP;
+        bit * bitrow;
 
         bitrow = pbm_allocrow(cols);
 
-        for (col = 0; col < cols; ++col) 
-            bitrow[col] = 
-                packed_bits[col/8] & (0x80 >> (col%8)) ? PBM_BLACK : PBM_WHITE;
-        writePbmRowPlain(fileP, bitrow, cols);
+        if (setjmp(jmpbuf) != 0) {
+            pbm_freerow(bitrow);
+            pm_setjmpbuf(origJmpbufP);
+            pm_longjmp();
+        } else {
+            unsigned int col;
+            
+            pm_setjmpbufsave(&jmpbuf, &origJmpbufP);
+
+            for (col = 0; col < cols; ++col) 
+                bitrow[col] = 
+                    packedBits[col/8] & (0x80 >> (col%8)) ?
+                    PBM_BLACK : PBM_WHITE;
+
+            writePbmRowPlain(fileP, bitrow, cols);
+
+            pm_setjmpbuf(origJmpbufP);
+        }
         pbm_freerow(bitrow);
     }
 }
 
 
 
+static unsigned char
+leftBits(unsigned char const x,
+         unsigned int  const n) {
+/*----------------------------------------------------------------------------
+   Clear rightmost (8-n) bits, retain leftmost (=high) n bits.
+-----------------------------------------------------------------------------*/
+    unsigned char buffer;
+
+    assert(n < 8);
+
+    buffer = x;
+
+    buffer >>= (8-n);
+    buffer <<= (8-n);
+
+    return buffer;
+}
+
+
+
+void
+pbm_writepbmrow_bitoffset(FILE *          const fileP,
+                          unsigned char * const packedBits,
+                          unsigned int    const cols,
+                          int             const format,
+                          unsigned int    const offset) {
+/*----------------------------------------------------------------------------
+   Write PBM row from a packed bit buffer 'packedBits', starting at the
+   specified offset 'offset' in the buffer.
+
+   We destroy the buffer.
+-----------------------------------------------------------------------------*/
+    unsigned int const rsh = offset % 8;
+    unsigned int const lsh = (8 - rsh) % 8;
+    unsigned int const csh = cols % 8;
+    unsigned char * const window = &packedBits[offset/8];
+        /* Area of packed row buffer from which we take the image data.
+           Aligned to nearest byte boundary to the left, so the first
+           few bits might be irrelevant.
+
+           Also our work buffer, in which we shift bits and from which we
+           ultimately write the bits to the file.
+        */
+    unsigned int const colByteCnt = pbm_packed_bytes(cols);
+    unsigned int const last = colByteCnt - 1;
+        /* Position within window of rightmost byte after shift */
+
+    bool const carryover = (csh == 0 || rsh + csh > 8);
+        /* TRUE:  Input comes from colByteCnt bytes and one extra byte.
+           FALSE: Input comes from colByteCnt bytes.  For example:
+           TRUE:  xxxxxxii iiiiiiii iiiiiiii iiixxxxx  cols=21, offset=6 
+           FALSE: xiiiiiii iiiiiiii iiiiiixx ________  cols=21, offset=1
+
+           We treat these differently because in the FALSE case the byte after
+           last (indicated by ________) may not exist.
+        */
+       
+    if (rsh > 0) {
+        unsigned int const shiftBytes =  carryover ? colByteCnt : colByteCnt-1;
+
+        unsigned int i;
+        for (i = 0; i < shiftBytes; ++i)
+            window[i] = window[i] << rsh | window[i+1] >> lsh;
+
+        if (!carryover)
+            window[last] = window[last] << rsh;
+    }
+      
+    if (csh > 0)
+        window[last] = leftBits(window[last], csh);
+          
+    pbm_writepbmrow_packed(fileP, window, cols, 0);
+}
+
+
+
 void
 pbm_writepbm(FILE * const fileP, 
              bit ** const bits,