From c07539b1f8e185b1353449eb167d671488096c03 Mon Sep 17 00:00:00 2001
From: giraffedata <giraffedata@9d0c8265-081b-0410-96cb-a4ca84ce46f8>
Date: Sat, 16 Feb 2019 16:20:09 +0000
Subject: Speedup for PBM with xscale factor >10

git-svn-id: http://svn.code.sf.net/p/netpbm/code/trunk@3547 9d0c8265-081b-0410-96cb-a4ca84ce46f8
---
 editor/pamenlarge.c | 485 +++++++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 401 insertions(+), 84 deletions(-)

(limited to 'editor')

diff --git a/editor/pamenlarge.c b/editor/pamenlarge.c
index 7e2d58c1..a33bd29c 100644
--- a/editor/pamenlarge.c
+++ b/editor/pamenlarge.c
@@ -3,6 +3,9 @@
 ===============================================================================
   By Bryan Henderson 2004.09.26.  Contributed to the public domain by its
   author.
+
+  The design and code for the fast processing of PBMs is by Akira Urushibata
+  in March 2010 and substantially improved in February 2019.
 =============================================================================*/
 
 #include <stdbool.h>
@@ -178,14 +181,31 @@ validateComputableDimensions(unsigned int const width,
 }
 
 
+static unsigned char const pair[7][4] = {  /* pair[k][2-bit value hi:lo] */
+    { 0x00 , 0x7F , 0x80 , 0xFF},   /* k=0: 1 bit  of hi, 7 bits of lo */
+    { 0x00 , 0x3F , 0xC0 , 0xFF},   /* k=1: 2 bits of hi, 6 bits of lo */
+    { 0x00 , 0x1F , 0xE0 , 0xFF},   /* k=2: 3 bits of hi, 5 bits of lo */
+    { 0x00 , 0x0F , 0xF0 , 0xFF},   /* k=3: 4 bits of hi, 4 bits of lo */
+    { 0x00 , 0x07 , 0xF8 , 0xFF},   /* k=4: 5 bits of hi, 3 bits of lo */
+    { 0x00 , 0x03 , 0xFC , 0xFF},   /* k=5: 6 bits of hi, 2 bits of lo */
+    { 0x00 , 0x01 , 0xFE , 0xFF} }; /* k=6: 7 bits of hi, 1 bit  of lo */
+
+
 
 static void
-enlargePbmRowHorizontally(struct pam *          const inpamP,
-                          const unsigned char * const inrow,
-                          unsigned int          const inColChars,
-                          unsigned int          const outColChars,
-                          unsigned int          const scaleFactor,
-                          unsigned char *       const outrow) {
+enlargePbmRowHorizontallySmall(const unsigned char * const inrow,
+                               unsigned int          const inColChars,
+                               unsigned int          const xScaleFactor,
+                               unsigned char *       const outrow) {
+/*----------------------------------------------------------------------------
+   Fast routines for scale factors 1-13.
+
+   Using a temp value "inrowChar" makes a difference.  We know that inrow
+   and outrow don't overlap, but the compiler does not and emits code
+   which reads inrow[colChar] each time fearing that a write to outrow[x]
+   may have altered the value.  (The first "const" for inrow in the above
+   argument list is not enough for the compiler.)
+-----------------------------------------------------------------------------*/
 
     static unsigned char const dbl[16] = {
         0x00, 0x03, 0x0C, 0x0F, 0x30, 0x33, 0x3C, 0x3F,
@@ -201,82 +221,91 @@ enlargePbmRowHorizontally(struct pam *          const inpamP,
     static unsigned char const trp3[8] = {
         0x00, 0x07, 0x38, 0x3F, 0xC0, 0xC7, 0xF8, 0xFF };
 
-    static unsigned char const quad[4] = { 0x00, 0x0F, 0xF0, 0xFF };
-
     static unsigned char const quin2[8] = {
         0x00, 0x01, 0x3E, 0x3F, 0xC0, 0xC1, 0xFE, 0xFF };
 
     static unsigned char const quin4[8] = {
         0x00, 0x03, 0x7C, 0x7F, 0x80, 0x83, 0xFC, 0xFF };
 
-    static unsigned int const pair[4] = { 0x0000, 0x00FF, 0xFF00, 0xFFFF };
+    static unsigned char const * quad = pair[3];
 
     unsigned int colChar;
 
-    switch (scaleFactor) {
+    switch (xScaleFactor) {
     case 1:  break; /* outrow set to inrow */
+
     case 2:  /* Make outrow using prefabricated parts (same for 3, 5). */
         for (colChar = 0; colChar < inColChars; ++colChar) {
-            outrow[colChar*2]   = dbl[(inrow[colChar] & 0xF0) >> 4];
-            outrow[colChar*2+1] = dbl[(inrow[colChar] & 0x0F) >> 0];
+            unsigned char const inrowChar = inrow[colChar];
+            outrow[colChar*2]   = dbl[ inrowChar >> 4];
+            outrow[colChar*2+1] = dbl[(inrowChar & 0x0F) >> 0];
             /* Possible outrow overrun by one byte. */
         }
         break;
 
     case 3:
         for (colChar = 0; colChar < inColChars; ++colChar) {
-            outrow[colChar*3]   = trp1[(inrow[colChar] & 0xF0) >> 5];
-            outrow[colChar*3+1] = trp2[(inrow[colChar] >> 2) & 0x0F];
-            outrow[colChar*3+2] = trp3[(inrow[colChar] >> 0) & 0x07];
+            unsigned char const inrowChar = inrow[colChar];
+            outrow[colChar*3]   = trp1[ inrowChar >> 5];
+            outrow[colChar*3+1] = trp2[(inrowChar >> 2) & 0x0F];
+            outrow[colChar*3+2] = trp3[(inrowChar >> 0) & 0x07];
         }
         break;
 
     case 4:
         for (colChar = 0; colChar < inColChars; ++colChar) {
+            unsigned char const inrowChar = inrow[colChar];
             unsigned int i;
             for (i = 0; i < 4; ++i)
-                outrow[colChar*4+i]=
-                    quad[(inrow[colChar] >> (6 - 2 * i)) & 0x03];
+                outrow[colChar*4+i] =
+                    quad[(inrowChar >> (6 - 2 * i)) & 0x03];
         }
         break;
 
     case 5:
         for (colChar = 0; colChar < inColChars; ++colChar) {
-            outrow[colChar*5]   = pair [(inrow[colChar] >> 6) & 0x03] >> 5;
-            outrow[colChar*5+1] = quin2[(inrow[colChar] >> 4) & 0x07] >> 0;
-            outrow[colChar*5+2] = quad [(inrow[colChar] >> 3) & 0x03] >> 0;
-            outrow[colChar*5+3] = quin4[(inrow[colChar] >> 1) & 0x07] >> 0;
-            outrow[colChar*5+4] = pair [(inrow[colChar] >> 0) & 0x03] >> 3;
+            unsigned char const inrowChar = inrow[colChar];
+            outrow[colChar*5]   = pair [4][(inrowChar >> 6) & 0x03];
+            outrow[colChar*5+1] = quin2[(inrowChar >> 4) & 0x07] >> 0;
+            outrow[colChar*5+2] = quad [(inrowChar >> 3) & 0x03] >> 0;
+            outrow[colChar*5+3] = quin4[(inrowChar >> 1) & 0x07] >> 0;
+            outrow[colChar*5+4] = pair [2][(inrowChar >> 0) & 0x03];
         }
         break;
 
     case 6:  /* Compound of 2 and 3 */
         for (colChar = 0; colChar < inColChars; ++colChar) {
-            unsigned char const hi = dbl[(inrow[colChar] & 0xF0) >> 4];
-            unsigned char const lo = dbl[(inrow[colChar] & 0x0F) >> 0];
+            unsigned char const inrowChar = inrow[colChar];
+            unsigned char const hi = dbl[(inrowChar & 0xF0) >> 4];
+            unsigned char const lo = dbl[(inrowChar & 0x0F) >> 0];
 
-            outrow[colChar*6]   = trp1[(hi & 0xF0) >> 5];
+            outrow[colChar*6]   = trp1[hi >> 5];
             outrow[colChar*6+1] = trp2[(hi >> 2) & 0x0F];
             outrow[colChar*6+2] = trp3[hi & 0x07];
 
-            outrow[colChar*6+3] = trp1[(lo & 0xF0) >> 5];
+            outrow[colChar*6+3] = trp1[lo >> 5];
             outrow[colChar*6+4] = trp2[(lo >> 2) & 0x0F];
             outrow[colChar*6+5] = trp3[lo & 0x07];
         }
         break;
 
     case 7:
+        /* This approach can be used for other scale values.
+           Good for architectures which provide wide registers
+           such as SSE.
+        */
         for (colChar = 0; colChar < inColChars; ++colChar) {
+            unsigned char const inrowChar = inrow[colChar];
             uint32_t hi, lo;
 
-            hi = inrow[colChar] >> 4;
+            hi = inrowChar >> 4;
             hi = ((((hi>>1) * 0x00082080) | (0x01 & hi)) & 0x00204081 ) * 0x7F;
             hi >>= 4;
             outrow[colChar*7]   =  (unsigned char) ( hi >> 16);
             outrow[colChar*7+1] =  (unsigned char) ((hi >>  8) & 0xFF);
             outrow[colChar*7+2] =  (unsigned char) ((hi >>  0) & 0xFF);
 
-            lo = inrow[colChar] & 0x001F;
+            lo = inrowChar & 0x001F;
             lo = ((((lo>>1) * 0x02082080) | (0x01 & lo)) & 0x10204081 ) * 0x7F;
             outrow[colChar*7+3] =  (unsigned char) ((lo >> 24) & 0xFF);
             outrow[colChar*7+4] =  (unsigned char) ((lo >> 16) & 0xFF);
@@ -287,77 +316,344 @@ enlargePbmRowHorizontally(struct pam *          const inpamP,
 
     case 8:
         for (colChar = 0; colChar < inColChars; ++colChar) {
+            unsigned char const inrowChar = inrow[colChar];
             unsigned int i;
             for (i = 0; i < 8; ++i) {
                 outrow[colChar*8+i] =
-                    ((inrow[colChar] >> (7-i)) & 0x01) *0xFF;
+                    ((inrowChar >> (7-i)) & 0x01) *0xFF;
             }
         }
         break;
 
     case 9:
         for (colChar = 0; colChar < inColChars; ++colChar) {
-            outrow[colChar*9]   =  ((inrow[colChar] >> 7) & 0x01) * 0xFF;
-            outrow[colChar*9+1] =  pair[(inrow[colChar] >> 6) & 0x03] >> 1;
-            outrow[colChar*9+2] =  pair[(inrow[colChar] >> 5) & 0x03] >> 2;
-            outrow[colChar*9+3] =  pair[(inrow[colChar] >> 4) & 0x03] >> 3;
-            outrow[colChar*9+4] =  pair[(inrow[colChar] >> 3) & 0x03] >> 4;
-            outrow[colChar*9+5] =  pair[(inrow[colChar] >> 2) & 0x03] >> 5;
-            outrow[colChar*9+6] =  pair[(inrow[colChar] >> 1) & 0x03] >> 6;
-            outrow[colChar*9+7] =  pair[(inrow[colChar] >> 0) & 0x03] >> 7;
-            outrow[colChar*9+8] =  (inrow[colChar] & 0x01) * 0xFF;
+            unsigned char const inrowChar = inrow[colChar];
+            outrow[colChar*9]   =  ((inrowChar >> 7) & 0x01) * 0xFF;
+            outrow[colChar*9+1] =  pair[0][(inrowChar >> 6) & 0x03];
+            outrow[colChar*9+2] =  pair[1][(inrowChar >> 5) & 0x03];
+            outrow[colChar*9+3] =  pair[2][(inrowChar >> 4) & 0x03];
+            outrow[colChar*9+4] =  pair[3][(inrowChar >> 3) & 0x03];
+            outrow[colChar*9+5] =  pair[4][(inrowChar >> 2) & 0x03];
+            outrow[colChar*9+6] =  pair[5][(inrowChar >> 1) & 0x03];
+            outrow[colChar*9+7] =  pair[6][(inrowChar >> 0) & 0x03];
+            outrow[colChar*9+8] =  (inrowChar & 0x01) * 0xFF;
         }
         break;
 
     case 10:
         for (colChar = 0; colChar < inColChars; ++colChar) {
-            outrow[colChar*10]   = ((inrow[colChar] >> 7) & 0x01 ) * 0xFF;
-            outrow[colChar*10+1] = pair[(inrow[colChar] >> 6) & 0x03] >> 2;
-            outrow[colChar*10+2] = pair[(inrow[colChar] >> 5) & 0x03] >> 4;
-            outrow[colChar*10+3] = pair[(inrow[colChar] >> 4) & 0x03] >> 6;
-            outrow[colChar*10+4] = ((inrow[colChar] >> 4) & 0x01) * 0xFF;
-            outrow[colChar*10+5] = ((inrow[colChar] >> 3) & 0x01) * 0xFF;
-            outrow[colChar*10+6] = pair[(inrow[colChar] >> 2) & 0x03] >> 2;
-            outrow[colChar*10+7] = pair[(inrow[colChar] >> 1) & 0x03] >> 4;
-            outrow[colChar*10+8] = pair[(inrow[colChar] >> 0) & 0x03] >> 6;
-            outrow[colChar*10+9] = ((inrow[colChar] >> 0) & 0x01) * 0xFF;
+            unsigned char const inrowChar = inrow[colChar];
+            outrow[colChar*10]   = ((inrowChar >> 7) & 0x01 ) * 0xFF;
+            outrow[colChar*10+1] = pair[1][(inrowChar >> 6) & 0x03];
+            outrow[colChar*10+2] = quad[(inrowChar >> 5) & 0x03];
+            outrow[colChar*10+3] = pair[5][(inrowChar >> 4) & 0x03];
+            outrow[colChar*10+4] = ((inrowChar >> 4) & 0x01) * 0xFF;
+            outrow[colChar*10+5] = ((inrowChar >> 3) & 0x01) * 0xFF;
+            outrow[colChar*10+6] = pair[1][(inrowChar >> 2) & 0x03];
+            outrow[colChar*10+7] = quad[(inrowChar >> 1) & 0x03];
+            outrow[colChar*10+8] = pair[5][(inrowChar >> 0) & 0x03];
+            outrow[colChar*10+9] = ((inrowChar >> 0) & 0x01) * 0xFF;
         }
         break;
 
+    case 11:
+        for (colChar = 0; colChar < inColChars; ++colChar) {
+            unsigned char const inrowChar = inrow[colChar];
+            outrow[colChar*11]   = ((inrowChar >> 7) & 0x01 ) * 0xFF;
+            outrow[colChar*11+1] = pair[2][(inrowChar >> 6) & 0x03];
+            outrow[colChar*11+2] = pair[5][(inrowChar >> 5) & 0x03];
+            outrow[colChar*11+3] = ((inrowChar >> 5) & 0x01) * 0xFF;
+            outrow[colChar*11+4] = pair[0][(inrowChar >> 4) & 0x03];
+            outrow[colChar*11+5] = quad[(inrowChar >> 3) & 0x03];
+            outrow[colChar*11+6] = pair[6][(inrowChar >> 2) & 0x03];
+            outrow[colChar*11+7] = ((inrowChar >> 2) & 0x01) * 0xFF;
+            outrow[colChar*11+8] = pair[1][(inrowChar >> 1) & 0x03];
+            outrow[colChar*11+9] = pair[4][(inrowChar >> 0) & 0x03];
+            outrow[colChar*11+10] = ((inrowChar >> 0) & 0x01) * 0xFF;
+        }
+        break;
+
+    case 12:
+        for (colChar = 0; colChar < inColChars; ++colChar) {
+            unsigned char const inrowChar = inrow[colChar];
+            outrow[colChar*12+ 0] = ((inrowChar >> 7) & 0x01) * 0xFF;
+            outrow[colChar*12+ 1] = quad[(inrowChar >> 6) & 0x03];
+            outrow[colChar*12+ 2] = ((inrowChar >> 6) & 0x01) * 0xFF;
+            outrow[colChar*12+ 3] = ((inrowChar >> 5) & 0x01) * 0xFF;
+            outrow[colChar*12+ 4] = quad[(inrowChar >> 4) & 0x03];
+            outrow[colChar*12+ 5] = ((inrowChar >> 4) & 0x01) * 0xFF;
+            outrow[colChar*12+ 6] = ((inrowChar >> 3) & 0x01) * 0xFF;
+            outrow[colChar*12+ 7] = quad[(inrowChar >> 2) & 0x03];
+            outrow[colChar*12+ 8] = ((inrowChar >> 2) & 0x01) * 0xFF;
+            outrow[colChar*12+ 9] = ((inrowChar >> 1) & 0x01) * 0xFF;
+            outrow[colChar*12+10] = quad[(inrowChar >> 0) & 0x03];
+            outrow[colChar*12+11] = ((inrowChar >> 0) & 0x01) * 0xFF;
+        }
+        break;
+
+    case 13:
+      /* Math quiz: 13 is the last entry here.
+         Is this an arbitrary choice?
+         Or is there something which makes 13 necessary?
+
+         If you like working on questions like this you may like
+         number/group theory.  However don't expect a straightforward
+         answer from a college math textbook.  - afu
+      */
+         for (colChar = 0; colChar < inColChars; ++colChar) {
+            unsigned char const inrowChar = inrow[colChar];
+            outrow[colChar*13+ 0] = ((inrowChar >> 7) & 0x01) * 0xFF;
+            outrow[colChar*13+ 1] = pair[4][(inrowChar >> 6) & 0x03];
+            outrow[colChar*13+ 2] = ((inrowChar >> 6) & 0x01) * 0xFF;
+            outrow[colChar*13+ 3] = pair[1][(inrowChar >> 5) & 0x03];
+            outrow[colChar*13+ 4] = pair[6][(inrowChar >> 4) & 0x03];
+            outrow[colChar*13+ 5] = ((inrowChar >> 4) & 0x01) * 0xFF;
+            outrow[colChar*13+ 6] = quad[(inrowChar >> 3) & 0x03];
+            outrow[colChar*13+ 7] = ((inrowChar >> 3) & 0x01) * 0xFF;
+            outrow[colChar*13+ 8] = pair[0][(inrowChar >> 2) & 0x03];
+            outrow[colChar*13+ 9] = pair[5][(inrowChar >> 1) & 0x03];
+            outrow[colChar*13+10] = ((inrowChar >> 1) & 0x01) * 0xFF;
+            outrow[colChar*13+11] = pair[2][(inrowChar >> 0) & 0x03];
+            outrow[colChar*13+12] = ((inrowChar >> 0) & 0x01) * 0xFF;
+        }
+        break;
 
     default:
-        /*  Unlike the above cases, we iterate through outrow.  To compute the
-            color composition of each outrow byte, we consult a single bit or
-            two consecutive bits in inrow.
+        pm_error("Internal error");
+    }
+}
+
+
+/*
+  General method for scale values 14 and above
+
+  First notice that all output characters are either entirely 0, entirely 1
+  or a combination with the change from 0->1 or 1->0 happening only once.
+  (Sequences like 00111000 never appear when scale value is above 8).
+
+  Let us call the chars which are entirely 0 or 1 "solid" and those which
+  may potentially contain both "transitional".   For scale values 6 - 14
+  each input character expands to output characters aligned as follows:
+
+  6 : TTTTTT
+  7 : TTTTTTT
+  8 : SSSSSSSS
+  9 : STTTTTTTS
+  10: STSTSSTSTS
+  11: STTSTTTSTTS
+  12: STSSTSSTSSTS
+  13: STSTTSTSTTSTS
+  14: STSTSTSSTSTSTS
+
+  Above 15 we insert strings of solid chars as necessary:
+
+  22: SsTSsTSsTSsSsTSsTSsTSs
+  30: SssTSssTSssTSssSssTSssTSssTSss
+  38: SsssTSsssTSsssTSsssSsssTSsssTSsssTSsss
+*/
+
+
+struct OffsetInit {       /* layout template, one per (scale factor % 8) */
+  unsigned int scale;     /* factor given to enlargePbmRowHorizontallySmall */
+  const char * alignment; /* string of '1' and '2'; see setupOffsetTable() */
+};
+
+
+static struct OffsetInit const offsetInit[8] = {
+  /* '1': single char copied from output of enlargePbmRowHorizontallySmall()
+     '2': stretch of solid chars
+
+     Each entry is symmetrical left-right and has exactly eight '2's
+   */
+
+  {  8, "22222222" },               /* 8n+0 */
+  {  9, "21121212121212112" },      /* 8n+1 */
+  { 10, "211212112211212112" },     /* 8n+2 */
+  { 11, "2112121121211212112" },    /* 8n+3 */
+  {  4, "212212212212" },           /* 8n+4 */
+  { 13, "211211211212112112112" },  /* 8n+5 */
+  {  6, "21212122121212" },         /* 8n+6 */
+  {  7, "212121212121212" }         /* 8n+7 */
+};
+
+  /*   Relationship between 'S' 'T' in previous comment and '1' '2' here
+
+     11: STTSTTTSTTS
+     19: sSTsTsSTsTsTSsTsTSs
+         2112121121211212112           # table entry for 8n+3
+     27: ssSTssTssSTssTssTSssTssTSss
+         2*112*12*112*12*112*12*112*
+     35: sssSTsssTsssSTsssTsssTSsssTsssTSsss
+         2**112**12**112**12**112**12**112**
+     43: ssssSTssssTssssSTssssTssssTSssssTssssTSssss
+         2***112***12***112***12***112***12***112***
+  */
+
+
+struct OffsetTable {  /* offsets count output chars within one input char */
+    unsigned int offsetSolid[8];   /* start of each solid stretch ('2') */
+    unsigned int offsetTrans[13];  /* position of each single char ('1') */
+    unsigned int scale;            /* 'scale' of the offsetInit[] entry used */
+    unsigned int solidChars;       /* length of every solid stretch */
+};
+
+
+
+static void
+setupOffsetTable(unsigned int         const xScaleFactor,
+                 struct OffsetTable * const offsetTableP) {
+    /* Fill *offsetTableP from the offsetInit[] entry for xScaleFactor % 8 */
+    unsigned int i, j0, j1, dest;
+    struct OffsetInit const classEntry = offsetInit[xScaleFactor % 8];
+    unsigned int const scale = classEntry.scale;
+    unsigned int const solidChars = xScaleFactor / 8 - (scale > 8 ? 1 : 0);
+    /* solidChars: length of each '2' stretch; dest: running output offset */
+    for (i = j0 = j1 = dest = 0; classEntry.alignment[i] != '\0'; ++i) {
+      switch (classEntry.alignment[i]) {
+        case '1': offsetTableP->offsetTrans[j0++] = dest++;  /* one char */
+                  break;
+
+        case '2': offsetTableP->offsetSolid[j1++] = dest;    /* stretch start */
+                  dest += solidChars;       /* step over the whole stretch */
+                  break;
+
+        default:  pm_error("Internal error. Abnormal alignment value");
+        }
+    }
+    /* Keep the class parameters with the offsets for the row routines */
+    offsetTableP->scale = scale;
+    offsetTableP->solidChars = solidChars;
+}
+
+
+
+static void
+enlargePbmRowFractional(unsigned char         const inrowChar,
+                        unsigned int          const outColChars,
+                        unsigned int          const xScaleFactor,
+                        unsigned char       * const outrow,
+                        struct OffsetTable  * const tableP) {
+
+/*----------------------------------------------------------------------------
+  Routine called from enlargePbmRowHorizontallyGen() to process the final
+  fractional inrow char.
+  outColChars : table offsets at or beyond this value are not written
+  outrow : write position for this function (not left edge of entire row)
+----------------------------------------------------------------------------*/
+
+    unsigned int i;
+
+    /* Deploy (most) solid chars */
+
+    for (i = 0; i < 7; ++i) {
+        unsigned int j;
+        unsigned char const bit8 = (inrowChar >> (7 - i) & 0x01) * 0xFF;
+        /* bit8 is 0x00 or 0xFF: input bit i (leftmost is 0) fills a char */
+        if (tableP->offsetSolid[i] >= outColChars)
+            break;
+        else
+            for (j = 0; j < tableP->solidChars; ++j) {
+                outrow[j + tableP->offsetSolid[i]] = bit8;
+            }
+     }
+    /* If scale factor is a multiple of 8 we are done. */
+
+    if (tableP->scale != 8) {
+        unsigned char outrowTemp[16];
+        /* Expand the single input char at the class's small scale factor */
+        enlargePbmRowHorizontallySmall(&inrowChar, 1,
+                                       tableP->scale, outrowTemp);
+        /* Copy each expanded char to its table position until out of room */
+        for (i = 0 ; i < tableP->scale; ++i) {
+            unsigned int const offset = tableP->offsetTrans[i];
+            if (offset >= outColChars)
+                break;
+            else
+                outrow[offset] = outrowTemp[i];
+            }
+
+    }
+
+}
+
+
+
+static void
+enlargePbmRowHorizontallyGen(const unsigned char * const inrow,
+                             unsigned int          const inColChars,
+                             unsigned int          const outColChars,
+                             unsigned int          const xScaleFactor,
+                             unsigned char       * const outrow,
+                             struct OffsetTable  * const tableP) {
+
+/*----------------------------------------------------------------------------
+  We iterate through inrow.
+  Output chars are deployed according to offsetTable.
+
+  All transitional chars and some solid chars are determined by calling
+  one of the fast routines in enlargePbmRowHorizontallySmall().
+----------------------------------------------------------------------------*/
+    unsigned int colChar;
+    unsigned int const fullColChars =
+        inColChars - ((inColChars * xScaleFactor == outColChars) ? 0 : 1);
+
+    for (colChar = 0; colChar < fullColChars; ++colChar) {
+        unsigned char const inrowChar = inrow[colChar];
+        char bit8[8];
+        unsigned int i;
+
+        /* Deploy most solid chars
+           Some scale factors yield uneven string lengths: in such
+           cases we don't handle the odd solid chars at this point
         */
 
-        for (colChar = 0; colChar < outColChars; ++colChar) {
-            unsigned int const mult = scaleFactor;
-            unsigned int const mod = colChar % mult;
-            unsigned int const bit = (mod*8)/mult;
-            /* source bit position, leftmost=0 */
-            unsigned int const offset = mult - (mod*8)%mult;
-            /* number of outrow bits derived from the same
-               "source" inrow bit, starting at and to the right
-               of leftmost bit of outrow byte, inclusive
-            */
-
-            if (offset >= 8)  /* Bits in outrow byte are all 1 or 0 */
-                outrow[colChar] =
-                    (inrow[colChar/mult] >> (7-bit) & 0x01) * 0xFF;
-            else           /* Two inrow bits influence this outrow byte */
-                outrow[colChar] = (unsigned char)
-                    (pair[inrow[colChar/mult] >> (6-bit) & 0x03] >> offset)
-                    & 0xFF;
+        for (i = 0; i < 8; ++i)
+            bit8[i] = (inrowChar >> (7 - i) & 0x01) * 0xFF;
+
+        for (i = 0; i < tableP->solidChars; ++i) {
+            unsigned int base = colChar * xScaleFactor + i;
+            outrow[base]              = bit8[0];
+            outrow[base + tableP->offsetSolid[1]] = bit8[1];
+            outrow[base + tableP->offsetSolid[2]] = bit8[2];
+            outrow[base + tableP->offsetSolid[3]] = bit8[3];
+            outrow[base + tableP->offsetSolid[4]] = bit8[4];
+            outrow[base + tableP->offsetSolid[5]] = bit8[5];
+            outrow[base + tableP->offsetSolid[6]] = bit8[6];
+            outrow[base + tableP->offsetSolid[7]] = bit8[7];
+        }
+
+        /* If scale factor is a multiple of 8 we are done */
+
+        if (tableP->scale != 8) {
+            /* Deploy transitional chars and any remaining solid chars */
+            unsigned char outrowTemp[16];
+            unsigned int base = colChar * xScaleFactor;
+
+            enlargePbmRowHorizontallySmall(&inrowChar, 1,
+                                           tableP->scale, outrowTemp);
+
+            /* There are at least 4 valid entries in offsetTrans[] */
+
+            outrow[base + tableP->offsetTrans[0]] = outrowTemp[0];
+            outrow[base + tableP->offsetTrans[1]] = outrowTemp[1];
+            outrow[base + tableP->offsetTrans[2]] = outrowTemp[2];
+            outrow[base + tableP->offsetTrans[3]] = outrowTemp[3];
+
+            for (i = 4; i < tableP->scale; ++i)
+                outrow[base + tableP->offsetTrans[i]] = outrowTemp[i];
         }
     }
+
+    /* Process the fractional final inrow byte */
+
+     if (fullColChars < inColChars) {
+        unsigned int  const start = fullColChars * xScaleFactor;
+        unsigned char const inrowLast = inrow[inColChars -1];
+
+        enlargePbmRowFractional(inrowLast, outColChars - start,
+                                xScaleFactor, &outrow[start], tableP);
+        }
+
 }
 
 
@@ -368,27 +664,36 @@ enlargePbm(struct pam * const inpamP,
            unsigned int const yScaleFactor,
            FILE *       const ofP) {
 
-    unsigned char * inrow;
-    unsigned char * outrow;
-
-    unsigned int row;
+    enum ScaleMethod {METHOD_USEINPUT, METHOD_SMALL, METHOD_GENERAL};
+    enum ScaleMethod const scaleMethod =
+        xScaleFactor == 1 ? METHOD_USEINPUT :  /* outrow is inrow itself */
+        xScaleFactor <= 13 ? METHOD_SMALL :    /* fixed fast routines */
+        METHOD_GENERAL;                        /* offset table method */
 
     unsigned int const outcols = inpamP->width * xScaleFactor;
     unsigned int const outrows = inpamP->height * yScaleFactor;
     unsigned int const inColChars  = pbm_packed_bytes(inpamP->width);
     unsigned int const outColChars = pbm_packed_bytes(outcols);
 
+    unsigned char * inrow;
+    unsigned char * outrow;
+    unsigned int row;
+    struct OffsetTable offsetTable;
+
     inrow  = pbm_allocrow_packed(inpamP->width);
 
-    if (xScaleFactor == 1)
+    if (scaleMethod == METHOD_USEINPUT)
         outrow = inrow;
-    else  {
-        /* Allow writes beyond outrow data end when xScaleFactor is
-           one of the special fast cases: 2, 3, 4, 5, 6, 7, 8, 9, 10.
+    else {
+        /* Allow writes beyond outrow data end for the METHOD_SMALL routines.
         */
         unsigned int const rightPadding =
-            xScaleFactor > 10 ? 0 : (xScaleFactor - 1) * 8;
+            scaleMethod == METHOD_GENERAL ? 0 : (xScaleFactor - 1) * 8;
+
         outrow = pbm_allocrow_packed(outcols + rightPadding);
+
+        if (scaleMethod == METHOD_GENERAL)
+            setupOffsetTable(xScaleFactor, &offsetTable);
     }
 
     pbm_writepbminit(ofP, outcols, outrows, 0);
@@ -402,8 +707,20 @@ enlargePbm(struct pam * const inpamP,
         if (outcols % 8 > 0)           /* clean final partial byte */
             pbm_cleanrowend_packed(inrow, inpamP->width);
 
-        enlargePbmRowHorizontally(inpamP, inrow, inColChars, outColChars,
-                                  xScaleFactor, outrow);
+        switch (scaleMethod) {
+        case METHOD_USEINPUT:
+            /* Nothing to do */
+            break;
+        case METHOD_SMALL:
+            enlargePbmRowHorizontallySmall(inrow, inColChars,
+                                           xScaleFactor, outrow);
+            break;
+        case METHOD_GENERAL:
+            enlargePbmRowHorizontallyGen(inrow, inColChars, outColChars,
+                                         xScaleFactor, outrow,
+                                         &offsetTable);
+            break;
+        }
 
         for (i = 0; i < yScaleFactor; ++i)
             pbm_writepbmrow_packed(ofP, outrow, outcols, 0);
-- 
cgit 1.4.1