about summary refs log tree commit diff
path: root/converter/pbm/pbmtog3.c
blob: 4f19ec2f06cd26c04dbfa45d92e187b8afd04fd8 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
/* pbmtog3.c - read a PBM image and produce a Group 3 FAX file

   For specifications for Group 3 (G3) fax MH coding see ITU-T T.4:
   Standardization of Group 3 facsimile terminals for document transmission
   https://www.itu.int/rec/T-REC-T.4/en

   This program generates only MH.
*/


#include <assert.h>

#include "pm_c_util.h"
#include "shhopt.h"
#include "mallocvar.h"
#include "bitreverse.h"
#include "intcode.h"
#include "g3.h"
#include "pbm.h"

enum G3eol {EOL, ALIGN8, ALIGN16, NO_EOL, NO_RTC, NO_EOLRTC};

enum OutMode {BINARY, DUMP};

struct OutStream;

typedef void PutbitsFn(struct OutStream * const outP,
                       struct BitString   const newBits);

struct OutStream {
    FILE * fp;
    struct BitString buffer;
    bool reverseBits;    /* Reverse bit order */  
    PutbitsFn * putbitsFn;
    enum G3eol eolAlign; /* Omit EOL and/or RTC; align EOL to 8/16 bits */ 
    void * data;         /* Reserved for future expansion */
};



struct CmdlineInfo {
    /* All the information the user supplied in the command line,
       in a form easy for the program to use.
    */
    const char * inputFileName;
    unsigned int reversebits;
    enum G3eol   align;
    unsigned int dump;
    unsigned int desiredWidth;
    unsigned int verbose;
};



static void
parseCommandLine(int argc, const char ** const argv,
                 struct CmdlineInfo * const cmdlineP) {
/*----------------------------------------------------------------------------
   Note that the file spec array we return is stored in the storage that
   was passed to us as the argv array.
-----------------------------------------------------------------------------*/
    optEntry * option_def;
        /* Instructions to OptParseOptions2 on how to parse our options.  */
    optStruct3 opt;
    unsigned int nofixedwidth;
    unsigned int align8, align16;

    unsigned int option_def_index;

    MALLOCARRAY_NOFAIL(option_def, 100);

    option_def_index = 0;   /* incremented by OPTENTRY */
    OPTENT3(0,   "reversebits",      OPT_FLAG,  NULL, &cmdlineP->reversebits,
            0);
    OPTENT3(0,   "nofixedwidth",     OPT_FLAG,  NULL, &nofixedwidth,
            0);
    OPTENT3(0,   "align8",           OPT_FLAG,  NULL, &align8,
            0);
    OPTENT3(0,   "align16",          OPT_FLAG,  NULL, &align16,
            0);
    OPTENT3(0,   "verbose",          OPT_FLAG,  NULL, &cmdlineP->verbose, 
            0);
    OPTENT3(0,   "dump",             OPT_FLAG,  NULL, &cmdlineP->dump,
            0);

    /* TODO
       Explicit fixed widths: -A4 -B4 -A3
    */

    opt.opt_table = option_def;
    opt.short_allowed = false;  /* We have no short (old-fashioned) options */
    opt.allowNegNum = true;  /* We may have parms that are negative numbers */

    pm_optParseOptions3(&argc, (char **)argv, opt, sizeof(opt), 0);
        /* Uses and sets argc, argv, and some of *cmdlineP and others. */

    free(option_def);

    if (align8) {
        if (align16)
            pm_error("You can't specify both -align8 and -align16");
        else
            cmdlineP->align = ALIGN8;
    } else if (align16)
        cmdlineP->align = ALIGN16;
    else
        cmdlineP->align = EOL;

    if (nofixedwidth)
        cmdlineP->desiredWidth = 0;
    else
        cmdlineP->desiredWidth = 1728;

    if (argc-1 == 0) 
        cmdlineP->inputFileName = "-";
    else if (argc-1 != 1)
        pm_error("Program takes zero or one argument (filename).  You "
                 "specified %d", argc-1);
    else
        cmdlineP->inputFileName = argv[1];
}



static void
reversebuffer(unsigned char * const p, 
              unsigned int    const n) {

    unsigned int i;

    for (i = 0; i < n; ++i)
        p[i] = bitreverse[p[i]];
}



static void
flushBuffer(struct OutStream * const outP) {
/*----------------------------------------------------------------------------
  Flush the contents of the bit buffer 
-----------------------------------------------------------------------------*/
    struct BitString const buffer = outP->buffer;

    assert (buffer.bitCount <= 32);

    if (buffer.bitCount > 0) {
        unsigned int const fullBuffer = sizeof(buffer.intBuffer) * 8; 
        unsigned int const bytesToWrite = (buffer.bitCount+7)/8;
        bigend32 outbytes;
        size_t rc;

        outbytes = pm_bigendFromUint32(
                   buffer.intBuffer << (fullBuffer - buffer.bitCount));
        if (outP->reverseBits)
    	reversebuffer((unsigned char *)&outbytes, bytesToWrite);
        rc = fwrite((unsigned char *)&outbytes, 1, bytesToWrite, outP->fp);
        if (rc != bytesToWrite)
            pm_error("Output error");
    }
}



static PutbitsFn putbitsDump;

static void
putbitsDump(struct OutStream * const outP,
            struct BitString   const newBits) {
/*----------------------------------------------------------------------------
  Print the content of the bit put request, in human readable text form
  For debugging.  Also good for studying how the coding scheme works.
-----------------------------------------------------------------------------*/
    unsigned int const bitCount = newBits.bitCount;

    unsigned int i;
    char charBuff[128];

    assert (bitCount >= 0 && bitCount < 32);
    assert (sizeof(newBits.intBuffer) + 2 < 128);

    for (i = 0; i < bitCount; ++i) {
        unsigned int const n = bitCount - i - 1;
        charBuff[i] = ((newBits.intBuffer >> n) & 0x01) + '0';
    }

    charBuff[bitCount]   = '\n';
    charBuff[bitCount+1] = '\0';
    fwrite(charBuff, 1, bitCount+1, outP->fp);
}



static PutbitsFn putbitsDump;

static void
putbitsBinary(struct OutStream * const outP,
              struct BitString   const newBits) {
/*----------------------------------------------------------------------------
   Push the bits 'newBits' onto the right end of output buffer
   out.buffer (moving the bits already in the buffer left).

   Flush the buffer to stdout as necessary to make room.

   'newBits' must be shorter than a whole word.

   N.B. the definition of struct BitString requires upper bits to be zero.
-----------------------------------------------------------------------------*/
    unsigned int const fullBuffer = sizeof(outP->buffer.intBuffer) * 8; 
    unsigned int const spaceLeft = fullBuffer - outP->buffer.bitCount;
        /* Number of bits of unused space (at the high end) in buffer */

    assert(newBits.bitCount < fullBuffer);
    assert(newBits.intBuffer >> newBits.bitCount == 0);

    if (spaceLeft > newBits.bitCount) {
        /* New bits fit with bits to spare */
        outP->buffer.intBuffer = 
            outP->buffer.intBuffer << newBits.bitCount | newBits.intBuffer;
        outP->buffer.bitCount += newBits.bitCount;
    } else { 
        /* New bits fill buffer.  We'll have to flush the buffer to stdout
           and put the rest of the bits in the new buffer.
        */
        unsigned int const nextBufBitCount = newBits.bitCount - spaceLeft;
        unsigned int const bitMask = ((1<<nextBufBitCount) - 1); 

        outP->buffer.intBuffer = ( (outP->buffer.intBuffer << spaceLeft) 
                                 | (newBits.intBuffer >> nextBufBitCount));
        outP->buffer.bitCount  = fullBuffer;
        flushBuffer(outP);

        outP->buffer.intBuffer = newBits.intBuffer & bitMask;
        outP->buffer.bitCount = nextBufBitCount;
    }
}



static void 
initOutStream(struct OutStream * const outP,
              bool               const reverseBits,
              enum G3eol         const eolAlign,
              bool               const wantDump) {

    outP->buffer.intBuffer = 0;
    outP->buffer.bitCount  = 0;
    outP->reverseBits      = reverseBits;
    outP->putbitsFn        = wantDump ? &putbitsDump : &putbitsBinary;
    outP->fp               = wantDump ? stderr : stdout;
    outP->eolAlign         = eolAlign;
}



static struct BitString
tableEntryToBitString(G3TableEntry const tableEntry) {

    struct BitString retval;

    retval.intBuffer = tableEntry.code;
    retval.bitCount  = tableEntry.length;

    return retval;
}



static void
putbits(struct OutStream * const outP,
        struct BitString   const newBits) {

    outP->putbitsFn(outP, newBits);
}



static void
putcodeShort(struct OutStream * const outP,
             bit                const color, 
             unsigned int       const runLength) {

    /* Note that this requires ttable to be aligned white entry, black
       entry, white, black, etc.
    */
    unsigned int index = runLength * 2 + color;
    putbits(outP, tableEntryToBitString(ttable[index]));
}



static void
putcodeLong(struct OutStream * const outP,
            bit                const color,
            unsigned int       const runLength) {
/*----------------------------------------------------------------------------
   Output Make-up code and Terminating code at once.

   For run lengths which require both: length 64 and above 

   The codes are combined here to avoid calculations in putbits()

   Terminating code is max 12 bits, Make-up code is max 13 bits.
   (See ttable, mtable entries in pbmtog3.h)  

   Also reduces object code size when putcode is compiled inline.
-----------------------------------------------------------------------------*/
    unsigned int const loIndex = runLength % 64 * 2 + color;
    unsigned int const hiIndex = runLength / 64 * 2 + color;
    unsigned int const loLength = ttable[loIndex].length;
    unsigned int const hiLength = mtable[hiIndex].length;

    struct BitString combinedCode;

    combinedCode.intBuffer = mtable[hiIndex].code << loLength |
                             ttable[loIndex].code;
    combinedCode.bitCount  = hiLength + loLength;

    putbits(outP, combinedCode);
}



static  void
putcodeExtra(struct OutStream * const outP,
             int                const color,
             int                const runLength) {
/*----------------------------------------------------------------------------
   Lengths over 2560.  This is rare.
   According to the standard, the mark-up code for 2560 can be issued as
   many times as necessary without terminal codes.  
   --------------------------------------------------------------------------*/
    G3TableEntry const markUp2560 = mtable[2560/64];
                              /* Same code for black and white */

    unsigned int remainingLen;

    for (remainingLen = runLength; remainingLen > 2560; remainingLen -= 2560)
      putbits(outP, tableEntryToBitString(markUp2560));
    /* after the above: 0 < remainingLen <= 2560 */

    if (remainingLen >= 64)
        putcodeLong(outP, color, remainingLen);
    else
        putcodeShort(outP, color, remainingLen);
}



static void
putspan(struct OutStream * const outP,
        bit                const color, 
        unsigned int       const runLength) {

    if (runLength < 64)
        putcodeShort(outP, color, runLength);
    else if (runLength < 2560)
        putcodeLong (outP, color, runLength);
    else  /* runLength > 2560 : rare */
        putcodeExtra(outP, color, runLength);
}



static void
puteol(struct OutStream * const outP) {

    switch (outP->eolAlign) {
    case EOL: {
        struct BitString const eol = { 1, 12 };

        putbits(outP, eol);
    } break;
    case ALIGN8:  case ALIGN16: {
        unsigned int const bitCount = outP->buffer.bitCount;
        unsigned int const fillbits =
            (outP->eolAlign == ALIGN8) ? (44 - bitCount) % 8
            : (52 - bitCount) % 16;

        struct BitString eol;

        eol.bitCount = 12 + fillbits;     eol.intBuffer = 1;
        putbits(outP, eol);
    } break;
    case NO_EOL: case NO_EOLRTC:
        break;
    case NO_RTC:
        pm_error("INTERNAL ERROR: no-RTC EOL treatment not implemented");
        break;
    }
}



static void
putrtc(struct OutStream * const outP) {

    switch (outP->eolAlign) {
    case NO_RTC: case NO_EOLRTC:
        break;
    default:
        puteol(outP);    puteol(outP);    puteol(outP);
        puteol(outP);    puteol(outP);    puteol(outP);
    }
}



static void
readOffSideMargins(unsigned char * const bitrow,
                   unsigned int    const colChars,
                   unsigned int  * const firstNonWhiteCharP, 
                   unsigned int  * const lastNonWhiteCharP,
                   bool          * const blankRowP) {
/*----------------------------------------------------------------------------
  Determine the white margins on the left and right side of a row.
  This is an enhancement: convertRowToG3() works without this.
-----------------------------------------------------------------------------*/
    unsigned int charCnt;
    unsigned int firstChar;
    unsigned int lastChar;
    bool         blankRow;

    assert(colChars > 0);

    for (charCnt = 0; charCnt < colChars && bitrow[charCnt] == 0; ++charCnt);

    if (charCnt >= colChars) {
        /* Reached end of bitrow with no black pixels encountered */
        firstChar = lastChar = 0;
        blankRow  = true;
    } else {
        /* There is at least one black pixel in the row */
        firstChar = charCnt;
        blankRow = false;

        charCnt = colChars - 1;

        while (bitrow[charCnt--] == 0x00)
            ;
        lastChar = charCnt + 1;
    }

    *firstNonWhiteCharP = firstChar;
    *lastNonWhiteCharP  = lastChar;
    *blankRowP          = blankRow;
}



static void
setBlockBitsInFinalChar(unsigned char * const finalByteP,
                        unsigned int    const cols) {
/*----------------------------------------------------------------------------
   If the char in the row is fractional, set it up so that the don't care
   bits are the opposite color of the last valid pixel.
----------------------------------------------------------------------------*/
    unsigned char const finalByte  = *finalByteP;
    unsigned int const silentBitCnt = 8 - cols % 8;
    bit const rowEndColor = (finalByte >> silentBitCnt) & 0x01;

    if (rowEndColor == PBM_WHITE) {
        unsigned char const blackMask = (0x01 << silentBitCnt) - 1;

        *finalByteP = finalByte | blackMask;
    }
    /* No adjustment required if the row ends with a black pixel.
       pbm_cleanrowend_packed() takes care of this.
    */
}



static void
trimFinalChar(struct OutStream * const outP,
              bit                const color,
              int                const carryLength, 
              int                const existingCols,
              int                const desiredWidth) {
/*---------------------------------------------------------------------------
   If the carry value from the last char in the row represents a valid
   sequence, output it.

   (1) If input row width is not a whole multiple of 8 and -nofixwidth
       was specified, the final carry value represents inactive bits
       at the row end.  Emit no code.  See setBlockBitsInFinalChar().

   (2) If there is white margin on the right side, the final carry value
       is valid.  We add to it the margin width.  Right-side margin may
       be added in main() to a narrow input image, detected in the
       input row by readOffSideMargins() or both.  The same treatment
       applies regardless of the nature of the right-side margin.  
----------------------------------------------------------------------------*/
    if (existingCols == desiredWidth) {
        if (existingCols % 8 == 0)
            putspan(outP, color, carryLength);  /* Code up to byte boundary */
        /* Emit nothing if existingCols is not a whole multiple of 8 */
    } else if (existingCols < desiredWidth) {
        if (color == 0) {       /* Last bit sequence in final char: white */
            unsigned int const totalLength =
                carryLength + (desiredWidth - existingCols);
            putspan(outP, 0, totalLength);
        } else {                 /* Black */
            unsigned int const padLength = desiredWidth - existingCols;
            putspan(outP, 1, carryLength);
            putspan(outP, 0, padLength);
        }
    }
}



static void
convertRowToG3(struct OutStream * const outP,
               unsigned char    * const bitrow, 
               unsigned int       const existingCols,
               unsigned int       const desiredWidth) {
/*----------------------------------------------------------------------------
   Table based Huffman coding
  
   Normally Huffman code encoders count sequences of ones and zeros
   and convert them to binary codes as they terminate.  This program
   recognizes chains of pixels and converts them directly, reading
   prefabricated code chains from an indexed table.

   For example the 8-bit sequence 01100110 translates to
   Huffman code: 000111 11 0111 11 000111.

   In reality things are more complicated.  The leftmost 0 (MSB) may be
   part of a longer sequence starting in the adjacent byte or perhaps
   spanning several bytes.  Likewise for the rightmost 0.

   So we first remove the sequence on the left side and compare its
   color with the leftmost pixel of the adjacent byte and emit codes
   for either ones sequence if they agree, or two if they disagree. 
   Next the composite code for the central part (in the above example
   110011 -> 11 0111 11) is emitted.  Finally we save the length and
   color of the sequence on the right end as carry-over for the next
   byte cycle.  Some 8-bit input sequences (00000000, 01111111,
   00111111, etc.) have no central part: these are special cases.  
---------------------------------------------------------------------------*/
    unsigned int const colChars = pbm_packed_bytes(existingCols);

    unsigned int charCnt;
    unsigned int firstActiveChar;
    unsigned int lastActiveChar;
    bool         blankRow;
    bit          borderColor;

    borderColor = PBM_WHITE; /* initial value */ 

    if (existingCols == desiredWidth && (desiredWidth % 8) > 0)
        setBlockBitsInFinalChar(&bitrow[colChars-1], desiredWidth);

    readOffSideMargins(bitrow, colChars,
                       &firstActiveChar, &lastActiveChar, &blankRow);

    if (blankRow)
        putspan(outP, PBM_WHITE, desiredWidth);
    else {
        unsigned int carryLength;

        for (charCnt = firstActiveChar, carryLength = firstActiveChar * 8;
             charCnt <=lastActiveChar;
             ++charCnt) {

            unsigned char const byte = bitrow[charCnt];
            bit const rColor = !borderColor;

            if (byte == borderColor * 0xFF) {
                carryLength += 8;
            } else if (byte == (unsigned char) ~(borderColor * 0xFF)) {
                putspan(outP, borderColor, carryLength);
                carryLength = 8;
                borderColor = rColor;
            } else {
                struct PrefabCode const code = prefabCode[byte];
                unsigned int const activeLength =
                    8 - code.leadBits - code.trailBits; 

                if (borderColor == (byte >> 7)) {
                    putspan(outP, borderColor, carryLength + code.leadBits);
                } else {
                    putspan(outP, borderColor, carryLength);
                    putcodeShort(outP, rColor, code.leadBits);
                }
                if (activeLength > 0)
                    putbits(outP, code.activeBits);

                borderColor = byte & 0x01;
                carryLength = code.trailBits;
            }
        }
        trimFinalChar(outP, borderColor, carryLength,
                      (lastActiveChar + 1) * 8, desiredWidth);
    }
    puteol(outP);
}



int
main(int          argc,
     const char * argv[]) {

    struct CmdlineInfo cmdline;
    FILE * ifP;
    unsigned char * bitrow;
       /* This is the bits of the current row, as read from the input and
          modified various ways at various points in the program.  It has
          a word of zero padding on the high (right) end for the convenience
          of code that accesses this buffer in word-size bites.
        */

    int rows;
    int cols;
    int format;
    unsigned int existingCols;
    unsigned int desiredWidth;
    unsigned int row;
    struct OutStream out;

    pm_proginit(&argc, argv);

    parseCommandLine(argc, argv, &cmdline);

    ifP = pm_openr(cmdline.inputFileName);

    pbm_readpbminit(ifP, &cols, &rows, &format);

    if (cmdline.desiredWidth == 0)
        desiredWidth = existingCols = cols;
    else {
        if (cmdline.desiredWidth < cols)
            existingCols = desiredWidth = cmdline.desiredWidth;
        else {
            existingCols = pbm_packed_bytes(cols) * 8;
            desiredWidth = cmdline.desiredWidth;
        }
    }

    MALLOCARRAY(bitrow, pbm_packed_bytes(cols) + sizeof(uint32_t));

    if (!bitrow)
        pm_error("Failed to allocate a row buffer for %u columns", cols);

    initOutStream(&out, cmdline.reversebits, cmdline.align, cmdline.dump);
    
    puteol(&out);

    for (row = 0; row < rows; ++row) {
        pbm_readpbmrow_packed(ifP, bitrow, cols, format);
        pbm_cleanrowend_packed(bitrow, cols);
        convertRowToG3(&out, bitrow, existingCols, desiredWidth);
    }

    pbm_freerow_packed(bitrow);
    putrtc(&out);
    flushBuffer(&out);
    pm_close(ifP);

    return 0;
}