about summary refs log tree commit diff
path: root/test/pbmtext-utf8.test
blob: ca1f45a2958287544dde44ac805cdbf7412b2ab1 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
#! /bin/bash
# This script tests: pbmtext
# Also requires:

# This test requires the en_US.utf8 locale
# Skip this test if it is not available

# Probe for a working iconv by running it on empty input.  The tests
# below pipe data through iconv, so when the probe fails a notice is
# written to stderr and the script exits with status 80.
if ! iconv /dev/null; then
  echo "iconv command not available." >&2
  echo "Skipping." >&2
  exit 80
fi

# Probe for the en_US.utf8 locale: have pbmtext render one character in
# -wchar mode under that locale and discard the image.  A non-zero exit
# status from pbmtext is reported on stderr as the locale being
# unavailable, and the script exits with status 80.
if ! echo "A" | LC_ALL=en_US.utf8 pbmtext -wchar > /dev/null; then
  echo "LC_ALL could not be set to en_US.utf8." >&2
  echo "Skipping." >&2
  exit 80
fi

# Test 1.
# Two rows
# Should print 1240895458 5110 twice

# Writes the characters for codes 32-125 followed by a newline, then the
# characters for codes 161-255 with no trailing newline.  awk runs under
# LC_ALL=C (presumably so that each %c is emitted as one raw byte).
test1_rows() {
  LC_ALL=C awk 'BEGIN {
    for (i = 32; i <= 125; ++i) printf("%c", i)
    print ""
    for (i = 161; i <= 255; ++i) printf("%c", i)
  }'
}

# First pass: feed the raw bytes straight to pbmtext.
test1_rows |
  pbmtext -builtin bdf |
  cksum

# Second pass: convert the same bytes from ISO-8859-1 to UTF-8 and render
# them in wide-character mode under the UTF-8 locale.
test1_rows |
  iconv -f iso8859-1 -t utf-8 |
  LC_ALL=en_US.utf8 pbmtext -builtin bdf -wchar |
  cksum


# Test 2.
# One row
# Should print 898975479 2272 twice

# Writes the characters for codes 32-125 followed by a newline.
test2_row() {
  LC_ALL=C awk 'BEGIN {
    for (i = 32; i <= 125; ++i) printf("%c", i)
    print ""
  }'
}

# First pass: default (single-byte) rendering.
test2_row |
  pbmtext -builtin bdf |
  cksum

# Second pass: the same input, unconverted, rendered in wide-character
# mode under the UTF-8 locale.
test2_row |
  LC_ALL=en_US.utf8 pbmtext -builtin bdf -wchar |
  cksum


# Scratch file for Test 3.  tmpdir is taken from the environment when it
# is set and non-empty; otherwise /tmp is used.
tmpdir=${tmpdir:-/tmp}
output="${tmpdir}/output"


# Test 3.
# Two rows
# Output may be affected by locale.  Compare with cmp.
# Should print 0

# Writes two newline-terminated rows: the characters for codes 32-125,
# then the characters for codes 161-255.  awk runs under LC_ALL=C
# (presumably so that each %c is emitted as one raw byte).
test3_rows() {
  LC_ALL=C awk 'BEGIN {
    for (i = 32; i <= 125; ++i) printf("%c", i)
    print ""
    for (i = 161; i <= 255; ++i) printf("%c", i)
    print ""
  }'
}

# Reference text: the rows converted from ISO-8859-1 to UTF-8.  The
# charset names are spelled the same way as in the other iconv calls in
# this script.
test3_rows |
  iconv -f iso8859-1 -t utf-8 > "${output}"

# Text dumped by pbmtext from the same UTF-8 input must match the
# reference byte for byte.  "cmp -s" is the POSIX spelling of the silent
# mode; the comparison result is carried only by the exit status.
test3_rows |
  iconv -f iso8859-1 -t utf-8 |
  LC_ALL=en_US.utf8 pbmtext -builtin bdf -wchar -text-dump |
  cmp -s - "${output}"

# Print the status of cmp (the last command of the pipeline above).
echo $?
rm "${output}"


# Test 4.
# One row
# Should print the following twice:
# !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}
# (The generated row starts at code 32, so it begins with a space.)

# Writes the characters for codes 32-125 followed by a newline.
test4_row() {
  LC_ALL=C awk 'BEGIN {
    for (i = 32; i <= 125; ++i) printf("%c", i)
    print ""
  }'
}

# First copy: the generated row itself.
test4_row

# Second copy: the same row as dumped by pbmtext in wide-character mode.
test4_row |
  LC_ALL=en_US.utf8 pbmtext -builtin bdf -wchar -text-dump


# Test 5.
# Invalid utf-8 sequence
# Should print 1

# Feed the characters for codes 128 and 129 (plus a newline), generated
# under LC_ALL=C, to pbmtext running in wide-character mode under the
# UTF-8 locale, then print pbmtext's exit status.
LC_ALL=C awk 'BEGIN {
  for (i = 128; i <= 129; ++i) printf("%c", i)
  print ""
}' |
  LC_ALL=en_US.utf8 pbmtext -builtin bdf -wchar -text-dump
echo $?