test/pbmtext-utf8.test


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209

#! /bin/bash
# This script tests: pbmtext
# Also requires:

# Run every command in the C locale unless a command line overrides it.
# The pbmtext -wchar runs further down set LC_ALL to the UTF-8 locale that
# is selected in this section (stored in locale_for_test).
LANG=C
LC_ALL=C
export LANG LC_ALL


# This test requires a working UTF-8 locale.
# Skip this test if one is not available.
# Every "skip" path below prints a notice on stderr and exits with status 80.

# Probe iconv by running it on empty input; any non-zero status (which
# includes "command not found") is treated as "not available".
if ! iconv /dev/null; then
  echo "iconv command not available." 1>&2
  echo "Skipping." 1>&2
  exit 80
fi

tmpdir=${tmpdir:-/tmp}
utf_locale_list=${tmpdir}/utf_locale_list

locale_to_test="en_US.utf8"  # Initial value
# Edit the above value if necessary

# Make a list of available locales whose name ends in a UTF-8 codeset.
# "locale -a" spells the suffix "utf8" on some systems and "UTF-8" on
# others, so accept both spellings in any letter case.
locale -a | grep -i -E '\.utf-?8$' > "${utf_locale_list}"

# Hunt for a valid utf8 locale
# Submit each candidate to a trial pbmtext run until one that works is
# encountered.  The initial value above is tried first, then line 1, 2, ...
# of the list.  Running past the end of the list leaves locale_to_test
# empty, which also ends the loop.

i=0
until [ -z "${locale_to_test}" ] || \
  echo "A" | LC_ALL="${locale_to_test}" pbmtext -wchar > /dev/null
do
  i=$((i + 1))
  locale_to_test=$(sed -n "${i}p" "${utf_locale_list}")
done

rm "${utf_locale_list}"
if [ -z "${locale_to_test}" ]; then
  echo "No utf-8 locale available." 1>&2
  echo "Skipping." 1>&2
  exit 80
else
  echo "Testing with locale set to ${locale_to_test}" 1>&2
fi

# Name used by all the tests that follow.
locale_for_test=${locale_to_test}


# Test 1.
# Two rows
# Should print 2066913605 5110 twice

echo "Test 1"

# First run: row 1 is the byte values 32..126, row 2 is the single bytes
# 161..255, rendered by pbmtext without -wchar.
awk 'BEGIN {
  for (code = 32; code <= 126; ++code) printf("%c", code)
  print ""
  for (code = 161; code <= 255; ++code) printf("%c", code)
}' |
  pbmtext -builtin bdf |
  cksum

# Second run: same first row; row 2 is written as byte pairs 194,161..191
# followed by 195,128..191 (the two-byte UTF-8 forms of code points
# 161..255) and rendered with -wchar under the UTF-8 locale.
awk 'BEGIN {
  for (code = 32; code <= 126; ++code) printf("%c", code)
  print ""
  for (tail = 161; tail <= 191; ++tail) printf("%c%c", 194, tail)
  for (tail = 128; tail <= 191; ++tail) printf("%c%c", 195, tail)
}' |
  LC_ALL=${locale_for_test} pbmtext -builtin bdf -wchar |
  cksum


# Test 2.
# One row
# Should print 2920616515 2301 twice

echo "Test 2"

# awk program that writes the byte values 32..126 followed by a newline.
# Both runs below use exactly this input.
ascii_row_prog='BEGIN {
  for (code = 32; code <= 126; ++code) printf("%c", code)
  print ""
}'

# Plain (non -wchar) rendering.
awk "${ascii_row_prog}" |
  pbmtext -builtin bdf |
  cksum

# -wchar rendering under the UTF-8 locale.
awk "${ascii_row_prog}" |
  LC_ALL=${locale_for_test} pbmtext -builtin bdf -wchar |
  cksum

output=${tmpdir}/output


# Test 3.
# Two rows
# Compare with cmp.
# Should print 0 0 0 : 0

echo "Test 3"

# awk program that writes two newline-terminated rows: the byte values
# 32..126, then the byte pairs 194,161..191 and 195,128..191.
# It is defined once so that the reference file and the pbmtext input are
# guaranteed to be the same text.
utf8_rows_prog='BEGIN {
  for (i = 32; i <= 126; ++i) printf("%c", i)
  print ""
  for (i = 161; i <= 191; ++i) printf("%c%c", 194, i)
  for (i = 128; i <= 191; ++i) printf("%c%c", 195, i)
  print ""
}'

# Reference copy of the input text.
awk "${utf8_rows_prog}" > "${output}"

# -text-dump output of pbmtext is compared against the reference copy.
# cmp -s is the POSIX spelling of "no output, status only".
awk "${utf8_rows_prog}" | \
    LC_ALL=${locale_for_test} pbmtext -builtin bdf -wchar -text-dump | \
    cmp -s - "${output}"

# Must directly follow the pipeline: prints the exit status of awk, pbmtext
# and cmp, then the status of the pipeline as a whole.
echo "${PIPESTATUS[@]}" ":" $?
rm "${output}"


# Test 4.
# One row  ASCII 7-bit range
# Should print the following twice:
# !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~

echo "Test 4"

# First copy: the row exactly as awk writes it.
awk 'BEGIN {
  code = 32
  while (code <= 126) {
    printf("%c", code)
    code++
  }
  print ""
}'

# Second copy: the same row after passing through pbmtext -text-dump.
awk 'BEGIN {
  code = 32
  while (code <= 126) {
    printf("%c", code)
    code++
  }
  print ""
}' |
  LC_ALL=${locale_for_test} pbmtext -builtin bdf -wchar -text-dump

# Test 5.
# Long input text
# The first input is 4999 two-byte characters (wc reports 9998 bytes).
# Appending one more character gives 9999 bytes; per the messages printed
# below, that input exceeds the length limit and pbmtext is expected to fail.


echo "Test 5 Invalid"

long_txt=${tmpdir}/long.txt
test_out=${tmpdir}/test_out

# Write the byte pair 194,190 4999 times, with no trailing newline.
# awk is used (as elsewhere in this script) rather than sed "\x" escapes,
# which are a GNU extension.
awk 'BEGIN { for (i = 0; i < 4999; ++i) printf("%c%c", 194, 190) }' \
  > "${long_txt}"
wc -c < "${long_txt}" | tr -d ' '

LC_ALL=${locale_for_test} pbmtext -nomargins -builtin fixed -wchar \
  < "${long_txt}" | cksum

echo 1>&2
echo "Test input text which exceeds length limit" 1>&2
echo "Error messages should appear below the line." 1>&2
echo "-----------------------------------------------------------" 1>&2

printf 'z' >> "${long_txt}"
wc -c < "${long_txt}" | tr -d ' '

# The over-long input is submitted twice, labelled 1 and 2.  Each round
# prints "Expected failure N" if pbmtext returns non-zero, followed by the
# status of "test -s" on the output file (1 when nothing was written).
for attempt in 1 2; do
  LC_ALL=${locale_for_test} \
    pbmtext -nomargins -builtin fixed -wchar \
    < "${long_txt}" > "${test_out}" ||
    printf 'Expected failure %s' "${attempt}"
  test -s "${test_out}"; echo " "$?
  rm -f "${test_out}"
done

rm "${long_txt}"


# Test 6.
# Invalid utf-8 sequences
# For each case output should be:
# 6-x: 0 1 : 1 1
# The fields are: exit status of awk, exit status of pbmtext, exit status
# of the pipeline, and the status of "test -s" on the output file
# (1 when the file is empty).

echo "Invalid utf-8 sequences as input." 1>&2
echo "Errors message should appear below the line." 1>&2
echo "-----------------------------------------------------------" 1>&2

echo "Test 6 Invalid"

#######################################
# Pipe the output of one awk program into pbmtext -wchar -text-dump and
# print a one-line report of what happened.
# Globals:   locale_for_test (read)
#            test_out (read; the file it names is created, then removed)
# Arguments: $1 - case label, e.g. "6-1"
#            $2 - awk program that writes the input text
# Outputs:   "<label>: <awk> <pbmtext> : <pipeline> <test -s>" on stdout
#######################################
report_invalid_input() {
  awk "$2" | \
    LC_ALL=${locale_for_test} \
    pbmtext -builtin bdf -wchar -text-dump > "${test_out}"
  # PIPESTATUS and $? are both expanded before echo runs, so they still
  # describe the pipeline above.  Nothing may be inserted between the two.
  echo -n "$1:" "${PIPESTATUS[@]}" ":" $?
  test -s "${test_out}"; echo " "$?
  rm -f "${test_out}"
}

# Bytes 128, 129 with nothing in front of them.
report_invalid_input "6-1" \
  'BEGIN { printf("%c%c",128,129);  print ""}'

# Valid ASCII followed by bytes 192, 193.
report_invalid_input "6-2" \
  'BEGIN { printf("ABC%c%c",192, 193);  print ""}'

# Valid ASCII followed by bytes 254, 253.
report_invalid_input "6-3" \
  'BEGIN { printf("abcde%c%c", 254, 253);  print ""}'

# Byte 195 followed by byte 15 instead of a byte in the 128..191 range.
report_invalid_input "6-4" \
  'BEGIN { printf("abcdefg%c%c", 195, 15);  print ""}'

# Bytes 224, 143 followed by a zero byte.
report_invalid_input "6-5" \
  'BEGIN { printf("123456789%c%c%c", 224, 143 ,0);  print ""}'

# Bytes 247, 135, 135 followed by byte 7.
report_invalid_input "6-6" \
  'BEGIN { printf("abcdefg123ABCDEFG%c%c%c%c",247, 135, 135, 7);  print ""}'