about summary refs log tree commit diff
path: root/src
diff options
context:
space:
mode:
author Rich Felker <dalias@aerifal.cx> 2012-04-23 19:19:26 -0400
committer Rich Felker <dalias@aerifal.cx> 2012-04-23 19:19:26 -0400
commit 1a63a9fc30e7a1f1239e3cedcb5041e5ec1c5351 (patch)
tree 9ab2438310a30b91aa19962205d0a6fe065d868e /src
parent 38b5d7d0526be24fba0f0789407727e4e7950e63 (diff)
download musl-1a63a9fc30e7a1f1239e3cedcb5041e5ec1c5351.tar.gz
musl-1a63a9fc30e7a1f1239e3cedcb5041e5ec1c5351.tar.xz
musl-1a63a9fc30e7a1f1239e3cedcb5041e5ec1c5351.zip
Sync case mappings with Unicode 6.1.
Also special-case ß (U+00DF) as lowercase even though it does not have
a mapping to uppercase. Unicode added an uppercase version of this
character (U+1E9E) but does not map ß to it, presumably because the
uppercase version is not actually used except for some obscure purpose.
Diffstat (limited to 'src')
-rw-r--r-- src/ctype/iswlower.c 2
-rw-r--r-- src/ctype/towctrans.c 36
2 files changed, 30 insertions, 8 deletions
diff --git a/src/ctype/iswlower.c b/src/ctype/iswlower.c
index 0a568e77..438fe26a 100644
--- a/src/ctype/iswlower.c
+++ b/src/ctype/iswlower.c
@@ -2,5 +2,5 @@
 
 int iswlower(wint_t wc)
 {
-	return towupper(wc) != wc;
+	return towupper(wc) != wc || wc == 0xdf;
 }
diff --git a/src/ctype/towctrans.c b/src/ctype/towctrans.c
index 0b1eed04..2842d690 100644
--- a/src/ctype/towctrans.c
+++ b/src/ctype/towctrans.c
@@ -30,6 +30,7 @@ static const struct {
 	CASELACE(0x4c1,0x4cd),
 	CASELACE(0x4d0,0x50e),
 
+	CASELACE(0x514,0x526),
 	CASEMAP(0x531,0x556,0x561),
 
 	CASELACE(0x01a0,0x01a4),
@@ -69,12 +70,19 @@ static const struct {
 	CASEMAP(0x2c00,0x2c2e,0x2c30),
 	CASELACE(0x2c67,0x2c6b),
 	CASELACE(0x2c80,0x2ce2),
+	CASELACE(0x2ceb,0x2ced),
+
+	CASELACE(0xa640,0xa66c),
+	CASELACE(0xa680,0xa696),
 
 	CASELACE(0xa722,0xa72e),
 	CASELACE(0xa732,0xa76e),
 	CASELACE(0xa779,0xa77b),
 	CASELACE(0xa77e,0xa786),
 
+	CASELACE(0xa790,0xa792),
+	CASELACE(0xa7a0,0xa7a8),
+
 	CASEMAP(0xff21,0xff3a,0xff41),
 	{ 0,0,0 }
 };
@@ -144,6 +152,8 @@ static const unsigned short pairs[][2] = {
 	{ 0x03f7, 0x03f8 },
 	{ 0x03fa, 0x03fb },
 	{ 0x1e60, 0x1e9b },
+	{ 0xdf, 0xdf },
+	{ 0x1e9e, 0xdf },
 
 	{ 0x1f59, 0x1f51 },
 	{ 0x1f5b, 0x1f53 },
@@ -181,10 +191,20 @@ static const unsigned short pairs[][2] = {
 	{ 0x2c6d, 0x251 },
 	{ 0x2c6e, 0x271 },
 	{ 0x2c6f, 0x250 },
+	{ 0x2c70, 0x252 },
 	{ 0x2c72, 0x2c73 },
 	{ 0x2c75, 0x2c76 },
+	{ 0x2c7e, 0x23f },
+	{ 0x2c7f, 0x240 },
+	{ 0x2cf2, 0x2cf3 },
 
 	{ 0xa77d, 0x1d79 },
+	{ 0xa78b, 0xa78c },
+	{ 0xa78d, 0x265 },
+	{ 0xa7aa, 0x266 },
+
+	{ 0x10c7, 0x2d27 },
+	{ 0x10cd, 0x2d2d },
 
 	/* bogus greek 'symbol' letters */
 	{ 0x376, 0x377 },
@@ -207,17 +227,19 @@ static wchar_t __towcase(wchar_t wc, int lower)
 	int i;
 	int lmul = 2*lower-1;
 	int lmask = lower-1;
-	if ((unsigned)wc - 0x10400 < 0x50)
-		return wc + lmul*0x28;
 	/* no letters with case in these large ranges */
 	if (!iswalpha(wc)
 	 || (unsigned)wc - 0x0600 <= 0x0fff-0x0600
-	 || (unsigned)wc - 0x2e00 <= 0xa6ff-0x2e00
+	 || (unsigned)wc - 0x2e00 <= 0xa63f-0x2e00
 	 || (unsigned)wc - 0xa800 <= 0xfeff-0xa800)
 		return wc;
 	/* special case because the diff between upper/lower is too big */
-	if ((unsigned)wc - 0x10a0 < 0x26 || (unsigned)wc - 0x2d00 < 0x26)
-		return wc + lmul*(0x2d00-0x10a0);
+	if (lower && (unsigned)wc - 0x10a0 < 0x2e)
+		if (wc>0x10c5 && wc != 0x10c7 && wc != 0x10cd) return wc;
+		else return wc + 0x2d00 - 0x10a0;
+	if (!lower && (unsigned)wc - 0x2d00 < 0x26)
+		if (wc>0x2d25 && wc != 0x2d27 && wc != 0x2d2d) return wc;
+		else return wc + 0x10a0 - 0x2d00;
 	for (i=0; casemaps[i].len; i++) {
 		int base = casemaps[i].upper + (lmask & casemaps[i].lower);
 		if ((unsigned)wc-base < casemaps[i].len) {
@@ -230,8 +252,8 @@ static wchar_t __towcase(wchar_t wc, int lower)
 		if (pairs[i][1-lower] == wc)
 			return pairs[i][lower];
 	}
-	if ((unsigned)wc - 0x10428 + (lower<<5) + (lower<<3) < 0x28)
-		return wc - 0x28 + (lower<<10) + (lower<<6);
+	if ((unsigned)wc - (0x10428 - 0x28*lower) < 0x28)
+		return wc - 0x28 + 0x50*lower;
 	return wc;
 }