about summary refs log tree commit diff
diff options
context:
space:
mode:
author Rich Felker <dalias@aerifal.cx> 2012-04-23 19:19:26 -0400
committer Rich Felker <dalias@aerifal.cx> 2012-04-23 19:19:26 -0400
commit 1a63a9fc30e7a1f1239e3cedcb5041e5ec1c5351 (patch)
tree 9ab2438310a30b91aa19962205d0a6fe065d868e
parent 38b5d7d0526be24fba0f0789407727e4e7950e63 (diff)
download musl-1a63a9fc30e7a1f1239e3cedcb5041e5ec1c5351.tar.gz
musl-1a63a9fc30e7a1f1239e3cedcb5041e5ec1c5351.tar.xz
musl-1a63a9fc30e7a1f1239e3cedcb5041e5ec1c5351.zip
sync case mappings with unicode 6.1
also special-case ß (U+00DF) as lowercase even though it does not have
a mapping to uppercase. unicode added an uppercase version of this
character but does not map it, presumably because the uppercase
version is not actually used except for some obscure purpose...
-rw-r--r-- src/ctype/iswlower.c 2
-rw-r--r-- src/ctype/towctrans.c 36
2 files changed, 30 insertions, 8 deletions
diff --git a/src/ctype/iswlower.c b/src/ctype/iswlower.c
index 0a568e77..438fe26a 100644
--- a/src/ctype/iswlower.c
+++ b/src/ctype/iswlower.c
@@ -2,5 +2,5 @@
 
 int iswlower(wint_t wc)
 {
-	return towupper(wc) != wc;
+	return towupper(wc) != wc || wc == 0xdf;
 }
diff --git a/src/ctype/towctrans.c b/src/ctype/towctrans.c
index 0b1eed04..2842d690 100644
--- a/src/ctype/towctrans.c
+++ b/src/ctype/towctrans.c
@@ -30,6 +30,7 @@ static const struct {
 	CASELACE(0x4c1,0x4cd),
 	CASELACE(0x4d0,0x50e),
 
+	CASELACE(0x514,0x526),
 	CASEMAP(0x531,0x556,0x561),
 
 	CASELACE(0x01a0,0x01a4),
@@ -69,12 +70,19 @@ static const struct {
 	CASEMAP(0x2c00,0x2c2e,0x2c30),
 	CASELACE(0x2c67,0x2c6b),
 	CASELACE(0x2c80,0x2ce2),
+	CASELACE(0x2ceb,0x2ced),
+
+	CASELACE(0xa640,0xa66c),
+	CASELACE(0xa680,0xa696),
 
 	CASELACE(0xa722,0xa72e),
 	CASELACE(0xa732,0xa76e),
 	CASELACE(0xa779,0xa77b),
 	CASELACE(0xa77e,0xa786),
 
+	CASELACE(0xa790,0xa792),
+	CASELACE(0xa7a0,0xa7a8),
+
 	CASEMAP(0xff21,0xff3a,0xff41),
 	{ 0,0,0 }
 };
@@ -144,6 +152,8 @@ static const unsigned short pairs[][2] = {
 	{ 0x03f7, 0x03f8 },
 	{ 0x03fa, 0x03fb },
 	{ 0x1e60, 0x1e9b },
+	{ 0xdf, 0xdf },
+	{ 0x1e9e, 0xdf },
 
 	{ 0x1f59, 0x1f51 },
 	{ 0x1f5b, 0x1f53 },
@@ -181,10 +191,20 @@ static const unsigned short pairs[][2] = {
 	{ 0x2c6d, 0x251 },
 	{ 0x2c6e, 0x271 },
 	{ 0x2c6f, 0x250 },
+	{ 0x2c70, 0x252 },
 	{ 0x2c72, 0x2c73 },
 	{ 0x2c75, 0x2c76 },
+	{ 0x2c7e, 0x23f },
+	{ 0x2c7f, 0x240 },
+	{ 0x2cf2, 0x2cf3 },
 
 	{ 0xa77d, 0x1d79 },
+	{ 0xa78b, 0xa78c },
+	{ 0xa78d, 0x265 },
+	{ 0xa7aa, 0x266 },
+
+	{ 0x10c7, 0x2d27 },
+	{ 0x10cd, 0x2d2d },
 
 	/* bogus greek 'symbol' letters */
 	{ 0x376, 0x377 },
@@ -207,17 +227,19 @@ static wchar_t __towcase(wchar_t wc, int lower)
 	int i;
 	int lmul = 2*lower-1;
 	int lmask = lower-1;
-	if ((unsigned)wc - 0x10400 < 0x50)
-		return wc + lmul*0x28;
 	/* no letters with case in these large ranges */
 	if (!iswalpha(wc)
 	 || (unsigned)wc - 0x0600 <= 0x0fff-0x0600
-	 || (unsigned)wc - 0x2e00 <= 0xa6ff-0x2e00
+	 || (unsigned)wc - 0x2e00 <= 0xa63f-0x2e00
 	 || (unsigned)wc - 0xa800 <= 0xfeff-0xa800)
 		return wc;
 	/* special case because the diff between upper/lower is too big */
-	if ((unsigned)wc - 0x10a0 < 0x26 || (unsigned)wc - 0x2d00 < 0x26)
-		return wc + lmul*(0x2d00-0x10a0);
+	if (lower && (unsigned)wc - 0x10a0 < 0x2e)
+		if (wc>0x10c5 && wc != 0x10c7 && wc != 0x10cd) return wc;
+		else return wc + 0x2d00 - 0x10a0;
+	if (!lower && (unsigned)wc - 0x2d00 < 0x26)
+		if (wc>0x2d25 && wc != 0x2d27 && wc != 0x2d2d) return wc;
+		else return wc + 0x10a0 - 0x2d00;
 	for (i=0; casemaps[i].len; i++) {
 		int base = casemaps[i].upper + (lmask & casemaps[i].lower);
 		if ((unsigned)wc-base < casemaps[i].len) {
@@ -230,8 +252,8 @@ static wchar_t __towcase(wchar_t wc, int lower)
 		if (pairs[i][1-lower] == wc)
 			return pairs[i][lower];
 	}
-	if ((unsigned)wc - 0x10428 + (lower<<5) + (lower<<3) < 0x28)
-		return wc - 0x28 + (lower<<10) + (lower<<6);
+	if ((unsigned)wc - (0x10428 - 0x28*lower) < 0x28)
+		return wc - 0x28 + 0x50*lower;
 	return wc;
 }