Index: icu46/patches/converters.patch |
=================================================================== |
--- icu46/patches/converters.patch (revision 69841) |
+++ icu46/patches/converters.patch (working copy) |
@@ -1,5 +1,5 @@ |
--- source/data/mappings/ucmlocal.mk 1969-12-31 16:00:00.000000000 -0800 |
-+++ source/data/mappings/ucmlocal.mk 2009-12-02 13:12:20.156521000 -0800 |
++++ source/data/mappings/ucmlocal.mk 2010-12-21 15:36:19.030397000 -0800 |
@@ -0,0 +1,58 @@ |
+# Note: A number of encodings are handled with purely algorithmic converters, |
+# without any mapping tables: |
@@ -59,9 +59,9 @@ |
+noop-cns-11643.ucm\ |
+noop-gb2312_gl.ucm\ |
+noop-iso-ir-165.ucm |
---- source/data/mappings/convrtrs.txt 2009-08-04 10:53:44.000000000 -0700 |
-+++ source/data/mappings/convrtrs.txt 2009-08-27 09:33:30.822570000 -0700 |
-@@ -345,7 +345,7 @@ |
+--- source/data/mappings/convrtrs.txt 2010-09-16 22:04:39.000000000 -0700 |
++++ source/data/mappings/convrtrs.txt 2010-12-21 15:50:41.804761000 -0800 |
+@@ -360,7 +360,7 @@ |
ibm-367 { IBM* } IBM367 { IANA WINDOWS } # This is not truely ibm-367 because it's missing the fallbacks. |
# GB 18030 is partly algorithmic, using the MBCS converter |
@@ -70,7 +70,7 @@ |
# Table-based interchange codepages |
-@@ -482,15 +482,16 @@ |
+@@ -505,15 +505,16 @@ |
916 { JAVA } |
# Turkish |
@@ -95,7 +95,7 @@ |
cp920 { JAVA } |
920 { JAVA } |
windows-28599 { WINDOWS* } |
-@@ -588,10 +589,6 @@ |
+@@ -618,10 +619,6 @@ |
ibm-33722_P12A_P12A-2004_U2 { UTR22* } |
ibm-33722 # Leave untagged because this isn't the default |
ibm-5050 # Leave untagged because this isn't the default, and yes this alias is correct |
@@ -106,13 +106,16 @@ |
windows-51932 { WINDOWS* } |
ibm-33722_VPUA |
IBM-eucJP |
-@@ -604,14 +601,17 @@ |
+@@ -637,14 +634,20 @@ |
# ibm-954 seems to be almost a superset of ibm-33722 and ibm-1350 |
# ibm-1350 seems to be almost a superset of ibm-33722 |
# ibm-954 contains more PUA characters than the others. |
+# CHROME : Instead of ibm-33722_P*, we use our own EUC-JP converter |
+# to match IE7 and Mozilla more closely. |
-+google-euc_jp_mod { UTR22* } # a modified version of EUC-JP that prefers 2-byte code points when converting from Unicode while recognizing both 2-byte and 3-byte code points when converting to Unicode. |
++# google-euc_jp_mod is a modified version of EUC-JP that prefers 2-byte code |
++# points when converting from Unicode while recognizing both 2-byte and |
++# 3-byte code points when converting to Unicode. |
++google-euc_jp_mod { UTR22* } |
+ EUC-JP { MIME* IANA JAVA* WINDOWS* } |
+ Extended_UNIX_Code_Packed_Format_for_Japanese { IANA* JAVA WINDOWS } |
+ csEUCPkdFmtJapanese { IANA JAVA WINDOWS } |
@@ -127,10 +130,10 @@ |
- X-EUC-JP { JAVA } # Japan EUC. x-euc-jp is a MIME name |
- eucjis { JAVA } |
- ujis # Linux sometimes uses this name. This is an unfortunate generic and rarely used name. Its use is discouraged. |
+ x-IBM954 { JAVA } |
+ x-IBM954C { JAVA } |
# eucJP # This is closest to Solaris EUC-JP. |
- |
- # Here are various interpretations and extentions of Big5 |
-@@ -645,33 +645,40 @@ |
+@@ -695,33 +698,40 @@ |
ibm-1386_P100-2001 { UTR22* } |
ibm-1386 { IBM* } |
cp1386 |
@@ -183,7 +186,7 @@ |
GB2312.1980-0 # From X11R6 |
ibm-964_P110-1999 { UTR22* } # Taiwan EUC. x-euc-tw is a MIME name |
-@@ -720,13 +727,8 @@ |
+@@ -774,13 +784,8 @@ |
# Java has both ibm-970 and EUC-KR as separate converters. |
ibm-970_P110_P110-2006_U2 { UTR22* } |
ibm-970 { IBM* JAVA } |
@@ -197,7 +200,7 @@ |
cp970 { JAVA* } |
970 { JAVA } |
ibm-970_VPUA |
-@@ -738,16 +740,16 @@ |
+@@ -793,16 +798,16 @@ |
# ibm-1363 is almost a superset of ibm-970. |
ibm-1363_P11B-1998 { UTR22* } |
ibm-1363 # Leave untagged because this isn't the default |
@@ -221,11 +224,11 @@ |
+ #ksc |
+ #windows-949 # Alternate mapping. Leave untagged. This is the IBM interpretation of a Windows codepage. |
ibm-1363_VSUB_VPUA |
+ x-IBM1363C { JAVA } |
# ks_x_1001:1992 |
- # ksc5601-1992 |
-@@ -756,27 +758,41 @@ |
- ibm-1363 { IBM* } |
+@@ -813,15 +818,30 @@ |
ibm-1363_VASCII_VSUB_VPUA |
+ x-IBM1363 { JAVA } |
+#CHROME: Windows-949 is NOT EUC-KR, but a superset of EUC-KR with 8,822 |
+# additional Hangul syllables. However, the reality of the web |
@@ -235,13 +238,12 @@ |
+# are removed from the list of aliases for other Korean converters |
+# above. |
windows-949-2000 { UTR22* } |
-- windows-949 { JAVA* WINDOWS* } |
-- KS_C_5601-1987 { WINDOWS } |
-- KS_C_5601-1989 { WINDOWS } |
-- KSC_5601 { MIME WINDOWS } # Needed by iso-2022 |
+ windows-949 { JAVA* WINDOWS* } |
+ KS_C_5601-1987 { WINDOWS } |
+ KS_C_5601-1989 { WINDOWS } |
+ KSC_5601 { MIME WINDOWS } # Needed by iso-2022 |
+ EUC-KR { IANA* MIME* WINDOWS } |
-+ windows-949 { JAVA* WINDOWS } |
-+ KS_C_5601-1987 { WINDOWS* IANA } |
++ KS_C_5601-1987 { WINDOWS IANA } |
+ KS_C_5601-1989 { WINDOWS IANA } |
+ KSC_5601 { IANA WINDOWS } # Needed by iso-2022 |
csKSC56011987 { WINDOWS } |
@@ -254,7 +256,13 @@ |
+ 5601 |
+ x-windows-949 # Mozilla |
+ x-UHC # Mozilla (Unified Hangul Code) |
+ x-KSC5601 { JAVA } |
+ windows-1361-2000 { UTR22* } |
+@@ -830,18 +850,20 @@ |
+ johab |
+ x-Johab { JAVA } |
+ |
+#CHROME: TIS-620, ISO-8859-11 and Windows-874 are slightly different from |
+# each other, but they're used as if they're identical on the web. |
windows-874-2000 { UTR22* } # Thai (w/ euro update) |
@@ -263,6 +271,7 @@ |
+ TIS-620 { IANA* WINDOWS MIME* } |
+ windows-874 { JAVA* WINDOWS* MIME } |
MS874 { JAVA } |
+ x-windows-874 { JAVA } |
- # iso-8859-11 { WINDOWS } # iso-8859-11 is similar to TIS-620. ibm-13162 is a closer match. |
+ iso-8859-11 { IANA WINDOWS MIME } # iso-8859-11 is similar to TIS-620. ibm-13162 is a closer match. |
@@ -274,8 +283,8 @@ |
+ #TIS-620 { IANA* JAVA } # This is actually separate from ibm-874, which is similar to this table |
tis620.2533 { JAVA } # This is actually separate from ibm-874, which is similar to this table |
eucTH # eucTH is an unusual alias from Solaris. eucTH has fewer mappings than TIS620 |
- |
-@@ -820,7 +836,16 @@ |
+ x-IBM874 { JAVA } |
+@@ -891,7 +913,16 @@ |
ibm-5347_P100-1998 { UTR22* } ibm-5347 { IBM* } windows-1251 { IANA* JAVA* WINDOWS* } cp1251 { WINDOWS JAVA } ANSI1251 # Windows Cyrillic (w/ euro update). ANSI1251 is from Solaris |
ibm-5348_P100-1997 { UTR22* } ibm-5348 { IBM* } windows-1252 { IANA* JAVA* WINDOWS* } cp1252 { JAVA } # Windows Latin1 (w/ euro update) |
ibm-5349_P100-1998 { UTR22* } ibm-5349 { IBM* } windows-1253 { IANA* JAVA* WINDOWS* } cp1253 { JAVA } # Windows Greek (w/ euro update) |
@@ -291,10 +300,10 @@ |
+ l5 { IANA } |
+ 8859_9 { JAVA } |
ibm-9447_P100-2002 { UTR22* } ibm-9447 { IBM* } windows-1255 { IANA* JAVA* WINDOWS* } cp1255 { JAVA } # Windows Hebrew (w/ euro update) |
- ibm-9448_X100-2005 { UTR22* } ibm-9448 { IBM* } windows-1256 { IANA* JAVA* WINDOWS* } cp1256 { WINDOWS JAVA } # Windows Arabic (w/ euro update) |
+ ibm-9448_X100-2005 { UTR22* } ibm-9448 { IBM* } windows-1256 { IANA* JAVA* WINDOWS* } cp1256 { WINDOWS JAVA } x-windows-1256S { JAVA } # Windows Arabic (w/ euro update) |
ibm-9449_P100-2002 { UTR22* } ibm-9449 { IBM* } windows-1257 { IANA* JAVA* WINDOWS* } cp1257 { JAVA } # Windows Baltic (w/ euro update) |
--- source/data/mappings/windows-932-2000.ucm 1969-12-31 16:00:00.000000000 -0800 |
-+++ source/data/mappings/windows-932-2000.ucm 2009-08-05 13:21:17.750080000 -0700 |
++++ source/data/mappings/windows-932-2000.ucm 2010-12-21 15:36:19.154409000 -0800 |
@@ -0,0 +1,9932 @@ |
+# *************************************************************************** |
+# * |
@@ -10229,7 +10238,7 @@ |
+END CHARMAP |
+# |
--- source/data/mappings/windows-936-2000.ucm 2007-05-09 23:40:31.000000000 -0700 |
-+++ source/data/mappings/windows-936-2000.ucm 2009-08-05 13:21:17.768078000 -0700 |
++++ source/data/mappings/windows-936-2000.ucm 2010-12-21 15:36:19.167404000 -0800 |
@@ -37,7 +37,8 @@ |
# build an extension-only (delta) .cnv file |
@@ -10273,7 +10282,7 @@ |
<U301E> \xA8\x95 |0 |
<U3021> \xA9\x40 |0 |
--- source/data/mappings/windows-949-2000.ucm 2003-12-18 15:16:48.000000000 -0800 |
-+++ source/data/mappings/windows-949-2000.ucm 2009-08-05 13:21:17.783068000 -0700 |
++++ source/data/mappings/windows-949-2000.ucm 2010-12-21 15:36:19.178396000 -0800 |
@@ -37,7 +37,8 @@ |
# build an extension-only (delta) .cnv file |
@@ -10285,7 +10294,7 @@ |
# The following was the generated state table. |
# This does not account for unassigned characters |
--- source/data/mappings/windows-950-2000.ucm 2003-12-18 15:16:48.000000000 -0800 |
-+++ source/data/mappings/windows-950-2000.ucm 2009-08-05 13:21:17.799075000 -0700 |
++++ source/data/mappings/windows-950-2000.ucm 2010-12-21 15:36:19.189398000 -0800 |
@@ -37,7 +37,8 @@ |
# build an extension-only (delta) .cnv file |
@@ -10296,10 +10305,10 @@ |
# The following was the generated state table. |
# This does not account for unassigned characters |
---- source/data/mappings/ucmebcdic.mk 2007-05-09 23:40:31.000000000 -0700 |
+--- source/data/mappings/ucmebcdic.mk 2010-03-02 01:46:00.000000000 -0800 |
+++ source/data/mappings/ucmebcdic.mk 1969-12-31 16:00:00.000000000 -0800 |
-@@ -1,27 +0,0 @@ |
--# Copyright (c) 1999-2007, International Business Machines Corporation and |
+@@ -1,29 +0,0 @@ |
+-# Copyright (c) 1999-2010, International Business Machines Corporation and |
-# others. All Rights Reserved. |
-# A list of EBCDIC UCM's to build |
-# ibm-37 and ibm-1047 are already mentioned in makedata.mak and Makefile.in |
@@ -10326,8 +10335,11 @@ |
-ibm-4517_P100-2005.ucm ibm-4899_P100-1998.ucm ibm-4971_P100-1999.ucm\ |
-ibm-500_P100-1995.ucm ibm-5123_P100-1999.ucm ibm-803_P100-1999.ucm\ |
-ibm-8482_P100-1999.ucm ibm-9067_X100-2005.ucm ibm-16684_P110-2003.ucm |
+- |
+-#UCM_SOURCE_EBCDIC_IGNORE_SISO = |
+\ No newline at end of file |
--- source/data/mappings/google-euc_jp_mod.ucm 1969-12-31 16:00:00.000000000 -0800 |
-+++ source/data/mappings/google-euc_jp_mod.ucm 2009-08-05 13:21:17.850068000 -0700 |
++++ source/data/mappings/google-euc_jp_mod.ucm 2010-12-21 15:36:37.148907000 -0800 |
@@ -0,0 +1,13699 @@ |
+# *************************************************************************** |
+# * |
@@ -24029,7 +24041,7 @@ |
+<UFFE5> \xA1\xEF |0 |
+END CHARMAP |
--- source/data/mappings/iso-8859_16-2001.ucm 1969-12-31 16:00:00.000000000 -0800 |
-+++ source/data/mappings/iso-8859_16-2001.ucm 2009-08-05 13:21:17.854067000 -0700 |
++++ source/data/mappings/iso-8859_16-2001.ucm 2010-12-21 15:36:37.150883000 -0800 |
@@ -0,0 +1,301 @@ |
+# |
+# Name: ISO/IEC 8859-16:2001 to Unicode |
@@ -24332,8 +24344,8 @@ |
+<U201E> \xA5 |0 |
+<U20AC> \xA4 |0 |
+END CHARMAP |
---- source/common/ucnv2022.c 2009-08-07 12:29:02.000000000 -0700 |
-+++ source/common/ucnv2022.c 2009-12-02 13:24:21.544681000 -0800 |
+--- source/common/ucnv2022.c 2010-09-08 16:18:15.000000000 -0700 |
++++ source/common/ucnv2022.c 2010-12-21 15:36:37.178900000 -0800 |
@@ -491,7 +491,7 @@ |
} |
if(jpCharsetMasks[version]&CSM(GB2312)) { |
@@ -24370,7 +24382,7 @@ |
/* set the function pointers to appropriate funtions */ |
--- source/data/mappings/noop-cns-11643.ucm 1969-12-31 16:00:00.000000000 -0800 |
-+++ source/data/mappings/noop-cns-11643.ucm 2009-12-02 15:38:15.114703000 -0800 |
++++ source/data/mappings/noop-cns-11643.ucm 2010-12-21 15:36:37.182878000 -0800 |
@@ -0,0 +1,31 @@ |
+# ******************************************************************************* |
+# * |
@@ -24404,7 +24416,7 @@ |
+CHARMAP |
+END CHARMAP |
--- source/data/mappings/noop-gb2312_gl.ucm 1969-12-31 16:00:00.000000000 -0800 |
-+++ source/data/mappings/noop-gb2312_gl.ucm 2009-12-02 15:38:36.641528000 -0800 |
++++ source/data/mappings/noop-gb2312_gl.ucm 2010-12-21 15:36:37.183880000 -0800 |
@@ -0,0 +1,32 @@ |
+#________________________________________________________________________ |
+# |
@@ -24439,7 +24451,7 @@ |
+CHARMAP |
+END CHARMAP |
--- source/data/mappings/noop-iso-ir-165.ucm 1969-12-31 16:00:00.000000000 -0800 |
-+++ source/data/mappings/noop-iso-ir-165.ucm 2009-12-02 15:38:06.317529000 -0800 |
++++ source/data/mappings/noop-iso-ir-165.ucm 2010-12-21 15:36:37.185880000 -0800 |
@@ -0,0 +1,29 @@ |
+#________________________________________________________________________ |
+# |