| Index: source/test/testdata/csdetest.xml
|
| diff --git a/source/test/testdata/csdetest.xml b/source/test/testdata/csdetest.xml
|
| index 03b0ac8583ffad05f9dc7ab029827590f69a8e7f..8b151a3c4a8573784d8e1b30557b61f1b1e7d6e3 100644
|
| --- a/source/test/testdata/csdetest.xml
|
| +++ b/source/test/testdata/csdetest.xml
|
| @@ -1,10 +1,10 @@
|
| <?xml version="1.0" encoding="UTF-8"?>
|
|
|
| -<!-- Copyright (c) 2005-2007 IBM Corporation and others. All rights reserved -->
|
| +<!-- Copyright (c) 2005-2013 IBM Corporation and others. All rights reserved -->
|
| <!-- See individual test cases for their specific copyright. -->
|
|
|
| <charset-detection-tests>
|
| - <test-case id="IUC10-ar" encodings="UTF-8 UTF-32BE UTF-32LE ISO-8859-6/ar windows-1256/ar">
|
| + <test-case id="IUC10-ar" encodings="UTF-8 UTF-16LE UTF-16BE UTF-32BE UTF-32LE ISO-8859-6/ar windows-1256/ar">
|
| <!-- Copyright © 1991-2005 Unicode, Inc. All rights reserved. -->
|
|
|
| أوروبا, برمجيات الحاسوب + انترنيت :
|
| @@ -20,7 +20,7 @@
|
|
|
| </test-case>
|
|
|
| - <test-case id="IUC10-da-Q" encodings="UTF-8 UTF-32BE UTF-32LE windows-1252/da">
|
| + <test-case id="IUC10-da-Q" encodings="UTF-8 UTF-16LE UTF-16BE UTF-32BE UTF-32LE windows-1252/da">
|
| <!-- Copyright © 1991-2005 Unicode, Inc. All rights reserved. -->
|
|
|
| Europa, Software + Internet:
|
| @@ -36,7 +36,7 @@
|
|
|
| </test-case>
|
|
|
| - <test-case id="IUC10-da" encodings="UTF-8 UTF-32BE UTF-32LE ISO-8859-1/da">
|
| + <test-case id="IUC10-da" encodings="UTF-8 UTF-16LE UTF-16BE UTF-32BE UTF-32LE ISO-8859-1/da">
|
| <!-- Copyright © 1991-2005 Unicode, Inc. All rights reserved. -->
|
|
|
| Europa, Software + Internet:
|
| @@ -52,7 +52,7 @@
|
|
|
| </test-case>
|
|
|
| - <test-case id="IUC10-de" encodings="UTF-8 UTF-32BE UTF-32LE ISO-8859-1/de">
|
| + <test-case id="IUC10-de" encodings="UTF-8 UTF-16LE UTF-16BE UTF-32BE UTF-32LE ISO-8859-1/de">
|
| <!-- Copyright © 1991-2005 Unicode, Inc. All rights reserved. -->
|
|
|
| Europa, Software + das Internet:
|
| @@ -69,7 +69,7 @@
|
| </test-case>
|
|
|
| <!-- No UTF-8 in this test because there are no non-ASCII characters. -->
|
| - <test-case id="IUC10-en" encodings="UTF-32BE UTF-32LE ISO-8859-1/en">
|
| + <test-case id="IUC10-en" encodings="UTF-16LE UTF-16BE UTF-32BE UTF-32LE ISO-8859-1/en">
|
| <!-- Copyright © 1991-2005 Unicode, Inc. All rights reserved. -->
|
|
|
| Europe, Software + the Internet:
|
| @@ -85,7 +85,7 @@
|
|
|
| </test-case>
|
|
|
| - <test-case id="IUC10-es" encodings="UTF-8 UTF-32BE UTF-32LE ISO-8859-1/es">
|
| + <test-case id="IUC10-es" encodings="UTF-8 UTF-16LE UTF-16BE UTF-32BE UTF-32LE ISO-8859-1/es">
|
| <!-- Copyright © 1991-2005 Unicode, Inc. All rights reserved. -->
|
|
|
| Europa, Software + el Internet:
|
| @@ -101,7 +101,7 @@
|
|
|
| </test-case>
|
|
|
| - <test-case id="IUC10-fr" encodings="UTF-8 UTF-32BE UTF-32LE ISO-8859-1/fr">
|
| + <test-case id="IUC10-fr" encodings="UTF-8 UTF-16LE UTF-16BE UTF-32BE UTF-32LE ISO-8859-1/fr">
|
| <!-- Copyright © 1991-2005 Unicode, Inc. All rights reserved. -->
|
|
|
| L'Europe, le logiciel et l'Internet :
|
| @@ -118,7 +118,7 @@
|
|
|
| </test-case>
|
|
|
| - <test-case id="IUC10-he" encodings="UTF-8 UTF-32BE UTF-32LE ISO-8859-8-I/he">
|
| + <test-case id="IUC10-he" encodings="UTF-8 UTF-16LE UTF-16BE UTF-32BE UTF-32LE ISO-8859-8-I/he">
|
| <!-- Copyright © 1991-2005 Unicode, Inc. All rights reserved. -->
|
|
|
| אירופה, תוכנה והאינטרנט:
|
| @@ -133,7 +133,7 @@
|
|
|
| </test-case>
|
|
|
| - <test-case id="IUC10-he-Q" encodings="UTF-8 UTF-32BE UTF-32LE windows-1255/he">
|
| + <test-case id="IUC10-he-Q" encodings="UTF-8 UTF-16LE UTF-16BE UTF-32BE UTF-32LE windows-1255/he">
|
| <!-- Copyright © 1991-2005 Unicode, Inc. All rights reserved. -->
|
|
|
| אירופה, תוכנה והאינטרנט:
|
| @@ -148,7 +148,7 @@
|
|
|
| </test-case>
|
|
|
| - <test-case id="IUC10-hu" encodings="UTF-8 UTF-32BE UTF-32LE ISO-8859-2/hu">
|
| + <test-case id="IUC10-hu" encodings="UTF-8 UTF-16LE UTF-16BE UTF-32BE UTF-32LE ISO-8859-2/hu">
|
| <!-- Copyright © 1991-2005 Unicode, Inc. All rights reserved. -->
|
|
|
| Európa, a Szoftver s az Internet -
|
| @@ -165,7 +165,7 @@
|
|
|
| </test-case>
|
|
|
| - <test-case id="IUC10-hu-Q" encodings="UTF-8 UTF-32BE UTF-32LE windows-1250/hu">
|
| + <test-case id="IUC10-hu-Q" encodings="UTF-8 UTF-16LE UTF-16BE UTF-32BE UTF-32LE windows-1250/hu">
|
| <!-- Copyright © 1991-2005 Unicode, Inc. All rights reserved. -->
|
|
|
| Európa, a Szoftver s az Internet -
|
| @@ -182,7 +182,7 @@
|
|
|
| </test-case>
|
|
|
| - <test-case id="IUC10-it" encodings="UTF-8 UTF-32BE UTF-32LE ISO-8859-1/it">
|
| + <test-case id="IUC10-it" encodings="UTF-8 UTF-16LE UTF-16BE UTF-32BE UTF-32LE ISO-8859-1/it">
|
| <!-- Copyright © 1991-2005 Unicode, Inc. All rights reserved. -->
|
|
|
| Europa, software e Internet:
|
| @@ -199,7 +199,7 @@
|
| </test-case>
|
|
|
| <!-- No EUC-JP in this test because it detects as GB18030 -->
|
| - <test-case id="IUC10-jp" encodings="UTF-8 UTF-32BE UTF-32LE Shift_JIS/ja ISO-2022-JP">
|
| + <test-case id="IUC10-jp" encodings="UTF-8 UTF-16LE UTF-16BE UTF-32BE UTF-32LE Shift_JIS/ja ISO-2022-JP">
|
| <!-- Copyright © 1991-2005 Unicode, Inc. All rights reserved. -->
|
|
|
| ヨーロッパ、ソフトウェア、そしてインターネット:
|
| @@ -214,7 +214,7 @@
|
|
|
| </test-case>
|
|
|
| - <test-case id="IUC10-ko" encodings="UTF-8 UTF-32BE UTF-32LE EUC-KR/ko ISO-2022-KR">
|
| + <test-case id="IUC10-ko" encodings="UTF-8 UTF-16LE UTF-16BE UTF-32BE UTF-32LE EUC-KR/ko ISO-2022-KR">
|
| <!-- Copyright © 1991-2005 Unicode, Inc. All rights reserved. -->
|
|
|
| 유럽, 소프트웨어 그리고 인터넷:
|
| @@ -230,7 +230,7 @@
|
| </test-case>
|
|
|
| <!-- No UTF-8 in this test because there are no non-ASCII characters. -->
|
| - <test-case id="IUC10-nl" encodings="UTF-32BE UTF-32LE ISO-8859-1/nl">
|
| + <test-case id="IUC10-nl" encodings="UTF-16LE UTF-16BE UTF-32BE UTF-32LE ISO-8859-1/nl">
|
| <!-- Copyright © 1991-2005 Unicode, Inc. All rights reserved. -->
|
|
|
| Europa, Software + het Internet:
|
| @@ -247,7 +247,7 @@
|
| </test-case>
|
|
|
| <!-- No language for ISO-8859-1 in this test because no-NO is recogonized as Danish... -->
|
| - <test-case id="IUC10-no-NO" encodings="UTF-8 UTF-32BE UTF-32LE ISO-8859-1/da">
|
| + <test-case id="IUC10-no-NO" encodings="UTF-8 UTF-16LE UTF-16BE UTF-32BE UTF-32LE ISO-8859-1/da">
|
| <!-- Copyright © 1991-2005 Unicode, Inc. All rights reserved. -->
|
|
|
| Europa, Programvare og Internet:
|
| @@ -262,7 +262,7 @@
|
|
|
| </test-case>
|
|
|
| - <test-case id="IUC10-no-NO-NY" encodings="UTF-8 UTF-32BE UTF-32LE ISO-8859-1/no">
|
| + <test-case id="IUC10-no-NO-NY" encodings="UTF-8 UTF-16LE UTF-16BE UTF-32BE UTF-32LE ISO-8859-1/no">
|
| <!-- Copyright © 1991-2005 Unicode, Inc. All rights reserved. -->
|
|
|
| Europa, programvare og Internett:
|
| @@ -278,7 +278,7 @@
|
|
|
| </test-case>
|
|
|
| - <test-case id="IUC10-pt-BR" encodings="UTF-8 UTF-32BE UTF-32LE ISO-8859-1/pt">
|
| + <test-case id="IUC10-pt-BR" encodings="UTF-8 UTF-16LE UTF-16BE UTF-32BE UTF-32LE ISO-8859-1/pt">
|
| <!-- Copyright © 1991-2005 Unicode, Inc. All rights reserved. -->
|
|
|
| Europa, Software e a Internet:
|
| @@ -294,7 +294,7 @@
|
|
|
| </test-case>
|
|
|
| - <test-case id="IUC10-pt-PT" encodings="UTF-8 UTF-32BE UTF-32LE ISO-8859-1/pt">
|
| + <test-case id="IUC10-pt-PT" encodings="UTF-8 UTF-16LE UTF-16BE UTF-32BE UTF-32LE ISO-8859-1/pt">
|
| <!-- Copyright © 1991-2005 Unicode, Inc. All rights reserved. -->
|
|
|
| Europa, Software e a Internet:
|
| @@ -311,7 +311,7 @@
|
|
|
| </test-case>
|
|
|
| - <test-case id="IUC10-ro" encodings="UTF-8 UTF-32BE UTF-32LE ISO-8859-2/ro">
|
| + <test-case id="IUC10-ro" encodings="UTF-8 UTF-16LE UTF-16BE UTF-32BE UTF-32LE ISO-8859-2/ro">
|
| <!-- Copyright © 1991-2005 Unicode, Inc. All rights reserved. -->
|
|
|
| Europa, Software şi Internet:
|
| @@ -328,7 +328,7 @@
|
|
|
| </test-case>
|
|
|
| - <test-case id="IUC10-ru" encodings="UTF-8 UTF-32BE UTF-32LE ISO-8859-5/ru windows-1251/ru KOI8-R/ru">
|
| + <test-case id="IUC10-ru" encodings="UTF-8 UTF-16LE UTF-16BE UTF-32BE UTF-32LE ISO-8859-5/ru windows-1251/ru KOI8-R/ru">
|
| <!-- Copyright © 1991-2005 Unicode, Inc. All rights reserved. -->
|
|
|
| Европа, Программное обеспечение + Интернет:
|
| @@ -345,7 +345,7 @@
|
|
|
| </test-case>
|
|
|
| - <test-case id="IUC10-sv" encodings="UTF-8 UTF-32BE UTF-32LE ISO-8859-1/sv">
|
| + <test-case id="IUC10-sv" encodings="UTF-8 UTF-16LE UTF-16BE UTF-32BE UTF-32LE ISO-8859-1/sv">
|
| <!-- Copyright © 1991-2005 Unicode, Inc. All rights reserved. -->
|
|
|
| Europa, programvara och Internet:
|
| @@ -361,7 +361,7 @@
|
|
|
| </test-case>
|
|
|
| - <test-case id="IUC10-yi" encodings="UTF-8 UTF-32BE UTF-32LE">
|
| + <test-case id="IUC10-yi" encodings="UTF-8 UTF-16LE UTF-16BE UTF-32BE UTF-32LE">
|
| <!-- Copyright © 1991-2005 Unicode, Inc. All rights reserved. -->
|
|
|
| אײראָפּע: פּראָגראַמװאַרג און די װעלטנעץ:
|
| @@ -377,7 +377,7 @@
|
|
|
| </test-case>
|
|
|
| - <test-case id="IUC10-zh-Hant" encodings="UTF-8 UTF-32BE UTF-32LE Big5/zh">
|
| + <test-case id="IUC10-zh-Hant" encodings="UTF-8 UTF-16LE UTF-16BE UTF-32BE UTF-32LE Big5/zh">
|
| <!-- Copyright © 1991-2005 Unicode, Inc. All rights reserved. -->
|
|
|
| 歐洲,軟體及網際網路:
|
| @@ -393,7 +393,7 @@
|
| </test-case>
|
|
|
| <!-- No ISO-2022-CN in this test because Java doesn't support it in both directions :-( -->
|
| - <test-case id="IUC10-zh-Hans" encodings="UTF-8 UTF-32BE UTF-32LE ISO-2022-CN GB18030/zh">
|
| + <test-case id="IUC10-zh-Hans" encodings="UTF-8 UTF-16LE UTF-16BE UTF-32BE UTF-32LE ISO-2022-CN GB18030/zh">
|
| <!-- Copyright © 1991-2005 Unicode, Inc. All rights reserved. -->
|
|
|
| 欧洲,软件+互联网
|
| @@ -409,7 +409,7 @@
|
| Conference Program
|
| </test-case>
|
|
|
| - <test-case id="WIU-cz" encodings="UTF-8 UTF-32BE UTF-32LE ISO-8859-2/cs">
|
| + <test-case id="WIU-cz" encodings="UTF-8 UTF-16LE UTF-16BE UTF-32BE UTF-32LE ISO-8859-2/cs">
|
| <!-- Copyright © 1991-2005 Unicode, Inc. All rights reserved. -->
|
|
|
| Co je Unicode?
|
| @@ -432,7 +432,7 @@ Conference Program
|
|
|
| </test-case>
|
|
|
| - <test-case id="WIU-el" encodings="UTF-8 UTF-32BE UTF-32LE ISO-8859-7/el">
|
| + <test-case id="WIU-el" encodings="UTF-8 UTF-16LE UTF-16BE UTF-32BE UTF-32LE ISO-8859-7/el">
|
| <!-- Copyright © 1991-2005 Unicode, Inc. All rights reserved. -->
|
|
|
| Τι είναι το Unicode;
|
| @@ -458,7 +458,7 @@ Conference Program
|
|
|
| </test-case>
|
|
|
| - <test-case id="WIU-el-Q" encodings="UTF-8 UTF-32BE UTF-32LE windows-1253/el">
|
| + <test-case id="WIU-el-Q" encodings="UTF-8 UTF-16LE UTF-16BE UTF-32BE UTF-32LE windows-1253/el">
|
| <!-- Copyright © 1991-2005 Unicode, Inc. All rights reserved. -->
|
|
|
| Τι είναι το “Unicode”;
|
| @@ -484,7 +484,7 @@ Conference Program
|
|
|
| </test-case>
|
|
|
| - <test-case id="WIU-pl" encodings="UTF-8 UTF-32BE UTF-32LE ISO-8859-2/pl">
|
| + <test-case id="WIU-pl" encodings="UTF-8 UTF-16LE UTF-16BE UTF-32BE UTF-32LE ISO-8859-2/pl">
|
| <!-- Copyright © 1991-2005 Unicode, Inc. All rights reserved. -->
|
|
|
| Czym jest Unikod ?
|
| @@ -505,7 +505,7 @@ Conference Program
|
|
|
| </test-case>
|
|
|
| - <test-case id="WIU-tr" encodings="UTF-8 UTF-32BE UTF-32LE ISO-8859-9/tr">
|
| + <test-case id="WIU-tr" encodings="UTF-8 UTF-16LE UTF-16BE UTF-32BE UTF-32LE ISO-8859-9/tr">
|
| <!-- Copyright © 1991-2005 Unicode, Inc. All rights reserved. -->
|
|
|
| Evrensel Kod Nedir?
|
| @@ -527,7 +527,7 @@ Conference Program
|
|
|
| </test-case>
|
|
|
| - <test-case id="WIU-tr-Q" encodings="UTF-8 UTF-32BE UTF-32LE windows-1254/tr">
|
| + <test-case id="WIU-tr-Q" encodings="UTF-8 UTF-16LE UTF-16BE UTF-32BE UTF-32LE windows-1254/tr">
|
| <!-- Copyright © 1991-2005 Unicode, Inc. All rights reserved. -->
|
|
|
| “Evrensel Kod” Nedir?
|
| @@ -548,4 +548,14 @@ Conference Program
|
| şifrelemeyi desteklemek zorundadırlar; veriler, farklı şifreleme ve altyapılardan geçerken bozulma riski taşırlar.
|
|
|
| </test-case>
|
| -</charset-detection-tests>
|
| +
|
| +
|
| + <test-case id="bug-10532-utf-16" encodings="UTF-8 UTF-16BE UTF-16LE UTF-32BE UTF-32LE">
|
| + foo 東京・銀座の歌舞伎座。4月に新調された4枚の緞帳のうち3枚は、京都の川島織物セルコンが織った朝日新聞デジタル会員(有料・無料)にご登録いただくと、様々な特典・サービスが受けられます。
|
| + </test-case>
|
| +
|
| + <test-case id="bug-10532-ASCII" encodings="UTF-8 UTF-16BE UTF-16LE UTF-32BE UTF-32LE">
|
| +      <!-- Note that plain 7-bit ASCII is detected as UTF-8 -->
|
| + ,1,,,5
|
| + </test-case>
|
| +</charset-detection-tests>
|
|
|