| Index: third_party/icu38/source/test/testdata/conversion.txt
|
| ===================================================================
|
| --- third_party/icu38/source/test/testdata/conversion.txt (revision 10949)
|
| +++ third_party/icu38/source/test/testdata/conversion.txt (working copy)
|
| @@ -1,6 +1,6 @@
|
| //*******************************************************************************
|
| //
|
| -// Copyright (C) 2003-2007, International Business Machines
|
| +// Copyright (C) 2003-2008, International Business Machines
|
| // Corporation and others. All Rights Reserved.
|
| //
|
| // file name: conversion.txt
|
| @@ -162,6 +162,16 @@
|
| :intvector{ 0,1,1,1,1,2,3,8,11,13,14,14,14,14,15,16 },
|
| :int{1}, :int{0}, "", "&C", :bin{""}
|
| }
|
| + // Test ticket 5691: HZ with illegal tilde sequences.
|
| + {
|
| + "HZ",
|
| + :bin{ 417e20427e21437e80447e7b41417e207e41427e7f41437e7d5a },
|
| + "A\\x7E B\\x7E!C\\x7E\\x80D\u4eae\\x7E\\x20\\x7E\u8c05\\x7E\\x7F\u64a9Z",
|
| + :intvector{ 0,1,1,1,1,2,3,4,4,4,4,5,6,7,7,7,7,7,7,7,7,9, // SBCS
|
| + 12,14,14,14,14,14,14,14,14,16,16,16,16,17,19,19,19,19,19,19,19,19,21, // DBCS
|
| + 25 }, // SBCS
|
| + :int{1}, :int{0}, "", "&C", :bin{""}
|
| + }
|
| // Test ticket 5691: Example from Peter Edberg.
|
| {
|
| "ISO-2022-JP",
|
| @@ -170,6 +180,22 @@
|
| :intvector{ 3,5,7,9,14,15,16,17,18,22,23,24 },
|
| :int{1}, :int{0}, "", "?", :bin{""}
|
| }
|
| + // Test bug 6071 (2:1 Unicode:charset SBCS mapping).
|
| + {
|
| + "*test1bmp",
|
| + :bin{ 050008 },
|
| + "e@uv",
|
| + :intvector{ 0,1,2,2 },
|
| + :int{1}, :int{1}, "", "?", :bin{""}
|
| + }
|
| + // test that HZ limits its byte values to lead bytes 21..7d and trail bytes 21..7e
|
| + {
|
| + "HZ",
|
| + :bin{ 7e7b21212120217e217f772100007e217e7e7d207e7e807e0a2b },
|
| + "\u3000\ufffd\u3013\ufffd\u9ccc\ufffd\ufffd\u3013 ~\ufffd+",
|
| + :intvector{ 2,4,6,8,10,12,14,15,19,20,22,25 },
|
| + :int{1}, :int{1}, "", "?", :bin{""}
|
| + }
|
| // improve coverage of ISO-2022-JP converter with hardcoded JIS X 0201 and
|
| // using the Shift-JIS table for JIS X 0208 (ticket #5797)
|
| {
|
| @@ -313,6 +339,21 @@
|
| :intvector{ 0, 5, 7, 9, 9, 9, 9, 9, 9, 9, 9, 12 },
|
| :int{1}, :int{1}, "", "&", :bin{""}
|
| }
|
| + // empty segment (using substitution and stop)
|
| + {
|
| + "ISO-2022-KR",
|
| + :bin{ 1b242943610e0f620d0a },
|
| + "a\uFFFDb\u000D\u000A",
|
| + :intvector{ 4, 6, 7, 8, 9 },
|
| + :int{1}, :int{1}, "", "?", :bin{""}
|
| + }
|
| + {
|
| + "ISO-2022-KR",
|
| + :bin{ 1b242943610e0f620d0a },
|
| + "a",
|
| + :intvector{ 4 },
|
| + :int{1}, :int{1}, "illesc", ".", :bin{"0f"}
|
| + }
|
|
|
| // ISO-2022-JP
|
|
|
| @@ -363,6 +404,21 @@
|
| :bin{ 41c15c1b284a5cc242 }, "A\uff81\\\xa5\uff82B", :intvector{ 0, 1, 2, 6, 7, 8 },
|
| :int{1}, :int{1}, "", ".", :bin{""}
|
| }
|
| + // empty segment (using substitution and stop)
|
| + {
|
| + "ISO-2022-JP",
|
| + :bin{ 61621b24421b284263640d0a },
|
| + "ab\uFFFDcd\u000D\u000A",
|
| + :intvector{ 0, 1, 5, 8, 9, 10, 11 },
|
| + :int{1}, :int{1}, "", "?", :bin{""}
|
| + }
|
| + {
|
| + "ISO-2022-JP",
|
| + :bin{ 61621b24421b284263640d0a },
|
| + "ab",
|
| + :intvector{ 0, 1 },
|
| + :int{1}, :int{1}, "illesc", ".", :bin{"1b2842"}
|
| + }
|
|
|
| // ISO-2022-CN
|
|
|
| @@ -433,6 +489,36 @@
|
| :bin{ 411b242b491b4f2121 }, "\x41", :intvector{ 0 },
|
| :int{1}, :int{1}, "unsuppesc", ".", :bin{ 1b242b49 }
|
| }
|
| + // empty segment 1 (using substitution and stop)
|
| + {
|
| + "ISO-2022-CN",
|
| + :bin{ 611b242941620e0f1b242a481b4e6a65630d0a },
|
| + "ab\uFFFD\u994Cc\u000D\u000A",
|
| + :intvector{ 0, 5, 7, 14, 16, 17, 18 },
|
| + :int{1}, :int{1}, "", "?", :bin{""}
|
| + }
|
| + {
|
| + "ISO-2022-CN",
|
| + :bin{ 611b242941620e0f1b242a481b4e6a65630d0a },
|
| + "ab",
|
| + :intvector{ 0, 5 },
|
| + :int{1}, :int{1}, "illesc", ".", :bin{"0f"}
|
| + }
|
| + // empty segment 2 (using substitution and stop)
|
| + {
|
| + "ISO-2022-CN",
|
| + :bin{ 611b242941620e1b24294768640f630d0a },
|
| + "ab\uFFFD\u5F70c\u000D\u000A",
|
| + :intvector{ 0, 5, 7, 11, 14, 15, 16 },
|
| + :int{1}, :int{1}, "", "?", :bin{""}
|
| + }
|
| + {
|
| + "ISO-2022-CN",
|
| + :bin{ 611b242941620e1b24294768640f630d0a },
|
| + "ab",
|
| + :intvector{ 0, 5 },
|
| + :int{1}, :int{1}, "illesc", ".", :bin{"1b242947"}
|
| + }
|
|
|
| // ISO-2022 SBCS
|
| // [U_ENABLE_GENERIC_ISO_2022]
|
| @@ -447,6 +533,39 @@
|
| // :int{1}, :int{1}, "", ".", :bin{""}
|
| //}
|
|
|
| + // HZ-GB-2312
|
| +
|
| + // empty segment 1 (using substitution and stop)
|
| + {
|
| + "HZ-GB-2312",
|
| + :bin{ 61627e7b7e7d6364 },
|
| + "ab\uFFFDcd",
|
| + :intvector{ 0, 1, 4, 6, 7 },
|
| + :int{1}, :int{1}, "", "?", :bin{""}
|
| + }
|
| + {
|
| + "HZ-GB-2312",
|
| + :bin{ 61627e7b7e7d63640d0a },
|
| + "ab",
|
| + :intvector{ 0, 1 },
|
| + :int{1}, :int{1}, "illesc", ".", :bin{"7e7d"}
|
| + }
|
| + // empty segment 2 & legal redundant switches (using substitution and stop)
|
| + {
|
| + "HZ-GB-2312",
|
| + :bin{ 61627e7b323b3f557e7b7e7b523b7e7d63647e7d65667e7d7e7d },
|
| + "ab\u4E0D\u7A7A\uFFFD\u4E00cdef\uFFFD",
|
| + :intvector{ 0, 1, 4, 6, 10, 12, 16, 17, 20, 21, 24 },
|
| + :int{1}, :int{1}, "", "?", :bin{""}
|
| + }
|
| + {
|
| + "HZ-GB-2312",
|
| + :bin{ 61627e7b323b3f557e7b7e7b523b7e7d63647e7d65667e7d7e7d },
|
| + "ab\u4E0D\u7A7A",
|
| + :intvector{ 0, 1, 4, 6 },
|
| + :int{1}, :int{1}, "illesc", ".", :bin{"7e7b"}
|
| + }
|
| +
|
| // DBCS-only extensions
|
| {
|
| "ibm-970",
|
| @@ -618,6 +737,14 @@
|
| :intvector{ 0, 4, 8, 12 },
|
| :int{1}, :int{0}, "", "?", :bin{""}
|
| }
|
| + // Test iso-2022-jp-2 miscellaneous symbols
|
| + {
|
| + "iso-2022-jp-2",
|
| + :bin{ 1b242843224f224e1b2842 },
|
| + "\u260E\u260F",
|
| + :intvector{ 4, 6 },
|
| + :int{1}, :int{0}, "", ".", :bin{""}
|
| + }
|
| }
|
| }
|
|
|
| @@ -626,6 +753,14 @@
|
| fromUnicode {
|
| Headers { "charset", "unicode", "bytes", "offsets", "flush", "fallbacks", "errorCode", "callback", "invalidUChars" }
|
| Cases {
|
| + // Test bug 6071 (1:2 Unicode:charset SBCS mapping).
|
| + {
|
| + "*test1bmp",
|
| + "e@t",
|
| + :bin{ 05000709 },
|
| + :intvector{ 0,1,2,2 },
|
| + :int{1}, :int{0}, "", "?", ""
|
| + }
|
| // improve coverage of ISO-2022-JP converter with hardcoded JIS X 0201 and
|
| // using the Shift-JIS table for JIS X 0208 (ticket #5797)
|
| {
|
| @@ -1433,16 +1568,29 @@
|
| // versions of ISO-2022-JP
|
| {
|
| "ISO-2022-JP",
|
| - "[\x00-\x0d\x10-\x1a\x1c-\x7f\xa5\u0391-\u03a1\u203e\uff61-\uff9f\u4e00\u4e01\uffe5]",
|
| - "[\x0e\x0f\x1b\u0100-\u0113\u0385-\u038a\u4e02\u4e27-\u4e29\ufa0e-\ufa2d\uffe6-\U0010ffff]",
|
| + "[\x00-\x0d\x10-\x1a\x1c-\x7f\xa5\u0391-\u03a1\u2015\u203e\u4e00\u4e01\uffe5]",
|
| + "[\x0e\x0f\x1b\u0100-\u0113\u0385-\u038a\u2014\u301c\u4e02\u4e27-\u4e29\u4fe0\u663b\u9eb5\ufa0e-\ufa2d\uff61-\uff9f\uffe4\uffe6-\U0010ffff]",
|
| :int{0}
|
| - }
|
| + }
|
| {
|
| "ISO-2022-JP-2",
|
| - "[\x00-\x0d\x10-\x1a\x1c-\u0113\u0384-\u0386\u0388-\u038a\u0390-\u03a1\u203e\uff61-\uff9f\u4e00-\u4e05\uffe6]",
|
| - "[\x0e\x0f\x1b\uffe7-\U0010ffff]",
|
| + "[\x00-\x0d\x10-\x1a\x1c-\x7f\xa0-\u0113\u0384-\u0386\u0388-\u038a\u0390-\u03a1\u203e\u4e00-\u4e05\u4fe0\u663b\uffe6]",
|
| + "[\x0e\x0f\x1b\uff61-\uff9f\uffe4\uffe7-\U0010ffff]",
|
| :int{0}
|
| }
|
| + {
|
| + "JIS7",
|
| + "[\x00-\x0d\x10-\x1a\x1c-\x7f\xa0-\u0113\u0384-\u0386\u0388-\u038a\u0390-\u03a1\u203e\u4e00-\u4e05\u4fe0\u663b\uff61-\uff9f\uffe6]",
|
| + "[\x0e\x0f\x1b\uffe4\uffe7-\U0010ffff]",
|
| + :int{0}
|
| + }
|
| + // with fallbacks
|
| + {
|
| + "ISO-2022-JP",
|
| + "[\x00-\x0d\x10-\x1a\x1c-\x7f\xa5\u0391-\u03a1\u2014\u2015\u203e\u301c\u4e00\u4e01\u4fe0\u9eb5\uff61-\uff9f\uffe5]",
|
| + "[\x0e\x0f\x1b\xa6\u0100-\u0113\u0385-\u038a\u4e02\u4e27-\u4e29\u663b\ufa0e-\ufa2d\uffe4\uffe6-\U0010ffff]",
|
| + :int{1}
|
| + }
|
|
|
| // versions of ISO-2022-CN
|
| {
|
| @@ -1458,6 +1606,22 @@
|
| :int{0}
|
| }
|
|
|
| + // HZ
|
| + {
|
| + "HZ",
|
| + "[\u0410-\u044f\u4e00\u4e01\u4e03]",
|
| + "[\u4e02\u4e04-\u4e06\uac00-\ud7ff]",
|
| + :int{0}
|
| + }
|
| +
|
| + // LMBCS
|
| + {
|
| + "LMBCS",
|
| + "[\x00-\U0010ffff]",
|
| + "[]",
|
| + :int{0}
|
| + }
|
| +
|
| // DBCS-only
|
| {
|
| "ibm-971",
|
|
|