Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(261)

Side by Side Diff: components/translate/core/language_detection/chinese_script_classifier.cc

Issue 2743843002: Implements ChineseScriptClassifier functionality without icu::Transliterator (Closed)
Patch Set: Removes references to unused translit.h Created 3 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « components/translate/core/language_detection/chinese_script_classifier.h ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2017 The Chromium Authors. All rights reserved. 1 // Copyright 2017 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "components/translate/core/language_detection/chinese_script_classifier .h" 5 #include "components/translate/core/language_detection/chinese_script_classifier .h"
6 6
7 #include <algorithm> 7 #include <algorithm>
8 #include <memory> 8 #include <memory>
9 #include <string> 9 #include <string>
10 #include "base/logging.h" 10 #include "base/logging.h"
11 #include "base/strings/string_util.h" 11 #include "base/strings/string_util.h"
12 #include "third_party/icu/source/common/unicode/uniset.h"
12 #include "third_party/icu/source/common/unicode/unistr.h" 13 #include "third_party/icu/source/common/unicode/unistr.h"
13 #include "third_party/icu/source/i18n/unicode/translit.h"
14 14
15 namespace translate { 15 namespace translate {
16 16
17 namespace { 17 namespace {
18 // BCP 47 language code representing Chinese in Han Simplified script. 18 // BCP 47 language code representing Chinese in Han Simplified script.
19 const char kChineseSimplifiedCode[] = "zh-Hans"; 19 const char kChineseSimplifiedCode[] = "zh-Hans";
20 20
21 // BCP 47 language code representing Chinese in Han Traditional script. 21 // BCP 47 language code representing Chinese in Han Traditional script.
22 const char kChineseTraditionalCode[] = "zh-Hant"; 22 const char kChineseTraditionalCode[] = "zh-Hant";
23 } // namespace 23 } // namespace
24 24
25 ChineseScriptClassifier::ChineseScriptClassifier() { 25 ChineseScriptClassifier::ChineseScriptClassifier() {
26 UParseError parse_status; 26 // zh-Hans and zh-Hant codepoints extracted from:
27 // third_party/icu/source/data/translit/Hans_Hant.txt
28 //
29 // zh-Hans codepoints to be stored in a UnicodeSet for later reference.
30 const std::string& hans_codepoints =
groby1 2017/03/10 19:27:17 Maybe I'm weird about this, but given this is data
riesa 2017/03/10 19:32:01 Sure -- fine with me. I don't know anything about
31 "[\u4e07\u4e0e\u4e13\u4e1a\u4e1b\u4e1c"
32 "\u4e1d\u4e22\u4e24\u4e25\u4e27\u4e2a\u4e2b"
33 "\u4e30\u4e34\u4e3a\u4e3d\u4e3e\u4e49\u4e4c\u4e50"
34 "\u4e54\u4e60\u4e61\u4e66\u4e70\u4e71\u4e89"
35 "\u4e8f\u4e98\u4e9a\u4ea7\u4ea9\u4eae\u4eb2\u4eb5"
36 "\u4eb8\u4ebf\u4ec0\u4ec5\u4ec6\u4ece\u4ed1"
37 "\u4ed3\u4ed7\u4eea\u4eec\u4ef7\u4f17\u4f18\u4f1a"
38 "\u4f1b\u4f1e\u4f1f\u4f20\u4f23\u4f24\u4f25"
39 "\u4f26\u4f27\u4f2a\u4f2b\u4f53\u4f5d\u4f63\u4f65"
40 "\u4fa0\u4fa3\u4fa5\u4fa6\u4fa7\u4fa8\u4fa9"
41 "\u4faa\u4fac\u4fe3\u4fe6\u4fe8\u4fe9\u4fea\u4feb"
42 "\u4fed\u503a\u503e\u506c\u507b\u507e\u507f"
43 "\u50a5\u50a7\u50a8\u50a9\u513f\u5151\u5156\u515a"
44 "\u5170\u5173\u5174\u5179\u517b\u517d\u5181"
45 "\u5185\u5188\u518c\u5199\u519b\u519c\u51a2\u51af"
46 "\u51b1\u51b3\u51b5\u51bb\u51c4\u51c9\u51cf"
47 "\u51d1\u51db\u51e4\u51eb\u51ed\u51ef\u51fb\u51ff"
48 "\u520d\u5218\u5219\u521a\u521b\u5220\u522b"
49 "\u522c\u522d\u5239\u523d\u523f\u5240\u5242\u5250"
50 "\u5251\u5265\u5267\u529d\u529e\u52a1\u52a2"
51 "\u52a8\u52b1\u52b2\u52b3\u52bf\u52cb\u52d6\u52da"
52 "\u52e4\u5300\u5326\u532e\u533a\u533b\u534e"
53 "\u534f\u5355\u5356\u5362\u5364\u5367\u536b\u5374"
54 "\u5382\u5384\u5385\u5386\u5389\u538b\u538c"
55 "\u538d\u5390\u5395\u5398\u53a2\u53a3\u53a6\u53a8"
56 "\u53a9\u53ae\u53bf\u53c1\u53c2\u53cc\u53d8"
57 "\u53d9\u53e0\u53f7\u53f9\u53fd\u5401\u540c\u5413"
58 "\u5415\u5417\u5423\u5428\u542c\u542f\u5434"
59 "\u5450\u5452\u5453\u5455\u5456\u5457\u5458\u5459"
60 "\u545b\u545c\u548f\u5499\u549b\u549d\u54a4"
61 "\u54c4\u54cd\u54d1\u54d2\u54d3\u54d4\u54d5\u54d7"
62 "\u54d9\u54dc\u54dd\u54df\u551b\u551d\u5520"
63 "\u5521\u5522\u5524\u5555\u5567\u556c\u556d\u556e"
64 "\u5570\u5574\u5578\u5582\u55b7\u55bd\u55be"
65 "\u55eb\u55f3\u5618\u5624\u5631\u565c\u566a\u56a3"
66 "\u56e2\u56ed\u56f1\u56f4\u56f5\u56fd\u56fe"
67 "\u5706\u5723\u5739\u573a\u5742\u574f\u5757\u575a"
68 "\u575b\u575c\u575d\u575e\u575f\u5760\u5784"
69 "\u5785\u5786\u5792\u57a6\u57a9\u57ab\u57ad\u57b1"
70 "\u57b2\u57b4\u57d8\u57d9\u57da\u57ef\u5811"
71 "\u5815\u5824\u5899\u58ee\u58f0\u58f3\u58f6\u58f8"
72 "\u5904\u5907\u590d\u591f\u5934\u5938\u5939"
73 "\u593a\u5941\u5942\u594b\u5956\u5965\u5986\u5987"
74 "\u5988\u59a9\u59aa\u59ab\u59d7\u59f9\u5a04"
75 "\u5a05\u5a06\u5a07\u5a08\u5a18\u5a31\u5a32\u5a34"
76 "\u5a73\u5a74\u5a75\u5a76\u5aaa\u5abe\u5ad2"
77 "\u5ad4\u5af1\u5b37\u5b59\u5b66\u5b6a\u5b81\u5b9d"
78 "\u5b9e\u5ba0\u5ba1\u5baa\u5bab\u5bbd\u5bbe"
79 "\u5bdd\u5bf8\u5bf9\u5bfb\u5bfc\u5bff\u5c06\u5c14"
80 "\u5c18\u5c1d\u5c27\u5c34\u5c38\u5c3d\u5c42"
81 "\u5c43\u5c49\u5c4a\u5c5e\u5c61\u5c66\u5c7f\u5c81"
82 "\u5c82\u5c96\u5c97\u5c98\u5c99\u5c9a\u5c9b"
83 "\u5ca9\u5cad\u5cb3\u5cbd\u5cbf\u5cc4\u5ce1\u5ce3"
84 "\u5ce4\u5ce5\u5ce6\u5d02\u5d03\u5d04\u5d2d"
85 "\u5d58\u5d5a\u5d5d\u5dc5\u5de9\u5def\u5e01\u5e05"
86 "\u5e08\u5e0f\u5e10\u5e18\u5e1c\u5e26\u5e27"
87 "\u5e2e\u5e31\u5e3b\u5e3c\u5e42\u5e76\u5e7f\u5e84"
88 "\u5e86\u5e90\u5e91\u5e93\u5e94\u5e99\u5e9e"
89 "\u5e9f\u5ea6\u5eea\u5f00\u5f02\u5f03\u5f11\u5f20"
90 "\u5f25\u5f26\u5f2a\u5f2f\u5f39\u5f3a\u5f52"
91 "\u5f53\u5f55\u5f5d\u5f66\u5f7b\u5f84\u5f95\u5fad"
92 "\u5fc6\u5fcf\u5fe7\u5ffe\u6000\u6001\u6002"
93 "\u6003\u6004\u6005\u6006\u601c\u603b\u603c\u603f"
94 "\u604b\u6052\u6064\u6073\u6076\u6078\u6079"
95 "\u607a\u607b\u607c\u607d\u60a6\u60ab\u60ac\u60ad"
96 "\u60ae\u60af\u60ca\u60e7\u60e8\u60e9\u60eb"
97 "\u60ec\u60ed\u60ee\u60ef\u6120\u6124\u6126\u6151"
98 "\u61d1\u61d2\u61d4\u6206\u620b\u620f\u6217"
99 "\u6218\u621a\u622c\u622e\u622f\u6237\u6247\u6251"
100 "\u6263\u6267\u6269\u626a\u626b\u626c\u6270"
101 "\u6298\u629a\u629b\u629f\u62a0\u62a1\u62a2\u62a4"
102 "\u62a5\u62c5\u62d3\u62df\u62e2\u62e3\u62e5"
103 "\u62e6\u62e7\u62e8\u62e9\u6302\u631a\u631b\u631c"
104 "\u631d\u631e\u631f\u6320\u6321\u6322\u6323"
105 "\u6324\u6325\u6326\u633d\u6342\u6346\u635d\u635e"
106 "\u635f\u6361\u6362\u6363\u636e\u637b\u63b3"
107 "\u63b4\u63b7\u63b8\u63ba\u63bc\u63fd\u63fe\u63ff"
108 "\u6400\u6401\u6402\u6405\u643a\u6444\u6445"
109 "\u6446\u6447\u6448\u644a\u6484\u6491\u64b5\u64b7"
110 "\u64b8\u64ba\u64de\u6512\u654c\u655b\u6570"
111 "\u658b\u6593\u65a9\u65ad\u65c1\u65e0\u65e7\u65f6"
112 "\u65f7\u65f8\u6606\u660c\u6619\u6635\u663c"
113 "\u663d\u663e\u664b\u6652\u6653\u6654\u6655\u6656"
114 "\u6682\u66a7\u672f\u6731\u6740\u6742\u6743"
115 "\u6746\u6761\u6765\u6768\u6769\u676f\u6770\u6781"
116 "\u6784\u679e\u67a2\u67a3\u67a5\u67a7\u67a8"
117 "\u67aa\u67ab\u67ad\u67e0\u67e5\u67fd\u6800\u6805"
118 "\u6807\u6808\u6809\u680a\u680b\u680c\u680e"
119 "\u680f\u6811\u6816\u6837\u683e\u6860\u6861\u6862"
120 "\u6863\u6864\u6865\u6866\u6867\u6868\u6869"
121 "\u68a6\u68bc\u68be\u68bf\u68c0\u68c1\u68c2\u6901"
122 "\u691f\u6920\u6924\u692d\u696b\u697c\u6984"
123 "\u6985\u6987\u6988\u6989\u69a8\u69da\u69db\u69df"
124 "\u69e0\u6a2a\u6a2f\u6a31\u6a50\u6a65\u6a71"
125 "\u6a79\u6a7c\u6a90\u6aa9\u6b22\u6b24\u6b27\u6b7c"
126 "\u6b81\u6b87\u6b8b\u6b92\u6b93\u6b9a\u6ba1"
127 "\u6bb4\u6bb7\u6bc1\u6bc2\u6bd5\u6bd9\u6be1\u6bf5"
128 "\u6c07\u6c22\u6c29\u6c32\u6c47\u6c49\u6c64"
129 "\u6c79\u6c89\u6c9f\u6ca1\u6ca3\u6ca4\u6ca5\u6ca6"
130 "\u6ca7\u6ca9\u6caa\u6cbe\u6cde\u6cea\u6cf6"
131 "\u6cf7\u6cf8\u6cfa\u6cfb\u6cfc\u6cfd\u6cfe\u6d01"
132 "\u6d12\u6d3c\u6d43\u6d45\u6d46\u6d47\u6d48"
133 "\u6d4a\u6d4b\u6d4d\u6d4e\u6d4f\u6d50\u6d51\u6d52"
134 "\u6d53\u6d54\u6d5a\u6d82\u6d9b\u6d9d\u6d9e"
135 "\u6d9f\u6da0\u6da1\u6da3\u6da4\u6da6\u6da7\u6da8"
136 "\u6da9\u6dec\u6e0a\u6e0c\u6e0d\u6e0e\u6e10"
137 "\u6e11\u6e14\u6e17\u6e29\u6e7e\u6e7f\u6e83\u6e85"
138 "\u6e86\u6ed7\u6eda\u6ede\u6edf\u6ee0\u6ee1"
139 "\u6ee2\u6ee4\u6ee5\u6ee6\u6ee8\u6ee9\u6eea\u6f13"
140 "\u6f24\u6f46\u6f47\u6f4b\u6f4d\u6f5c\u6f74"
141 "\u6f9c\u6fd1\u6fd2\u704f\u706d\u706f\u7075\u707e"
142 "\u707f\u7080\u7089\u7096\u709c\u709d\u70ad"
143 "\u70b9\u70bc\u70bd\u70c1\u70c2\u70c3\u70db\u70e6"
144 "\u70e7\u70e8\u70e9\u70eb\u70ec\u70ed\u7115"
145 "\u7116\u7118\u7130\u7174\u718f\u7194\u7231\u7237"
146 "\u724d\u7266\u7275\u727a\u728a\u72b6\u72b7"
147 "\u72b8\u72b9\u72c8\u72dd\u72de\u72ec\u72ed\u72ee"
148 "\u72ef\u72f0\u72f1\u72f2\u7303\u730e\u7315"
149 "\u7321\u732a\u732b\u732c\u732e\u736d\u7391\u739a"
150 "\u739b\u73ae\u73af\u73b0\u73b1\u73ba\u73d0"
151 "\u73d1\u73f0\u73f2\u7403\u7405\u740f\u7410\u743c"
152 "\u7476\u7477\u748e\u74d2\u74ee\u74ef\u7535"
153 "\u753b\u7545\u7574\u7596\u7597\u759f\u75a0\u75a1"
154 "\u75ac\u75ad\u75ae\u75af\u75b1\u75b4\u75c8"
155 "\u75c9\u75d2\u75d6\u75e8\u75ea\u75eb\u7605\u7606"
156 "\u7617\u7618\u762a\u762b\u763e\u763f\u765e"
157 "\u7663\u766b\u7682\u7691\u76b1\u76b2\u76cf\u76d0"
158 "\u76d1\u76d6\u76d7\u76d8\u770d\u772c\u772f"
159 "\u7741\u7750\u7751\u777e\u7786\u7792\u77a9\u77eb"
160 "\u77f6\u77fe\u77ff\u7800\u7801\u7816\u7817"
161 "\u781a\u781c\u783a\u783b\u783e\u7840\u7841\u7855"
162 "\u7856\u7857\u7859\u786e\u7877\u788d\u789b"
163 "\u789c\u78b1\u78f7\u7934\u793c\u7943\u794e\u7962"
164 "\u796f\u7977\u7978\u7980\u7984\u7985\u79bb"
165 "\u79c3\u79cd\u79d8\u79ef\u79f0\u79fd\u79fe\u7a06"
166 "\u7a0e\u7a23\u7a33\u7a51\u7a77\u7a83\u7a8d"
167 "\u7a8e\u7a91\u7a9c\u7a9d\u7aa5\u7aa6\u7aad\u7ad6"
168 "\u7ade\u7b03\u7b0b\u7b14\u7b15\u7b3a\u7b3c"
169 "\u7b3e\u7b51\u7b5a\u7b5b\u7b5c\u7b5d\u7b79\u7b7c"
170 "\u7b7e\u7b80\u7b93\u7ba6\u7ba7\u7ba8\u7ba9"
171 "\u7baa\u7bab\u7bd1\u7bd3\u7bea\u7bee\u7bf1\u7c16"
172 "\u7c41\u7c74\u7c7b\u7c7c\u7c9c\u7c9d\u7ca4"
173 "\u7caa\u7cae\u7cc1\u7cc7\u7cdf\u7d27\u7d77\u7e9f"
174 "\u7ea0\u7ea1\u7ea2\u7ea3\u7ea4\u7ea5\u7ea6"
175 "\u7ea7\u7ea8\u7ea9\u7eaa\u7eab\u7eac\u7ead\u7eae"
176 "\u7eaf\u7eb0\u7eb1\u7eb2\u7eb3\u7eb4\u7eb5"
177 "\u7eb6\u7eb7\u7eb8\u7eb9\u7eba\u7ebb\u7ebc\u7ebd"
178 "\u7ebe\u7ebf\u7ec0\u7ec1\u7ec2\u7ec3\u7ec4"
179 "\u7ec5\u7ec6\u7ec7\u7ec8\u7ec9\u7eca\u7ecb\u7ecc"
180 "\u7ecd\u7ece\u7ecf\u7ed0\u7ed1\u7ed2\u7ed3"
181 "\u7ed4\u7ed5\u7ed6\u7ed7\u7ed8\u7ed9\u7eda\u7edb"
182 "\u7edc\u7edd\u7ede\u7edf\u7ee0\u7ee1\u7ee2"
183 "\u7ee3\u7ee4\u7ee5\u7ee6\u7ee7\u7ee8\u7ee9\u7eea"
184 "\u7eeb\u7eec\u7eed\u7eee\u7eef\u7ef0\u7ef1"
185 "\u7ef2\u7ef3\u7ef4\u7ef5\u7ef6\u7ef7\u7ef8\u7ef9"
186 "\u7efa\u7efb\u7efc\u7efd\u7efe\u7eff\u7f00"
187 "\u7f01\u7f02\u7f03\u7f04\u7f05\u7f06\u7f07\u7f08"
188 "\u7f09\u7f0a\u7f0b\u7f0c\u7f0d\u7f0e\u7f0f"
189 "\u7f11\u7f12\u7f13\u7f14\u7f15\u7f16\u7f17\u7f18"
190 "\u7f19\u7f1a\u7f1b\u7f1c\u7f1d\u7f1e\u7f1f"
191 "\u7f20\u7f21\u7f22\u7f23\u7f24\u7f25\u7f26\u7f27"
192 "\u7f28\u7f29\u7f2a\u7f2b\u7f2c\u7f2d\u7f2e"
193 "\u7f2f\u7f30\u7f31\u7f32\u7f33\u7f34\u7f35\u7f42"
194 "\u7f51\u7f57\u7f5a\u7f62\u7f74\u7f81\u7f9f"
195 "\u7fa1\u7fd8\u7ff1\u8000\u8022\u8027\u8038\u8042"
196 "\u804b\u804c\u804d\u8054\u8069\u806a\u8083"
197 "\u80a0\u80a4\u80ae\u80b4\u80be\u80bf\u80c0\u80c1"
198 "\u80dc\u80e7\u80e8\u80ea\u80eb\u80f6\u8109"
199 "\u810d\u810f\u8110\u8111\u8113\u8114\u811a\u8131"
200 "\u8136\u8138\u814c\u816d\u817b\u817c\u817d"
201 "\u817e\u8191\u81bb\u81dc\u8206\u8223\u8230\u8231"
202 "\u823b\u8270\u8273\u827a\u8282\u8288\u8297"
203 "\u829c\u82a6\u82c1\u82c7\u82c8\u82cb\u82cc\u82cd"
204 "\u82cf\u8303\u830e\u830f\u8311\u8314\u8315"
205 "\u8327\u8346\u8350\u8359\u835a\u835b\u835c\u835e"
206 "\u835f\u8360\u8361\u8363\u8364\u8365\u8366"
207 "\u8367\u8368\u8369\u836a\u836b\u836c\u836d\u836e"
208 "\u836f\u8385\u83b1\u83b2\u83b3\u83b4\u83b6"
209 "\u83b7\u83b8\u83b9\u83ba\u83bc\u841d\u8424\u8425"
210 "\u8426\u8427\u8428\u8471\u8487\u8489\u848b"
211 "\u848c\u84dd\u84df\u84e0\u84e3\u84e5\u84e6\u8502"
212 "\u8511\u8537\u8539\u853a\u853c\u8570\u8572"
213 "\u8574\u85ae\u85af\u85d3\u85e4\u8616\u864f\u8651"
214 "\u865a\u866b\u866c\u866e\u867d\u867e\u867f"
215 "\u8680\u8681\u8682\u8695\u86ac\u86ca\u86ce\u86cf"
216 "\u86ee\u86f0\u86f1\u86f2\u86f3\u86f4\u8715"
217 "\u8717\u8737\u8747\u8748\u8749\u874e\u877c\u877e"
218 "\u8780\u87a8\u87cf\u8839\u8845\u8854\u8865"
219 "\u886c\u886e\u8884\u8885\u8886\u889c\u88ad\u88af"
220 "\u88c5\u88c6\u88c8\u88e2\u88e3\u88e4\u88e5"
221 "\u891b\u8934\u89c1\u89c2\u89c3\u89c4\u89c5\u89c6"
222 "\u89c7\u89c8\u89c9\u89ca\u89cb\u89cc\u89cd"
223 "\u89ce\u89cf\u89d0\u89d1\u89de\u89e6\u89ef\u8a1a"
224 "\u8a89\u8a8a\u8ba0\u8ba1\u8ba2\u8ba3\u8ba4"
225 "\u8ba5\u8ba6\u8ba7\u8ba8\u8ba9\u8baa\u8bab\u8bad"
226 "\u8bae\u8baf\u8bb0\u8bb1\u8bb2\u8bb3\u8bb4"
227 "\u8bb5\u8bb6\u8bb7\u8bb8\u8bb9\u8bba\u8bbb\u8bbc"
228 "\u8bbd\u8bbe\u8bbf\u8bc0\u8bc1\u8bc2\u8bc3"
229 "\u8bc4\u8bc5\u8bc6\u8bc7\u8bc8\u8bc9\u8bca\u8bcb"
230 "\u8bcc\u8bcd\u8bce\u8bcf\u8bd0\u8bd1\u8bd2"
231 "\u8bd3\u8bd4\u8bd5\u8bd6\u8bd7\u8bd8\u8bd9\u8bda"
232 "\u8bdb\u8bdc\u8bdd\u8bde\u8bdf\u8be0\u8be1"
233 "\u8be2\u8be3\u8be4\u8be5\u8be6\u8be7\u8be8\u8be9"
234 "\u8bea\u8beb\u8bec\u8bed\u8bee\u8bef\u8bf0"
235 "\u8bf1\u8bf2\u8bf3\u8bf4\u8bf5\u8bf6\u8bf7\u8bf8"
236 "\u8bf9\u8bfa\u8bfb\u8bfc\u8bfd\u8bfe\u8bff"
237 "\u8c00\u8c01\u8c02\u8c03\u8c04\u8c05\u8c06\u8c07"
238 "\u8c08\u8c0a\u8c0b\u8c0c\u8c0d\u8c0e\u8c0f"
239 "\u8c10\u8c11\u8c12\u8c13\u8c14\u8c15\u8c16\u8c17"
240 "\u8c18\u8c19\u8c1a\u8c1b\u8c1c\u8c1d\u8c1e"
241 "\u8c1f\u8c20\u8c21\u8c22\u8c23\u8c24\u8c25\u8c26"
242 "\u8c27\u8c28\u8c29\u8c2a\u8c2b\u8c2c\u8c2d"
243 "\u8c2e\u8c2f\u8c30\u8c31\u8c32\u8c33\u8c34\u8c35"
244 "\u8c36\u8c6e\u8d1d\u8d1e\u8d1f\u8d20\u8d21"
245 "\u8d22\u8d23\u8d24\u8d25\u8d26\u8d27\u8d28\u8d29"
246 "\u8d2a\u8d2b\u8d2c\u8d2d\u8d2e\u8d2f\u8d30"
247 "\u8d31\u8d32\u8d33\u8d34\u8d35\u8d36\u8d37\u8d38"
248 "\u8d39\u8d3a\u8d3b\u8d3c\u8d3d\u8d3e\u8d3f"
249 "\u8d40\u8d41\u8d42\u8d43\u8d44\u8d45\u8d46\u8d47"
250 "\u8d48\u8d49\u8d4a\u8d4b\u8d4c\u8d4d\u8d4e"
251 "\u8d4f\u8d50\u8d51\u8d52\u8d53\u8d54\u8d55\u8d56"
252 "\u8d57\u8d58\u8d59\u8d5a\u8d5b\u8d5c\u8d5d"
253 "\u8d5e\u8d5f\u8d60\u8d61\u8d62\u8d63\u8d6a\u8d75"
254 "\u8d76\u8d8b\u8db1\u8db8\u8dc3\u8dc4\u8dde"
255 "\u8df5\u8df6\u8df7\u8df8\u8df9\u8dfb\u8e0a\u8e0c"
256 "\u8e2a\u8e2c\u8e2f\u8e51\u8e52\u8e70\u8e7f"
257 "\u8e8f\u8e9c\u8eaf\u8f66\u8f67\u8f68\u8f69\u8f6a"
258 "\u8f6b\u8f6c\u8f6d\u8f6e\u8f6f\u8f70\u8f71"
259 "\u8f72\u8f73\u8f74\u8f75\u8f76\u8f77\u8f78\u8f79"
260 "\u8f7a\u8f7b\u8f7c\u8f7d\u8f7e\u8f7f\u8f80"
261 "\u8f81\u8f82\u8f83\u8f84\u8f85\u8f86\u8f87\u8f88"
262 "\u8f89\u8f8a\u8f8b\u8f8c\u8f8d\u8f8e\u8f8f"
263 "\u8f90\u8f91\u8f92\u8f93\u8f94\u8f95\u8f96\u8f97"
264 "\u8f98\u8f99\u8f9a\u8fa9\u8fab\u8fb9\u8fbd"
265 "\u8fbe\u8fc1\u8fc7\u8fc8\u8fd0\u8fd8\u8fd9\u8fdb"
266 "\u8fdc\u8fdd\u8fde\u8fdf\u8fe9\u8ff3\u8ff9"
267 "\u9002\u9009\u900a\u9012\u9026\u903b\u903e\u9057"
268 "\u9065\u9093\u909d\u90ac\u90ae\u90b9\u90ba"
269 "\u90bb\u90c1\u90cf\u90d0\u90d1\u90d3\u90e6\u90e7"
270 "\u90f8\u9142\u915d\u9166\u9171\u9178\u917d"
271 "\u917e\u917f\u91ca\u91cc\u9274\u92ae\u933e\u9485"
272 "\u9486\u9487\u9488\u9489\u948a\u948b\u948c"
273 "\u948d\u948e\u948f\u9490\u9491\u9492\u9493\u9494"
274 "\u9495\u9496\u9497\u9498\u9499\u949a\u949b"
275 "\u949c\u949d\u949e\u949f\u94a0\u94a1\u94a2\u94a3"
276 "\u94a4\u94a5\u94a6\u94a7\u94a8\u94a9\u94aa"
277 "\u94ab\u94ac\u94ad\u94ae\u94af\u94b0\u94b1\u94b2"
278 "\u94b3\u94b4\u94b5\u94b6\u94b7\u94b8\u94b9"
279 "\u94ba\u94bb\u94bc\u94bd\u94be\u94bf\u94c0\u94c1"
280 "\u94c2\u94c3\u94c4\u94c5\u94c6\u94c7\u94c8"
281 "\u94c9\u94ca\u94cb\u94cc\u94cd\u94ce\u94cf\u94d0"
282 "\u94d1\u94d2\u94d3\u94d4\u94d5\u94d6\u94d7"
283 "\u94d8\u94d9\u94da\u94db\u94dc\u94dd\u94de\u94df"
284 "\u94e0\u94e1\u94e2\u94e3\u94e4\u94e5\u94e6"
285 "\u94e7\u94e8\u94e9\u94ea\u94eb\u94ec\u94ed\u94ee"
286 "\u94ef\u94f0\u94f1\u94f2\u94f3\u94f4\u94f5"
287 "\u94f6\u94f7\u94f8\u94f9\u94fa\u94fb\u94fc\u94fd"
288 "\u94fe\u94ff\u9500\u9501\u9502\u9503\u9504"
289 "\u9505\u9506\u9507\u9508\u9509\u950a\u950b\u950c"
290 "\u950d\u950e\u950f\u9510\u9511\u9512\u9513"
291 "\u9514\u9515\u9516\u9517\u9518\u9519\u951a\u951b"
292 "\u951c\u951d\u951e\u951f\u9520\u9521\u9522"
293 "\u9523\u9524\u9525\u9526\u9527\u9528\u9529\u952a"
294 "\u952b\u952c\u952d\u952e\u952f\u9530\u9531"
295 "\u9532\u9533\u9534\u9535\u9536\u9537\u9538\u9539"
296 "\u953b\u953c\u953d\u953e\u953f\u9540\u9541"
297 "\u9542\u9543\u9544\u9545\u9546\u9547\u9548\u9549"
298 "\u954a\u954b\u954c\u954d\u954e\u954f\u9550"
299 "\u9551\u9552\u9553\u9554\u9556\u9557\u9558\u9559"
300 "\u955a\u955b\u955c\u955d\u955e\u955f\u9560"
301 "\u9561\u9562\u9563\u9564\u9565\u9566\u9567\u9568"
302 "\u9569\u956a\u956b\u956c\u956d\u956e\u956f"
303 "\u9570\u9571\u9572\u9573\u9574\u9575\u9576\u957f"
304 "\u95e8\u95e9\u95ea\u95eb\u95ec\u95ed\u95ee"
305 "\u95ef\u95f0\u95f1\u95f2\u95f3\u95f4\u95f5\u95f6"
306 "\u95f7\u95f8\u95f9\u95fa\u95fb\u95fc\u95fd"
307 "\u95fe\u95ff\u9600\u9601\u9602\u9603\u9604\u9605"
308 "\u9606\u9607\u9608\u9609\u960a\u960b\u960c"
309 "\u960d\u960e\u960f\u9610\u9611\u9612\u9613\u9614"
310 "\u9615\u9616\u9617\u9618\u9619\u961a\u961b"
311 "\u961f\u9633\u9634\u9635\u9636\u9640\u9645\u9646"
312 "\u9647\u9648\u9649\u9655\u9667\u9668\u9669"
313 "\u968f\u9690\u96b6\u96bd\u96be\u96cf\u96e0\u96f3"
314 "\u96fe\u9701\u9721\u972d\u9753\u9759\u9765"
315 "\u9791\u9792\u97af\u97e6\u97e7\u97e8\u97e9\u97ea"
316 "\u97eb\u97ec\u97f5\u9875\u9876\u9877\u9878"
317 "\u9879\u987a\u987b\u987c\u987d\u987e\u987f\u9880"
318 "\u9881\u9882\u9883\u9884\u9885\u9886\u9887"
319 "\u9888\u9889\u988a\u988b\u988c\u988d\u988e\u988f"
320 "\u9890\u9891\u9892\u9893\u9894\u9895\u9896"
321 "\u9897\u9898\u9899\u989a\u989b\u989c\u989d\u989e"
322 "\u989f\u98a0\u98a1\u98a2\u98a4\u98a5\u98a6"
323 "\u98a7\u98ce\u98cf\u98d0\u98d1\u98d2\u98d3\u98d4"
324 "\u98d5\u98d6\u98d7\u98d8\u98d9\u98da\u98de"
325 "\u98e8\u990d\u9963\u9964\u9965\u9966\u9967\u9968"
326 "\u9969\u996a\u996b\u996c\u996d\u996e\u996f"
327 "\u9970\u9971\u9972\u9973\u9974\u9975\u9976\u9977"
328 "\u9978\u9979\u997a\u997b\u997c\u997d\u997e"
329 "\u997f\u9981\u9982\u9983\u9984\u9985\u9986\u9987"
330 "\u9988\u9989\u998a\u998b\u998c\u998d\u998e"
331 "\u998f\u9990\u9991\u9992\u9993\u9994\u9995\u9a6c"
332 "\u9a6d\u9a6e\u9a6f\u9a70\u9a71\u9a72\u9a73"
333 "\u9a74\u9a75\u9a76\u9a77\u9a78\u9a79\u9a7a\u9a7b"
334 "\u9a7c\u9a7d\u9a7e\u9a7f\u9a80\u9a81\u9a82"
335 "\u9a83\u9a84\u9a85\u9a86\u9a87\u9a88\u9a89\u9a8a"
336 "\u9a8b\u9a8c\u9a8d\u9a8e\u9a8f\u9a90\u9a91"
337 "\u9a92\u9a93\u9a94\u9a95\u9a96\u9a97\u9a98\u9a99"
338 "\u9a9a\u9a9b\u9a9c\u9a9d\u9a9e\u9a9f\u9aa0"
339 "\u9aa1\u9aa2\u9aa3\u9aa4\u9aa5\u9aa6\u9aa7\u9ac5"
340 "\u9acb\u9acc\u9b13\u9b47\u9b49\u9c7c\u9c7d"
341 "\u9c7e\u9c7f\u9c80\u9c81\u9c82\u9c83\u9c84\u9c85"
342 "\u9c86\u9c87\u9c88\u9c89\u9c8a\u9c8b\u9c8c"
343 "\u9c8d\u9c8e\u9c8f\u9c90\u9c91\u9c92\u9c93\u9c94"
344 "\u9c95\u9c96\u9c97\u9c98\u9c99\u9c9a\u9c9b"
345 "\u9c9c\u9c9d\u9c9e\u9c9f\u9ca0\u9ca1\u9ca2\u9ca3"
346 "\u9ca4\u9ca5\u9ca6\u9ca7\u9ca8\u9ca9\u9caa"
347 "\u9cab\u9cac\u9cad\u9cae\u9caf\u9cb0\u9cb1\u9cb2"
348 "\u9cb3\u9cb4\u9cb5\u9cb6\u9cb7\u9cb8\u9cb9"
349 "\u9cba\u9cbb\u9cbc\u9cbd\u9cbe\u9cbf\u9cc0\u9cc1"
350 "\u9cc2\u9cc3\u9cc4\u9cc5\u9cc6\u9cc7\u9cc8"
351 "\u9cc9\u9cca\u9ccb\u9ccc\u9ccd\u9cce\u9ccf\u9cd0"
352 "\u9cd1\u9cd2\u9cd3\u9cd4\u9cd5\u9cd6\u9cd7"
353 "\u9cd8\u9cd9\u9cda\u9cdb\u9cdc\u9cdd\u9cde\u9cdf"
354 "\u9ce0\u9ce1\u9ce2\u9ce3\u9e1f\u9e20\u9e21"
355 "\u9e22\u9e23\u9e24\u9e25\u9e26\u9e27\u9e28\u9e29"
356 "\u9e2a\u9e2b\u9e2c\u9e2d\u9e2e\u9e2f\u9e30"
357 "\u9e31\u9e32\u9e33\u9e34\u9e35\u9e36\u9e37\u9e38"
358 "\u9e39\u9e3a\u9e3b\u9e3c\u9e3d\u9e3e\u9e3f"
359 "\u9e40\u9e41\u9e42\u9e43\u9e44\u9e45\u9e46\u9e47"
360 "\u9e48\u9e49\u9e4a\u9e4b\u9e4c\u9e4d\u9e4e"
361 "\u9e4f\u9e50\u9e51\u9e52\u9e53\u9e54\u9e55\u9e56"
362 "\u9e57\u9e58\u9e59\u9e5a\u9e5b\u9e5c\u9e5d"
363 "\u9e5e\u9e5f\u9e60\u9e61\u9e62\u9e63\u9e64\u9e65"
364 "\u9e66\u9e67\u9e68\u9e69\u9e6a\u9e6b\u9e6c"
365 "\u9e6d\u9e6f\u9e70\u9e71\u9e72\u9e73\u9e74\u9e7e"
366 "\u9ea6\u9eb8\u9ebb\u9ec4\u9ec9\u9ee1\u9ee9"
367 "\u9eea\u9efe\u9f0b\u9f0d\u9f17\u9f39\u9f50\u9f51"
368 "\u9f7f\u9f80\u9f81\u9f82\u9f83\u9f84\u9f85"
369 "\u9f86\u9f87\u9f88\u9f89\u9f8a\u9f8b\u9f8c\u9f99"
370 "\u9f9a\u9f9b\u9f9f]";
371 const std::string& hant_codepoints =
372 "[\u003b\u4e1f\u4e26\u4e82\u4e99\u4e9e\u4efd\u4f47"
373 "\u4f48\u4f54\u4f57\u4f75\u4f86\u4f96\u4fb6"
374 "\u4fb7\u4fc1\u4fc2\u4fd4\u4fe0\u4fec\u4ff1\u5000"
375 "\u5006\u5008\u5009\u500b\u5011\u5016\u5021"
376 "\u5023\u502b\u5049\u5074\u5075\u507d\u5091\u5096"
377 "\u5098\u5099\u50a2\u50ad\u50af\u50b3\u50b4"
378 "\u50b5\u50b7\u50be\u50c2\u50c5\u50c7\u50c9\u50cf"
379 "\u50d1\u50d5\u50de\u50e5\u50e8\u50f1\u50f9"
380 "\u5100\u5102\u5104\u5108\u5109\u5110\u5114\u5115"
381 "\u5118\u511f\u512a\u5132\u5137\u5138\u513a"
382 "\u513b\u513c\u5147\u514c\u5152\u5157\u5169\u518a"
383 "\u51aa\u51c8\u51cd\u51dc\u51f1\u5225\u522a"
384 "\u5244\u5247\u524b\u524e\u5257\u525b\u525d\u526e"
385 "\u5274\u5275\u5277\u5283\u5287\u5289\u528a"
386 "\u528c\u528d\u528f\u5291\u529a\u52c1\u52d5\u52d7"
387 "\u52d9\u52db\u52dd\u52de\u52e2\u52e9\u52f1"
388 "\u52f3\u52f5\u52f8\u52fb\u532d\u532f\u5331\u5340"
389 "\u5354\u5379\u537b\u5399\u53a0\u53ad\u53b2"
390 "\u53b4\u53c3\u53c4\u53e2\u540b\u5412\u5422\u5433"
391 "\u5436\u5442\u54b7\u54bc\u54e1\u5504\u551a"
392 "\u5538\u554f\u5553\u555e\u555f\u5562\u558e\u559a"
393 "\u55a8\u55aa\u55ab\u55ac\u55ae\u55b2\u55c6"
394 "\u55c7\u55ca\u55ce\u55da\u55e9\u55f6\u5606\u560d"
395 "\u5614\u5616\u5617\u561c\u5629\u562e\u562f"
396 "\u5630\u5635\u5638\u563d\u5653\u565a\u565d\u5660"
397 "\u5665\u5666\u566f\u5672\u5674\u5678\u5679"
398 "\u5680\u5687\u568c\u5690\u5695\u5699\u56a5\u56a6"
399 "\u56a8\u56ae\u56b2\u56b3\u56b4\u56b6\u56c0"
400 "\u56c1\u56c2\u56c5\u56c8\u56c9\u56cd\u56d1\u56d3"
401 "\u56ea\u5707\u570b\u570d\u5712\u5713\u5716"
402 "\u5718\u57b5\u57e1\u57f0\u57f7\u5805\u580a\u5816"
403 "\u581d\u582f\u5831\u5834\u584a\u584b\u584f"
404 "\u5852\u5857\u585a\u5862\u5864\u5875\u5879\u588a"
405 "\u589c\u58ae\u58b3\u58bb\u58be\u58c7\u58cb"
406 "\u58ce\u58d3\u58d8\u58d9\u58da\u58dc\u58de\u58df"
407 "\u58e0\u58e2\u58e9\u58ef\u58fa\u58fc\u58fd"
408 "\u5920\u5922\u5925\u593e\u5950\u5967\u5969\u596a"
409 "\u596c\u596e\u597c\u599d\u59ca\u59cd\u59e6"
410 "\u59ea\u5a1b\u5a41\u5a66\u5a6d\u5aa7\u5aaf\u5abc"
411 "\u5abd\u5acb\u5ad7\u5af5\u5afb\u5aff\u5b00"
412 "\u5b08\u5b0b\u5b0c\u5b19\u5b1d\u5b21\u5b24\u5b2a"
413 "\u5b30\u5b38\u5b43\u5b4c\u5b6b\u5b78\u5b7f"
414 "\u5bae\u5bdf\u5be2\u5be6\u5be7\u5be9\u5beb\u5bec"
415 "\u5bf5\u5bf6\u5c05\u5c07\u5c08\u5c0b\u5c0d"
416 "\u5c0e\u5c37\u5c46\u5c4d\u5c53\u5c5c\u5c62\u5c64"
417 "\u5c68\u5c6c\u5ca1\u5cf4\u5cf6\u5cfd\u5d0d"
418 "\u5d11\u5d17\u5d19\u5d22\u5d2c\u5d50\u5d81\u5d84"
419 "\u5d87\u5d94\u5d97\u5da0\u5da2\u5da7\u5dae"
420 "\u5db4\u5db8\u5dba\u5dbc\u5dbd\u5dcb\u5dd2\u5dd4"
421 "\u5dd6\u5df0\u5e25\u5e2b\u5e33\u5e36\u5e40"
422 "\u5e43\u5e57\u5e58\u5e5f\u5e63\u5e6b\u5e6c\u5e79"
423 "\u5e7e\u5eab\u5ec1\u5ec2\u5ec4\u5ec8\u5eda"
424 "\u5edd\u5edf\u5ee0\u5ee1\u5ee2\u5ee3\u5ee9\u5eec"
425 "\u5ef3\u5efb\u5f12\u5f14\u5f33\u5f35\u5f37"
426 "\u5f46\u5f48\u5f4c\u5f4e\u5f59\u5f5e\u5f65\u5f7f"
427 "\u5f8c\u5f91\u5f9e\u5fa0\u5fa9\u5fac\u5fb9"
428 "\u6035\u6046\u6065\u6085\u609e\u60b3\u60b5\u60b6"
429 "\u60bd\u60e1\u60f1\u60f2\u60fb\u611b\u611c"
430 "\u6128\u6134\u6137\u613e\u6144\u6147\u614b\u614d"
431 "\u6158\u615a\u615f\u6163\u6164\u616a\u616b"
432 "\u616e\u6173\u6176\u617c\u617e\u6182\u618a\u6190"
433 "\u6191\u6192\u619a\u61a4\u61ab\u61ae\u61b2"
434 "\u61b6\u61c3\u61c7\u61c9\u61cc\u61cd\u61de\u61df"
435 "\u61e3\u61e8\u61ee\u61f2\u61f6\u61f7\u61f8"
436 "\u61fa\u61fc\u61fe\u6200\u6207\u6214\u6227\u6229"
437 "\u6230\u6231\u6232\u6236\u62cb\u6329\u633e"
438 "\u6368\u636b\u6372\u6383\u6384\u6397\u6399\u639b"
439 "\u63a1\u63c0\u63da\u63db\u63ee\u6406\u640d"
440 "\u6416\u6417\u6425\u6427\u6428\u6435\u6436\u643e"
441 "\u6440\u6451\u645c\u645f\u646f\u6473\u6476"
442 "\u647a\u647b\u6488\u648f\u6490\u6493\u649a\u649d"
443 "\u649f\u64a2\u64a3\u64a5\u64ab\u64b2\u64b3"
444 "\u64bb\u64be\u64bf\u64c1\u64c4\u64c7\u64ca\u64cb"
445 "\u64d3\u64d4\u64da\u64e0\u64e3\u64ec\u64ef"
446 "\u64f0\u64f1\u64f2\u64f4\u64f7\u64fa\u64fb\u64fc"
447 "\u64fe\u6504\u6506\u650f\u6514\u6516\u6519"
448 "\u651b\u651c\u651d\u6522\u6523\u6524\u652a\u652c"
449 "\u6557\u6558\u6575\u6578\u6582\u6583\u6595"
450 "\u65ac\u65b7\u65bc\u6607\u6642\u6649\u665d\u6688"
451 "\u6689\u6698\u66a2\u66ab\u66b1\u66c4\u66c6"
452 "\u66c7\u66c9\u66cf\u66d6\u66e0\u66e8\u66ec\u66f8"
453 "\u6703\u6727\u6756\u6771\u6792\u67f5\u687f"
454 "\u6894\u6898\u689d\u689f\u68b2\u68c4\u68d6\u68d7"
455 "\u68df\u68e7\u68f2\u68f6\u690f\u694a\u6953"
456 "\u6968\u696d\u6975\u6996\u69aa\u69ae\u69b2\u69bf"
457 "\u69cb\u69cd\u69d3\u69d6\u69e4\u69e7\u69e8"
458 "\u69f3\u6a01\u6a02\u6a05\u6a11\u6a13\u6a19\u6a1e"
459 "\u6a23\u6a38\u6a39\u6a3a\u6a48\u6a4b\u6a5f"
460 "\u6a62\u6a6b\u6a81\u6a89\u6a94\u6a9c\u6a9d\u6a9f"
461 "\u6aa2\u6aa3\u6aae\u6aaf\u6ab3\u6ab8\u6abb"
462 "\u6ad3\u6ada\u6adb\u6add\u6ade\u6adf\u6ae5\u6ae7"
463 "\u6ae8\u6aea\u6aeb\u6aec\u6af1\u6af3\u6af8"
464 "\u6afa\u6afb\u6b04\u6b0a\u6b0f\u6b12\u6b16\u6b1e"
465 "\u6b35\u6b3d\u6b4e\u6b50\u6b5b\u6b5f\u6b61"
466 "\u6b72\u6b77\u6b78\u6b7f\u6b98\u6b9e\u6ba4\u6ba8"
467 "\u6bab\u6bae\u6baf\u6bb0\u6bb2\u6bba\u6bbb"
468 "\u6bbc\u6bc0\u6bc6\u6bec\u6bff\u6c02\u6c08\u6c0c"
469 "\u6c23\u6c2b\u6c2c\u6c33\u6c39\u6c3e\u6c4e"
470 "\u6c59\u6c7a\u6c8d\u6c92\u6c96\u6cc1\u6d29\u6d36"
471 "\u6d79\u6d87\u6dbc\u6dd2\u6dda\u6de5\u6dea"
472 "\u6df5\u6df6\u6dfa\u6e19\u6e1b\u6e26\u6e2c\u6e3e"
473 "\u6e4a\u6e5e\u6e67\u6e6f\u6e88\u6e96\u6e9d"
474 "\u6eab\u6ebc\u6ec4\u6ec5\u6ecc\u6ece\u6eec\u6eef"
475 "\u6ef2\u6ef7\u6ef8\u6efb\u6efe\u6eff\u6f01"
476 "\u6f1a\u6f22\u6f23\u6f2c\u6f32\u6f35\u6f38\u6f3f"
477 "\u6f41\u6f51\u6f54\u6f59\u6f5b\u6f64\u6f6f"
478 "\u6f70\u6f77\u6f7f\u6f80\u6f86\u6f87\u6f97\u6fa0"
479 "\u6fa4\u6fa6\u6fa9\u6fae\u6fb1\u6fc1\u6fc3"
480 "\u6fd5\u6fd8\u6fdf\u6fe4\u6feb\u6fec\u6ff0\u6ff1"
481 "\u6ffa\u6ffc\u6ffe\u7005\u7006\u7007\u7009"
482 "\u700b\u700f\u7015\u7018\u701d\u701f\u7020\u7026"
483 "\u7027\u7028\u7030\u7032\u703e\u7043\u7044"
484 "\u7051\u7055\u7058\u705d\u7060\u7063\u7064\u7067"
485 "\u707d\u70ba\u70cf\u70f4\u7120\u7121\u7149"
486 "\u7152\u7159\u7162\u7165\u7169\u716c\u7171\u7185"
487 "\u7192\u7197\u71b1\u71b2\u71be\u71c1\u71c4"
488 "\u71c8\u71c9\u71d0\u71d2\u71d9\u71dc\u71df\u71e6"
489 "\u71ec\u71ed\u71f4\u71f6\u71fb\u71fc\u71fe"
490 "\u71ff\u720d\u7210\u721b\u722d\u7232\u723a\u723e"
491 "\u7240\u7246\u724b\u7258\u727d\u7296\u72a2"
492 "\u72a7\u72c0\u72f9\u72fd\u7319\u7336\u733b\u7341"
493 "\u7343\u7344\u7345\u734e\u7368\u736a\u736b"
494 "\u736e\u7370\u7371\u7372\u7375\u7377\u7378\u737a"
495 "\u737b\u737c\u7380\u73fe\u743a\u743f\u744b"
496 "\u7452\u7463\u7464\u7469\u746a\u746f\u7472\u7489"
497 "\u74a3\u74a6\u74ab\u74b0\u74bd\u74ca\u74cf"
498 "\u74d4\u74da\u750c\u7515\u751a\u7522\u7523\u755d"
499 "\u7562\u756b\u7570\u7576\u7587\u758a\u75c0"
500 "\u75d9\u75e0\u75fe\u7602\u760b\u760d\u7613\u761e"
501 "\u7621\u7627\u762e\u7632\u763a\u763b\u7642"
502 "\u7646\u7647\u7649\u7652\u7658\u765f\u7661\u7662"
503 "\u7664\u7665\u7667\u7669\u766c\u766d\u766e"
504 "\u7670\u7671\u7672\u767c\u7681\u769a\u76b0\u76b8"
505 "\u76ba\u76c3\u76dc\u76de\u76e1\u76e3\u76e4"
506 "\u76e7\u76ea\u7725\u773e\u774f\u775c\u775e\u776a"
507 "\u7787\u7798\u779c\u779e\u77ad\u77b6\u77bc"
508 "\u77d3\u77da\u77ef\u7802\u7832\u7843\u785c\u7864"
509 "\u7868\u786f\u78a9\u78ad\u78b3\u78b8\u78ba"
510 "\u78bc\u78d1\u78da\u78e3\u78e7\u78ef\u78fd\u7906"
511 "\u790e\u7919\u7921\u7926\u792a\u792b\u792c"
512 "\u792e\u7931\u7955\u797f\u798d\u798e\u7995\u79a1"
513 "\u79a6\u79aa\u79ae\u79b0\u79b1\u79bf\u79c8"
514 "\u7a05\u7a08\u7a0f\u7a1c\u7a1f\u7a2e\u7a31\u7a40"
515 "\u7a4c\u7a4d\u7a4e\u7a60\u7a61\u7a62\u7a69"
516 "\u7a6b\u7a6d\u7aa9\u7aaa\u7aae\u7aaf\u7ab5\u7ab6"
517 "\u7aba\u7ac4\u7ac5\u7ac7\u7ac8\u7aca\u7aea"
518 "\u7af6\u7b46\u7b4d\u7b67\u7b74\u7b87\u7b8b\u7b8e"
519 "\u7b8f\u7b9d\u7bc0\u7bc4\u7bc9\u7bcb\u7bd4"
520 "\u7be4\u7be9\u7bf3\u7c00\u7c06\u7c0d\u7c1e\u7c21"
521 "\u7c23\u7c2b\u7c37\u7c39\u7c3d\u7c3e\u7c43"
522 "\u7c4c\u7c50\u7c59\u7c5c\u7c5f\u7c60\u7c64\u7c69"
523 "\u7c6a\u7c6c\u7c6e\u7c72\u7ca7\u7cb5\u7cdd"
524 "\u7cde\u7ce7\u7cf0\u7cf2\u7cf4\u7cf6\u7cf9\u7cfe"
525 "\u7d00\u7d02\u7d04\u7d05\u7d06\u7d07\u7d08"
526 "\u7d09\u7d0b\u7d0d\u7d10\u7d13\u7d14\u7d15\u7d16"
527 "\u7d17\u7d18\u7d19\u7d1a\u7d1b\u7d1c\u7d1d"
528 "\u7d21\u7d2c\u7d2e\u7d30\u7d31\u7d32\u7d33\u7d35"
529 "\u7d39\u7d3a\u7d3c\u7d3f\u7d40\u7d42\u7d43"
530 "\u7d44\u7d45\u7d46\u7d4e\u7d50\u7d55\u7d5b\u7d5d"
531 "\u7d5e\u7d61\u7d62\u7d66\u7d68\u7d70\u7d71"
532 "\u7d72\u7d73\u7d76\u7d79\u7d81\u7d83\u7d86\u7d88"
533 "\u7d89\u7d8c\u7d8f\u7d90\u7d91\u7d93\u7d9c"
534 "\u7d9e\u7da0\u7da2\u7da3\u7dab\u7dac\u7dad\u7daf"
535 "\u7db0\u7db1\u7db2\u7db3\u7db4\u7db5\u7db8"
536 "\u7db9\u7dba\u7dbb\u7dbd\u7dbe\u7dbf\u7dc4\u7dc7"
537 "\u7dca\u7dcb\u7dd1\u7dd2\u7dd3\u7dd4\u7dd7"
538 "\u7dd8\u7dd9\u7dda\u7ddd\u7dde\u7de0\u7de1\u7de3"
539 "\u7de6\u7de8\u7de9\u7dec\u7def\u7df1\u7df2"
540 "\u7df4\u7df6\u7df9\u7dfb\u7e08\u7e09\u7e0a\u7e0b"
541 "\u7e10\u7e11\u7e15\u7e17\u7e1b\u7e1d\u7e1e"
542 "\u7e1f\u7e23\u7e27\u7e2b\u7e2d\u7e2e\u7e31\u7e32"
543 "\u7e33\u7e34\u7e35\u7e36\u7e37\u7e39\u7e3d"
544 "\u7e3e\u7e43\u7e45\u7e46\u7e47\u7e52\u7e54\u7e55"
545 "\u7e5a\u7e5e\u7e61\u7e62\u7e69\u7e6a\u7e6b"
546 "\u7e6d\u7e6e\u7e6f\u7e70\u7e73\u7e78\u7e79\u7e7c"
547 "\u7e7d\u7e7e\u7e7f\u7e88\u7e8a\u7e8c\u7e8d"
548 "\u7e8f\u7e93\u7e94\u7e98\u7e9c\u7f3d\u7f48\u7f4c"
549 "\u7f4e\u7f63\u7f70\u7f75\u7f77\u7f85\u7f86"
550 "\u7f88\u7f8b\u7fa3\u7fa5\u7fa8\u7fa9\u7fb6\u7fd2"
551 "\u7feb\u7ff9\u7ffa\u802c\u802e\u8056\u805e"
552 "\u806f\u8070\u8072\u8073\u8075\u8076\u8077\u8079"
553 "\u807d\u807e\u8085\u8105\u8108\u811b\u8123"
554 "\u812b\u8139\u814e\u8156\u8161\u8166\u816b\u8173"
555 "\u8178\u8183\u819a\u81a0\u81a9\u81bd\u81be"
556 "\u81bf\u81c9\u81cd\u81cf\u81d8\u81da\u81df\u81e0"
557 "\u81e2\u81e5\u81e8\u81fa\u8207\u8208\u8209"
558 "\u820a\u8216\u8259\u8264\u8266\u826b\u8271\u8277"
559 "\u82bb\u8332\u834a\u8373\u838a\u8396\u83a2"
560 "\u83a7\u83d3\u83ef\u83f8\u8407\u840a\u842c\u8435"
561 "\u8449\u8452\u8464\u8466\u846f\u8477\u8490"
562 "\u8493\u8494\u849e\u84bc\u84c0\u84c6\u84cb\u84ee"
563 "\u84ef\u84fd\u8514\u851e\u8523\u8525\u8526"
564 "\u852d\u8534\u8541\u8546\u854e\u8552\u8553\u8555"
565 "\u8558\u8562\u8569\u856a\u856d\u8577\u8580"
566 "\u8588\u858a\u858c\u8591\u8594\u8598\u859f\u85a6"
567 "\u85a9\u85b0\u85b3\u85b4\u85ba\u85cd\u85ce"
568 "\u85dd\u85e5\u85ea\u85f4\u85f6\u85f7\u85f9\u85fa"
569 "\u8604\u8606\u8607\u860a\u860b\u861a\u861e"
570 "\u8622\u862d\u863a\u863f\u8646\u8655\u865b\u865c"
571 "\u865f\u8667\u866f\u86fa\u86fb\u8706\u8755"
572 "\u875f\u8766\u8768\u8778\u8784\u879e\u87a2\u87ae"
573 "\u87bb\u87bf\u87c4\u87c8\u87ce\u87e3\u87ec"
574 "\u87ef\u87f2\u87f6\u87fb\u8805\u8806\u880d\u8810"
575 "\u8811\u8814\u881f\u8823\u8827\u8828\u8831"
576 "\u8836\u883b\u8846\u884a\u8853\u8855\u885a\u885b"
577 "\u885d\u889e\u88b4\u88ca\u88cf\u88dc\u88dd"
578 "\u88e1\u88fd\u8907\u890c\u8918\u8932\u8933\u8938"
579 "\u893b\u8947\u894f\u8956\u895d\u8960\u8964"
580 "\u896a\u896c\u896f\u8972\u8988\u898b\u898e\u898f"
581 "\u8993\u8996\u8998\u89a1\u89a5\u89a6\u89aa"
582 "\u89ac\u89af\u89b2\u89b7\u89ba\u89bd\u89bf\u89c0"
583 "\u89f4\u89f6\u89f8\u8a01\u8a02\u8a03\u8a08"
584 "\u8a0a\u8a0c\u8a0e\u8a10\u8a12\u8a13\u8a15\u8a16"
585 "\u8a17\u8a18\u8a1b\u8a1d\u8a1f\u8a22\u8a23"
586 "\u8a25\u8a29\u8a2a\u8a2d\u8a31\u8a34\u8a36\u8a3a"
587 "\u8a3b\u8a3c\u8a41\u8a46\u8a4e\u8a50\u8a52"
588 "\u8a54\u8a55\u8a56\u8a57\u8a58\u8a5b\u8a5e\u8a60"
589 "\u8a61\u8a62\u8a63\u8a66\u8a69\u8a6b\u8a6c"
590 "\u8a6d\u8a6e\u8a70\u8a71\u8a72\u8a73\u8a75\u8a7c"
591 "\u8a7f\u8a84\u8a85\u8a86\u8a87\u8a8c\u8a8d"
592 "\u8a91\u8a92\u8a95\u8a98\u8a9a\u8a9e\u8aa0\u8aa1"
593 "\u8aa3\u8aa4\u8aa5\u8aa6\u8aa8\u8aaa\u8aac"
594 "\u8ab0\u8ab2\u8ab6\u8ab9\u8abc\u8abe\u8abf\u8ac2"
595 "\u8ac4\u8ac7\u8ac9\u8acb\u8acd\u8acf\u8ad1"
596 "\u8ad2\u8ad6\u8ad7\u8adb\u8adc\u8add\u8ade\u8ae1"
597 "\u8ae2\u8ae4\u8ae6\u8ae7\u8aeb\u8aed\u8aee"
598 "\u8af1\u8af3\u8af6\u8af7\u8af8\u8afa\u8afc\u8afe"
599 "\u8b00\u8b01\u8b02\u8b04\u8b05\u8b0a\u8b0e"
600 "\u8b10\u8b14\u8b16\u8b17\u8b19\u8b1a\u8b1b\u8b1d"
601 "\u8b20\u8b21\u8b28\u8b2b\u8b2c\u8b2d\u8b33"
602 "\u8b39\u8b3e\u8b41\u8b45\u8b49\u8b4e\u8b4f\u8b56"
603 "\u8b58\u8b59\u8b5a\u8b5c\u8b5f\u8b6b\u8b6f"
604 "\u8b70\u8b74\u8b77\u8b78\u8b7d\u8b7e\u8b80\u8b8a"
605 "\u8b8c\u8b8e\u8b92\u8b93\u8b95\u8b96\u8b9a"
606 "\u8b9c\u8b9e\u8c48\u8c4e\u8c50\u8c54\u8c6c\u8c76"
607 "\u8c8d\u8c93\u8c99\u8c9d\u8c9e\u8c9f\u8ca0"
608 "\u8ca1\u8ca2\u8ca7\u8ca8\u8ca9\u8caa\u8cab\u8cac"
609 "\u8caf\u8cb0\u8cb2\u8cb3\u8cb4\u8cb6\u8cb7"
610 "\u8cb8\u8cba\u8cbb\u8cbc\u8cbd\u8cbf\u8cc0\u8cc1"
611 "\u8cc2\u8cc3\u8cc4\u8cc5\u8cc7\u8cc8\u8cca"
612 "\u8cd1\u8cd2\u8cd3\u8cd5\u8cd9\u8cda\u8cdc\u8cde"
613 "\u8ce0\u8ce1\u8ce2\u8ce3\u8ce4\u8ce6\u8ce7"
614 "\u8cea\u8ceb\u8cec\u8ced\u8cf0\u8cf4\u8cf5\u8cf8"
615 "\u8cfa\u8cfb\u8cfc\u8cfd\u8cfe\u8d04\u8d05"
616 "\u8d07\u8d08\u8d0a\u8d0b\u8d0d\u8d0f\u8d10\u8d13"
617 "\u8d14\u8d16\u8d17\u8d1b\u8d1c\u8d6c\u8d95"
618 "\u8d99\u8da8\u8db2\u8de1\u8de4\u8dfc\u8e10\u8e21"
619 "\u8e30\u8e34\u8e4c\u8e55\u8e5f\u8e63\u8e64"
620 "\u8e67\u8e7a\u8e82\u8e89\u8e8a\u8e8b\u8e8d\u8e91"
621 "\u8e92\u8e93\u8e95\u8e9a\u8ea1\u8ea5\u8ea6"
622 "\u8eaa\u8ec0\u8eca\u8ecb\u8ecc\u8ecd\u8ed1\u8ed2"
623 "\u8ed4\u8edb\u8edf\u8ee4\u8eeb\u8ef2\u8ef8"
624 "\u8ef9\u8efa\u8efb\u8efc\u8efe\u8f03\u8f05\u8f07"
625 "\u8f08\u8f09\u8f0a\u8f12\u8f13\u8f14\u8f15"
626 "\u8f1b\u8f1c\u8f1d\u8f1e\u8f1f\u8f25\u8f26\u8f29"
627 "\u8f2a\u8f2c\u8f2f\u8f33\u8f38\u8f3b\u8f3e"
628 "\u8f3f\u8f40\u8f42\u8f44\u8f45\u8f46\u8f49\u8f4d"
629 "\u8f4e\u8f54\u8f5d\u8f5f\u8f61\u8f62\u8f64"
630 "\u8fa6\u8fad\u8fae\u8faf\u8fb2\u8ff4\u9015\u9019"
631 "\u9023\u9031\u9032\u904a\u904b\u904e\u9054"
632 "\u9055\u9059\u905c\u905e\u9060\u9069\u906f\u9072"
633 "\u9077\u9078\u907a\u907c\u9081\u9084\u9087"
634 "\u908a\u908f\u9090\u90df\u90f5\u9106\u9109\u9112"
635 "\u9114\u9116\u9127\u912d\u9130\u9132\u9134"
636 "\u9136\u913a\u9147\u9148\u9165\u9183\u9196\u919c"
637 "\u919e\u91ab\u91ac\u91b1\u91bc\u91c0\u91c1"
638 "\u91c3\u91c5\u91cb\u91d0\u91d2\u91d3\u91d4\u91d5"
639 "\u91d7\u91d8\u91d9\u91dd\u91e3\u91e4\u91e6"
640 "\u91e7\u91e9\u91f5\u91f7\u91f9\u91fa\u9200\u9201"
641 "\u9203\u9204\u9208\u9209\u920d\u920e\u9210"
642 "\u9211\u9212\u9214\u9215\u921e\u9223\u9225\u9226"
643 "\u9227\u922e\u9230\u9233\u9234\u9237\u9238"
644 "\u9239\u923a\u923d\u923e\u923f\u9240\u9245\u9248"
645 "\u9249\u924b\u924d\u9251\u9255\u9257\u925a"
646 "\u925b\u925e\u9262\u9264\u9266\u926c\u926d\u9276"
647 "\u9278\u927a\u927b\u927f\u9283\u9285\u928d"
648 "\u9291\u9293\u9296\u9298\u929a\u929b\u929c\u92a0"
649 "\u92a3\u92a5\u92a6\u92a8\u92a9\u92aa\u92ab"
650 "\u92ac\u92b1\u92b2\u92b3\u92b7\u92b9\u92bb\u92bc"
651 "\u92c1\u92c3\u92c5\u92c7\u92cc\u92cf\u92d2"
652 "\u92d9\u92dd\u92df\u92e3\u92e4\u92e5\u92e6\u92e8"
653 "\u92e9\u92ea\u92ed\u92ee\u92ef\u92f0\u92f1"
654 "\u92f6\u92f8\u92fc\u9301\u9304\u9306\u9307\u9308"
655 "\u930f\u9310\u9312\u9315\u9318\u9319\u931a"
656 "\u931b\u931f\u9320\u9321\u9322\u9326\u9328\u9329"
657 "\u932b\u932e\u932f\u9332\u9333\u9336\u9338"
658 "\u9340\u9341\u9343\u9346\u9347\u9348\u934a\u934b"
659 "\u934d\u9354\u9358\u935a\u935b\u9360\u9364"
660 "\u9365\u9369\u936c\u9370\u9375\u9376\u937a\u937e"
661 "\u9382\u9384\u9387\u938a\u9394\u9396\u9397"
662 "\u9398\u939a\u939b\u93a1\u93a2\u93a3\u93a6\u93a7"
663 "\u93a9\u93aa\u93ac\u93ae\u93b0\u93b2\u93b3"
664 "\u93b5\u93b8\u93bf\u93c3\u93c7\u93c8\u93cc\u93cd"
665 "\u93d0\u93d1\u93d7\u93d8\u93dc\u93dd\u93de"
666 "\u93df\u93e1\u93e2\u93e4\u93e8\u93f0\u93f5\u93f7"
667 "\u93f9\u93fd\u9403\u940b\u9410\u9412\u9413"
668 "\u9414\u9418\u9419\u941d\u9420\u9426\u9427\u9428"
669 "\u942b\u942e\u9432\u9433\u9435\u9436\u9438"
670 "\u943a\u943f\u9444\u944a\u944c\u9451\u9452\u9454"
671 "\u9455\u945e\u9460\u9463\u9465\u946d\u9470"
672 "\u9471\u9472\u9477\u9479\u947c\u947d\u947e\u947f"
673 "\u9481\u9577\u9580\u9582\u9583\u9586\u9588"
674 "\u9589\u958b\u958c\u958e\u958f\u9591\u9592\u9593"
675 "\u9594\u9598\u95a1\u95a2\u95a3\u95a5\u95a7"
676 "\u95a8\u95a9\u95ab\u95ac\u95ad\u95b1\u95b2\u95b6"
677 "\u95b9\u95bb\u95bc\u95bd\u95be\u95bf\u95c3"
678 "\u95c6\u95c7\u95c8\u95ca\u95cb\u95cc\u95cd\u95d0"
679 "\u95d2\u95d3\u95d4\u95d5\u95d6\u95d8\u95dc"
680 "\u95de\u95e0\u95e1\u95e2\u95e4\u95e5\u9628\u962a"
681 "\u9658\u965d\u965e\u9663\u9670\u9673\u9678"
682 "\u967d\u9684\u9689\u968a\u968e\u9695\u969b\u96a8"
683 "\u96aa\u96b1\u96b4\u96b8\u96bb\u96cb\u96d6"
684 "\u96d9\u96db\u96dc\u96de\u96e2\u96e3\u96f2\u96fb"
685 "\u9711\u9722\u9727\u973d\u9742\u9744\u9748"
686 "\u975a\u975c\u9766\u9768\u9777\u9780\u978f\u979d"
687 "\u97a6\u97bd\u97c1\u97c3\u97c6\u97c9\u97cb"
688 "\u97cc\u97cd\u97d3\u97d9\u97dc\u97de\u97ee\u97fb"
689 "\u97ff\u9801\u9802\u9803\u9805\u9806\u9807"
690 "\u9808\u980a\u980c\u980e\u980f\u9810\u9811\u9812"
691 "\u9813\u9817\u9818\u981c\u9821\u9824\u9826"
692 "\u982d\u982e\u9830\u9832\u9834\u9837\u9838\u9839"
693 "\u983b\u983d\u9846\u984c\u984d\u984e\u984f"
694 "\u9852\u9853\u9854\u9858\u9859\u985b\u985e\u9862"
695 "\u9865\u9867\u986b\u986c\u986f\u9870\u9871"
696 "\u9873\u9874\u98a8\u98ad\u98ae\u98af\u98b1\u98b3"
697 "\u98b6\u98b8\u98ba\u98bb\u98bc\u98c0\u98c4"
698 "\u98c6\u98c8\u98db\u98e0\u98e2\u98e3\u98e5\u98e9"
699 "\u98ea\u98eb\u98ed\u98ef\u98f2\u98f4\u98fc"
700 "\u98fd\u98fe\u98ff\u9903\u9904\u9905\u9909\u990a"
701 "\u990c\u990e\u990f\u9911\u9912\u9913\u9915"
702 "\u9916\u9918\u991a\u991b\u991c\u991e\u9921\u9928"
703 "\u992c\u9931\u9933\u9935\u9936\u9937\u993a"
704 "\u993c\u993d\u993e\u993f\u9941\u9943\u9945\u9948"
705 "\u9949\u994a\u994b\u994c\u9951\u9952\u9957"
706 "\u995c\u995e\u9962\u99ac\u99ad\u99ae\u99b1\u99b3"
707 "\u99b4\u99b9\u99c1\u99d0\u99d1\u99d2\u99d4"
708 "\u99d5\u99d8\u99d9\u99db\u99dd\u99df\u99e1\u99e2"
709 "\u99ed\u99f0\u99f1\u99f8\u99ff\u9a01\u9a02"
710 "\u9a05\u9a0c\u9a0d\u9a0e\u9a0f\u9a16\u9a19\u9a24"
711 "\u9a27\u9a2b\u9a2d\u9a2e\u9a30\u9a36\u9a37"
712 "\u9a38\u9a3e\u9a40\u9a41\u9a42\u9a43\u9a44\u9a45"
713 "\u9a4a\u9a4c\u9a4d\u9a4f\u9a55\u9a57\u9a5a"
714 "\u9a5b\u9a5f\u9a62\u9a64\u9a65\u9a66\u9a6a\u9a6b"
715 "\u9aaf\u9acf\u9ad2\u9ad4\u9ad5\u9ad6\u9b00"
716 "\u9b06\u9b0d\u9b1a\u9b22\u9b25\u9b27\u9b28\u9b29"
717 "\u9b2d\u9b2e\u9b31\u9b4e\u9b58\u9b5a\u9b5b"
718 "\u9b62\u9b68\u9b6f\u9b74\u9b77\u9b7a\u9b81\u9b83"
719 "\u9b8a\u9b8b\u9b8d\u9b8e\u9b90\u9b91\u9b92"
720 "\u9b93\u9b9a\u9b9c\u9b9d\u9b9e\u9ba6\u9baa\u9bab"
721 "\u9bad\u9bae\u9bb3\u9bb6\u9bba\u9bc0\u9bc1"
722 "\u9bc7\u9bc9\u9bca\u9bd2\u9bd4\u9bd5\u9bd6\u9bdb"
723 "\u9bdd\u9be1\u9be2\u9be4\u9be7\u9be8\u9bea"
724 "\u9beb\u9bf0\u9bf4\u9bf7\u9bfd\u9bff\u9c01\u9c02"
725 "\u9c03\u9c08\u9c09\u9c0d\u9c0f\u9c10\u9c12"
726 "\u9c13\u9c1c\u9c1f\u9c20\u9c23\u9c25\u9c28\u9c29"
727 "\u9c2d\u9c2e\u9c31\u9c32\u9c33\u9c35\u9c37"
728 "\u9c39\u9c3a\u9c3b\u9c3c\u9c3e\u9c42\u9c45\u9c48"
729 "\u9c49\u9c52\u9c54\u9c56\u9c57\u9c58\u9c5d"
730 "\u9c5f\u9c60\u9c63\u9c64\u9c67\u9c68\u9c6d\u9c6f"
731 "\u9c77\u9c78\u9c7a\u9ce5\u9ce7\u9ce9\u9cec"
732 "\u9cf2\u9cf3\u9cf4\u9cf6\u9cfe\u9d06\u9d07\u9d09"
733 "\u9d12\u9d15\u9d1b\u9d1d\u9d1e\u9d1f\u9d23"
734 "\u9d26\u9d28\u9d2f\u9d30\u9d34\u9d37\u9d3b\u9d3f"
735 "\u9d41\u9d42\u9d43\u9d50\u9d51\u9d52\u9d53"
736 "\u9d5c\u9d5d\u9d60\u9d61\u9d6a\u9d6c\u9d6e\u9d6f"
737 "\u9d72\u9d77\u9d7e\u9d84\u9d87\u9d89\u9d8a"
738 "\u9d93\u9d96\u9d98\u9d9a\u9da1\u9da5\u9da9\u9daa"
739 "\u9dac\u9daf\u9db2\u9db4\u9db9\u9dba\u9dbb"
740 "\u9dbc\u9dc0\u9dc1\u9dc2\u9dc4\u9dc8\u9dca\u9dd3"
741 "\u9dd6\u9dd7\u9dd9\u9dda\u9de5\u9de6\u9deb"
742 "\u9def\u9df2\u9df3\u9df8\u9df9\u9dfa\u9dfd\u9dff"
743 "\u9e02\u9e07\u9e0c\u9e0f\u9e15\u9e18\u9e1a"
744 "\u9e1b\u9e1d\u9e1e\u9e75\u9e79\u9e7a\u9e7c\u9e7d"
745 "\u9e97\u9ea4\u9ea5\u9ea9\u9eaf\u9eb5\u9ebc"
746 "\u9ebd\u9ec3\u9ecc\u9ede\u9ee8\u9ef2\u9ef4\u9ef6"
747 "\u9ef7\u9efd\u9eff\u9f07\u9f08\u9f09\u9f15"
748 "\u9f34\u9f4a\u9f4b\u9f4e\u9f4f\u9f52\u9f54\u9f55"
749 "\u9f57\u9f59\u9f5c\u9f5f\u9f60\u9f61\u9f63"
750 "\u9f66\u9f67\u9f69\u9f6a\u9f6c\u9f72\u9f76\u9f77"
751 "\u9f8d\u9f8e\u9f90\u9f94\u9f95\u9f9c]";
752
753 // Create UnicodeSets for zh-Hans and zh-Hant for later reference.
27 UErrorCode status = U_ZERO_ERROR; 754 UErrorCode status = U_ZERO_ERROR;
28 // The Transliterator IDs are defined in: 755 hans_set_.reset(new icu::UnicodeSet(
29 // third_party/icu/source/data/translit/root.txt. 756 icu::UnicodeString::fromUTF8(hans_codepoints), status));
30 // 757 DVLOG(1) << u_errorName(status);
31 // Chromium keeps only a subset of these, defined in: 758 hant_set_.reset(new icu::UnicodeSet(
32 // third_party/icu/source/data/translit/root_subset.txt 759 icu::UnicodeString::fromUTF8(hant_codepoints), status));
33 hans2hant_.reset(icu::Transliterator::createInstance( 760 DVLOG(1) << u_errorName(status);
34 icu::UnicodeString("Hans-Hant"), UTRANS_FORWARD, parse_status, status)); 761
35 DVLOG(1) << "Hans-Hant Transliterator initialization status: " 762 // Make these sets immutable. This keeps the class threadsafe and
36 << u_errorName(status); 763 // makes certain operations on these sets faster.
37 hant2hans_.reset(icu::Transliterator::createInstance( 764 hans_set_->freeze();
38 icu::UnicodeString("Hant-Hans"), UTRANS_FORWARD, parse_status, status)); 765 hant_set_->freeze();
39 DVLOG(1) << "Hant-Hans Transliterator initialization status: "
40 << u_errorName(status);
41 } 766 }
42 767
43 bool ChineseScriptClassifier::IsInitialized() const { 768 bool ChineseScriptClassifier::IsInitialized() const {
44 return hans2hant_ && hant2hans_; 769 return hans_set_ && hant_set_;
45 } 770 }
46 771
47 ChineseScriptClassifier::~ChineseScriptClassifier() {} 772 ChineseScriptClassifier::~ChineseScriptClassifier() {}
48 773
49 std::string ChineseScriptClassifier::Classify(const std::string& input) const { 774 std::string ChineseScriptClassifier::Classify(const std::string& input) const {
50 // If there was a problem with initialization, return the empty string. 775 // If there was a problem with initialization, return the empty string.
51 if (!IsInitialized()) { 776 if (!IsInitialized()) {
52 return ""; 777 return "";
53 } 778 }
54 779
55 // Operate only on first 500 bytes. 780 // Operate only on first 500 bytes.
56 std::string input_subset; 781 std::string input_subset;
57 base::TruncateUTF8ToByteSize(input, 500, &input_subset); 782 base::TruncateUTF8ToByteSize(input, 500, &input_subset);
58 783
59 // Remove whitespace since transliterators may not preserve it. 784 // Remove whitespace since transliterators may not preserve it.
60 input_subset.erase(std::remove_if(input_subset.begin(), input_subset.end(), 785 input_subset.erase(std::remove_if(input_subset.begin(), input_subset.end(),
61 base::IsUnicodeWhitespace), 786 base::IsUnicodeWhitespace),
62 input_subset.end()); 787 input_subset.end());
63 788
64 // Convert two copies of the input to icu::UnicodeString. Two copies are 789 // Convert the input to icu::UnicodeString so we can iterate over codepoints.
65 // necessary because transliteration happens in place only. 790 icu::UnicodeString input_codepoints =
66 icu::UnicodeString original_input =
67 icu::UnicodeString::fromUTF8(input_subset); 791 icu::UnicodeString::fromUTF8(input_subset);
68 icu::UnicodeString hant_input = icu::UnicodeString::fromUTF8(input_subset);
69 icu::UnicodeString hans_input = icu::UnicodeString::fromUTF8(input_subset);
70
71 // Get the zh-Hant version of this input.
72 hans2hant_->transliterate(hant_input);
73 // Get the zh-Hans version of this input.
74 hant2hans_->transliterate(hans_input);
75
76 // Debugging only: show the input, the Hant version, and the Hans version.
77 if (VLOG_IS_ON(1)) {
78 std::string hant_string;
79 std::string hans_string;
80 hans_input.toUTF8String(hans_string);
81 hant_input.toUTF8String(hant_string);
82 DVLOG(1) << "Original input:\n" << input_subset;
83 DVLOG(1) << "zh-Hant output:\n" << hant_string;
84 DVLOG(1) << "zh-Hans output:\n" << hans_string;
85 }
86 792
87 // Count matches between the original input chars and the Hant and Hans 793 // Count matches between the original input chars and the Hant and Hans
88 // versions of the input. 794 // versions of the input.
89 int hant_count = 0; 795 int hant_count = 0;
90 int hans_count = 0; 796 int hans_count = 0;
91 797
92 // Iterate over all chars in the original input and compute matches between 798 for (int index = 0; index < input_codepoints.length(); ++index) {
93 // the Hant version and the Hans version. 799 const auto codepoint = input_codepoints.charAt(index);
94 // 800 if (hans_set_->contains(codepoint))
95 // All segments (original, Hant, and Hans) should have the same length, but
96 // in case of some corner case or bug in which they turn out not to be,
97 // we compute the minimum length we are allowed to traverse.
98 const int min_length =
99 std::min(original_input.length(),
100 std::min(hans_input.length(), hant_input.length()));
101 for (int index = 0; index < min_length; ++index) {
102 const auto original_char = original_input.charAt(index);
103 const auto hans_char = hans_input.charAt(index);
104 const auto hant_char = hant_input.charAt(index);
105 if (hans_char == hant_char) {
106 continue;
107 } else if (original_char == hans_char) {
108 // Hans-specific char found.
109 ++hans_count; 801 ++hans_count;
110 } else if (original_char == hant_char) { 802 if (hant_set_->contains(codepoint))
111 // Hant-specific char found.
112 ++hant_count; 803 ++hant_count;
113 }
114 } 804 }
115 DVLOG(1) << "Found " << hans_count << " zh-Hans chars in input"; 805 DVLOG(1) << "Found " << hans_count << " zh-Hans chars in input";
116 DVLOG(1) << "Found " << hant_count << " zh-Hant chars in input"; 806 DVLOG(1) << "Found " << hant_count << " zh-Hant chars in input";
117 807
118 if (hant_count > hans_count) { 808 if (hant_count > hans_count) {
119 return kChineseTraditionalCode; 809 return kChineseTraditionalCode;
120 } else if (hans_count > hant_count) { 810 } else if (hans_count > hant_count) {
121 return kChineseSimplifiedCode; 811 return kChineseSimplifiedCode;
122 } else { // hans_count == hant_count 812 } else { // hans_count == hant_count
123 // All characters are the same in both scripts. In this case, we return the 813 // All characters are the same in both scripts. In this case, we return the
124 // following code. 814 // following code.
125 return kChineseSimplifiedCode; 815 return kChineseSimplifiedCode;
126 } 816 }
127 } 817 }
128 818
129 } // namespace translate 819 } // namespace translate
OLDNEW
« no previous file with comments | « components/translate/core/language_detection/chinese_script_classifier.h ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698