OLD | NEW |
| (Empty) |
1 Index: D:/src/src/third_party/libphonenumber/cpp/src/phonenumberutil_test.cc | |
2 =================================================================== | |
3 --- phonenumberutil_test.cc (revision 186) | |
4 +++ phonenumberutil_test.cc (working copy) | |
5 @@ -1101,11 +1101,11 @@ | |
6 ExtractPossibleNumber("Tel:+800-345-600", &extracted_number); | |
7 EXPECT_EQ("+800-345-600", extracted_number); | |
8 // Should recognise wide digits as possible start values. | |
9 - ExtractPossibleNumber("０２３", &extracted_number); | |
10 - EXPECT_EQ("０２３", extracted_number); | |
11 + ExtractPossibleNumber("\xEF\xBC\x90\xEF\xBC\x92\xEF\xBC\x93", &extracted_numb
er); | |
12 + EXPECT_EQ("\xEF\xBC\x90\xEF\xBC\x92\xEF\xBC\x93", extracted_number); | |
13 // Dashes are not possible start values and should be removed. | |
14 - ExtractPossibleNumber("Num-１２３", &extracted_number); | |
15 - EXPECT_EQ("１２３", extracted_number); | |
16 + ExtractPossibleNumber("Num-\xEF\xBC\x91\xEF\xBC\x92\xEF\xBC\x93", &extracted_
number); | |
17 + EXPECT_EQ("\xEF\xBC\x91\xEF\xBC\x92\xEF\xBC\x93", extracted_number); | |
18 // If not possible number present, return empty string. | |
19 ExtractPossibleNumber("Num-....", &extracted_number); | |
20 EXPECT_EQ("", extracted_number); | |
21 @@ -1119,7 +1119,7 @@ | |
22 ExtractPossibleNumber("(650) 253-0000.", &extracted_number); | |
23 EXPECT_EQ("650) 253-0000", extracted_number); | |
24 // This case has a trailing RTL char. | |
25 - ExtractPossibleNumber("(650) 253-0000", &extracted_number); | |
26 + ExtractPossibleNumber("(650) 253-0000\xE2\x80\x8F", &extracted_number); | |
27 EXPECT_EQ("650) 253-0000", extracted_number); | |
28 } | |
29 | |
30 @@ -1163,7 +1163,7 @@ | |
31 // This number is no longer valid. | |
32 EXPECT_FALSE(phone_util_.IsValidNumber(bs_number)); | |
33 | |
34 - // La Mayotte and Réunion use 'leadingDigits' to differentiate them. | |
35 + // La Mayotte and R\xC3\xA9union use 'leadingDigits' to differentiate them. | |
36 PhoneNumber re_number; | |
37 re_number.set_country_code(262); | |
38 re_number.set_national_number(262123456ULL); | |
39 @@ -1631,13 +1631,13 @@ | |
40 EXPECT_TRUE(IsViablePhoneNumber("0800-4-PIZZA")); | |
41 // Only one or two digits before possible punctuation followed by more digits
. | |
42 // The punctuation used here is the unicode character u+3000. | |
43 - EXPECT_TRUE(IsViablePhoneNumber("1　34")); | |
44 - EXPECT_FALSE(IsViablePhoneNumber("1　3+4")); | |
45 + EXPECT_TRUE(IsViablePhoneNumber("1\xE3\x80\x80" "34")); | |
46 + EXPECT_FALSE(IsViablePhoneNumber("1\xE3\x80\x80" "3+4")); | |
47 // Unicode variants of possible starting character and other allowed | |
48 // punctuation/digits. | |
49 - EXPECT_TRUE(IsViablePhoneNumber("（1）　3456789")); | |
50 + EXPECT_TRUE(IsViablePhoneNumber("\xEF\xBC\x88" "1\xEF\xBC\x89\xE3\x80\x80" "3
456789")); | |
51 // Testing a leading + is okay. | |
52 - EXPECT_TRUE(IsViablePhoneNumber("+1）　3456789")); | |
53 + EXPECT_TRUE(IsViablePhoneNumber("+1\xEF\xBC\x89\xE3\x80\x80" "3456789")); | |
54 } | |
55 | |
56 TEST_F(PhoneNumberUtilTest, NormaliseRemovePunctuation) { | |
57 @@ -1659,13 +1659,13 @@ | |
58 TEST_F(PhoneNumberUtilTest, NormaliseOtherDigits) { | |
59 // The first digit is a full-width 2, the last digit is an Arabic-indic digit | |
60 // 5. | |
61 - string input_number("２5٥"); | |
62 + string input_number("\xEF\xBC\x92" "5\xD9\xA5"); | |
63 Normalize(&input_number); | |
64 static const string kExpectedOutput("255"); | |
65 EXPECT_EQ(kExpectedOutput, input_number) | |
66 << "Conversion did not correctly replace non-latin digits"; | |
67 // The first digit is an Eastern-Arabic 5, the latter an Eastern-Arabic 0. | |
68 - string eastern_arabic_input_number("۵2۰"); | |
69 + string eastern_arabic_input_number("\xDB\xB5" "2\xDB\xB0"); | |
70 Normalize(&eastern_arabic_input_number); | |
71 static const string kExpectedOutput2("520"); | |
72 EXPECT_EQ(kExpectedOutput2, eastern_arabic_input_number) | |
73 @@ -2321,21 +2321,21 @@ | |
74 // Using a full-width plus sign. | |
75 test_number.Clear(); | |
76 EXPECT_EQ(PhoneNumberUtil::NO_PARSING_ERROR, | |
77 - phone_util_.Parse("＋1 (650) 333-6000", | |
78 + phone_util_.Parse("\xEF\xBC\x8B" "1 (650) 333-6000", | |
79 RegionCode::SG(), &test_number)); | |
80 EXPECT_EQ(us_number, test_number); | |
81 // The whole number, including punctuation, is here represented in full-width | |
82 // form. | |
83 test_number.Clear(); | |
84 EXPECT_EQ(PhoneNumberUtil::NO_PARSING_ERROR, | |
85 - phone_util_.Parse("＋１　（６５０）　３３３－６０００", | |
86 + phone_util_.Parse("\xEF\xBC\x8B\xEF\xBC\x91\xE3\x80\x80\xEF\xBC\x88
\xEF\xBC\x96\xEF\xBC\x95\xEF\xBC\x90\xEF\xBC\x89\xE3\x80\x80\xEF\xBC\x93\xEF\xBC
\x93\xEF\xBC\x93\xEF\xBC\x8D\xEF\xBC\x96\xEF\xBC\x90\xEF\xBC\x90\xEF\xBC\x90", | |
87 RegionCode::SG(), &test_number)); | |
88 EXPECT_EQ(us_number, test_number); | |
89 | |
90 // Using the U+30FC dash. | |
91 test_number.Clear(); | |
92 EXPECT_EQ(PhoneNumberUtil::NO_PARSING_ERROR, | |
93 - phone_util_.Parse("＋１　（６５０）　３３３ー６０００", | |
94 + phone_util_.Parse("\xEF\xBC\x8B\xEF\xBC\x91\xE3\x80\x80\xEF\xBC\x88
\xEF\xBC\x96\xEF\xBC\x95\xEF\xBC\x90\xEF\xBC\x89\xE3\x80\x80\xEF\xBC\x93\xEF\xBC
\x93\xEF\xBC\x93\xE3\x83\xBC\xEF\xBC\x96\xEF\xBC\x90\xEF\xBC\x90\xEF\xBC\x90", | |
95 RegionCode::SG(), &test_number)); | |
96 EXPECT_EQ(us_number, test_number); | |
97 } | |
98 @@ -2575,7 +2575,7 @@ | |
99 // Test with full-width plus. | |
100 result_proto.Clear(); | |
101 EXPECT_EQ(PhoneNumberUtil::NO_PARSING_ERROR, | |
102 - phone_util_.Parse("＋64 3 331 6005", RegionCode::ZZ(), | |
103 + phone_util_.Parse("\xEF\xBC\x8B" "64 3 331 6005", RegionCode::ZZ(), | |
104 &result_proto)); | |
105 EXPECT_EQ(nz_number, result_proto); | |
106 // Test with normal plus but leading characters that need to be stripped. | |
107 @@ -2733,7 +2733,7 @@ | |
108 EXPECT_EQ(us_with_extension, test_number); | |
109 test_number.Clear(); | |
110 EXPECT_EQ(PhoneNumberUtil::NO_PARSING_ERROR, | |
111 - phone_util_.Parse("(800) 901-3355 ,extensión 7246433", | |
112 + phone_util_.Parse("(800) 901-3355 ,extensi\xC3\xB3n 7246433", | |
113 RegionCode::US(), | |
114 &test_number)); | |
115 EXPECT_EQ(us_with_extension, test_number); | |
116 @@ -2741,7 +2741,7 @@ | |
117 // Repeat with the small letter o with acute accent created by combining | |
118 // characters. | |
119 EXPECT_EQ(PhoneNumberUtil::NO_PARSING_ERROR, | |
120 - phone_util_.Parse("(800) 901-3355 ,extensión 7246433", | |
121 + phone_util_.Parse("(800) 901-3355 ,extensio\xCC\x81n 7246433", | |
122 RegionCode::US(), | |
123 &test_number)); | |
124 EXPECT_EQ(us_with_extension, test_number); | |
125 Index: D:/src/src/third_party/libphonenumber/cpp/src/phonenumberutil.cc | |
126 =================================================================== | |
127 --- phonenumberutil.cc (revision 186) | |
128 +++ phonenumberutil.cc (working copy) | |
129 @@ -72,7 +72,7 @@ | |
130 // The kPlusSign signifies the international prefix. | |
131 const char kPlusSign[] = "+"; | |
132 | |
133 -const char kPlusChars[] = "+＋"; | |
134 +const char kPlusChars[] = "+\xEF\xBC\x8B"; | |
135 scoped_ptr<const reg_exp::RegularExpression> plus_chars_pattern; | |
136 | |
137 const char kRfc3966ExtnPrefix[] = ";ext="; | |
138 @@ -88,7 +88,7 @@ | |
139 | |
140 // Digits accepted in phone numbers. | |
141 // Both Arabic-Indic and Eastern Arabic-Indic are supported. | |
142 -const char kValidDigits[] = "0-9０-９٠-٩۰-۹"; | |
143 +const char kValidDigits[] = "0-9\xEF\xBC\x90-\xEF\xBC\x99\xD9\xA0-\xD9\xA9\xDB\
xB0-\xDB\xB9"; | |
144 // We accept alpha characters in phone numbers, ASCII only. We store lower-case | |
145 // here only since our regular expressions are case-insensitive. | |
146 const char kValidAlpha[] = "a-z"; | |
147 @@ -140,7 +140,7 @@ | |
148 // itself. In emacs, you can use M-x unicode-what to query information about th
e | |
149 // unicode character. | |
150 const char kValidPunctuation[] = | |
151 - "-x‐-―−ー--/ ()()[].\\[\\]/~⁓∼~"; | |
152 + "-x\xE2\x80\x90-\xE2\x80\x95\xE2\x88\x92\xE3\x83\xBC\xEF\xBC\x8D-\xEF\xBC\x
8F \xC2\xA0\xE2\x80\x8B\xE2\x81\xA0\xE3\x80\x80()\xEF\xBC\x88\xEF\xBC\x89\xEF\xB
C\xBB\xEF\xBC\xBD.\\[\\]/~\xE2\x81\x93\xE2\x88\xBC\xEF\xBD\x9E"; | |
153 | |
154 // Regular expression of viable phone numbers. This is location independent. | |
155 // Checks we have at least three leading digits, and only valid punctuation, | |
156 @@ -454,7 +454,7 @@ | |
157 // defined order. | |
158 void CreateRegularExpressions() { | |
159 unique_international_prefix.reset( | |
160 - reg_exp::CreateRegularExpression("[\\d]+(?:[~⁓∼～][\\d]+)?")); | |
161 + reg_exp::CreateRegularExpression("[\\d]+(?:[~\xE2\x81\x93\xE2\x88\xBC\xEF
\xBD\x9E][\\d]+)?")); | |
162 first_group_capturing_pattern.reset( | |
163 reg_exp::CreateRegularExpression("(\\$1)")); | |
164 carrier_code_pattern.reset( | |
165 @@ -476,16 +476,16 @@ | |
166 StrCat("[", kPlusChars, "]*(?:[", kValidPunctuation, "]*[", kValidDigits, | |
167 "]){3,}[", kValidAlpha, kValidPunctuation, kValidDigits, "]*"))); | |
168 // Canonical-equivalence doesn't seem to be an option with RE2, so we allow | |
169 - // two options for representing the ó - the character itself, and one in the | |
170 + // two options for representing the \xC3\xB3 - the character itself, and one
in the | |
171 // unicode decomposed form with the combining acute accent. Note that there | |
172 // are currently three capturing groups for the extension itself - if this | |
173 // number is changed, MaybeStripExtension needs to be updated. | |
174 const string capturing_extn_digits = StrCat("([", kValidDigits, "]{1,7})"); | |
175 known_extn_patterns.reset(new string( | |
176 StrCat(kRfc3966ExtnPrefix, capturing_extn_digits, "|" | |
177 - "[ \\t,]*(?:ext(?:ensi(?:ó?|ó))?n?|extn?|[,xx##~~]|" | |
178 - "int|int|anexo)" | |
179 - "[:\\..]?[ \\t,-]*", capturing_extn_digits, "#?|" | |
180 + "[ \xC2\xA0\\t,]*(?:ext(?:ensi(?:o\xCC\x81?|\xC3\xB3))?n?|\xEF\xBD
\x85\xEF\xBD\x98\xEF\xBD\x94\xEF\xBD\x8E?|[,x\xEF\xBD\x98#\xEF\xBC\x83~\xEF\xBD\
x9E]|" | |
181 + "int|\xEF\xBD\x89\xEF\xBD\x8E\xEF\xBD\x94|anexo)" | |
182 + "[:\\.\xEF\xBC\x8E]?[ \xC2\xA0\\t,-]*", capturing_extn_digits, "#?
|" | |
183 "[- ]+([", kValidDigits, "]{1,5})#"))); | |
184 extn_pattern.reset(reg_exp::CreateRegularExpression( | |
185 StrCat("(?i)(?:", *known_extn_patterns, ")$").c_str())); | |
186 @@ -509,35 +509,35 @@ | |
187 all_plus_number_grouping_symbols->insert( | |
188 make_pair(ToUnicodeCodepoint("-"), '-')); | |
189 all_plus_number_grouping_symbols->insert( | |
190 - make_pair(ToUnicodeCodepoint("－"), '-')); | |
191 + make_pair(ToUnicodeCodepoint("\xEF\xBC\x8D"), '-')); | |
192 all_plus_number_grouping_symbols->insert( | |
193 - make_pair(ToUnicodeCodepoint("‐"), '-')); | |
194 + make_pair(ToUnicodeCodepoint("\xE2\x80\x90"), '-')); | |
195 all_plus_number_grouping_symbols->insert( | |
196 - make_pair(ToUnicodeCodepoint("‑"), '-')); | |
197 + make_pair(ToUnicodeCodepoint("\xE2\x80\x91"), '-')); | |
198 all_plus_number_grouping_symbols->insert( | |
199 - make_pair(ToUnicodeCodepoint("‒"), '-')); | |
200 + make_pair(ToUnicodeCodepoint("\xE2\x80\x92"), '-')); | |
201 all_plus_number_grouping_symbols->insert( | |
202 - make_pair(ToUnicodeCodepoint("–"), '-')); | |
203 + make_pair(ToUnicodeCodepoint("\xE2\x80\x93"), '-')); | |
204 all_plus_number_grouping_symbols->insert( | |
205 - make_pair(ToUnicodeCodepoint("—"), '-')); | |
206 + make_pair(ToUnicodeCodepoint("\xE2\x80\x94"), '-')); | |
207 all_plus_number_grouping_symbols->insert( | |
208 - make_pair(ToUnicodeCodepoint("―"), '-')); | |
209 + make_pair(ToUnicodeCodepoint("\xE2\x80\x95"), '-')); | |
210 all_plus_number_grouping_symbols->insert( | |
211 - make_pair(ToUnicodeCodepoint("−"), '-')); | |
212 + make_pair(ToUnicodeCodepoint("\xE2\x88\x92"), '-')); | |
213 all_plus_number_grouping_symbols->insert( | |
214 make_pair(ToUnicodeCodepoint("/"), '/')); | |
215 all_plus_number_grouping_symbols->insert( | |
216 - make_pair(ToUnicodeCodepoint("／"), '/')); | |
217 + make_pair(ToUnicodeCodepoint("\xEF\xBC\x8F"), '/')); | |
218 all_plus_number_grouping_symbols->insert( | |
219 make_pair(ToUnicodeCodepoint(" "), ' ')); | |
220 all_plus_number_grouping_symbols->insert( | |
221 - make_pair(ToUnicodeCodepoint("　"), ' ')); | |
222 + make_pair(ToUnicodeCodepoint("\xE3\x80\x80"), ' ')); | |
223 all_plus_number_grouping_symbols->insert( | |
224 - make_pair(ToUnicodeCodepoint(""), ' ')); | |
225 + make_pair(ToUnicodeCodepoint("\xE2\x81\xA0"), ' ')); | |
226 all_plus_number_grouping_symbols->insert( | |
227 make_pair(ToUnicodeCodepoint("."), '.')); | |
228 all_plus_number_grouping_symbols->insert( | |
229 - make_pair(ToUnicodeCodepoint("．"), '.')); | |
230 + make_pair(ToUnicodeCodepoint("\xEF\xBC\x8E"), '.')); | |
231 // Only the upper-case letters are added here - the lower-case versions are | |
232 // added programmatically. | |
233 alpha_mappings->insert(make_pair(ToUnicodeCodepoint("A"), '2')); | |
234 @@ -849,7 +849,7 @@ | |
235 // Note here that all NANPA formatting rules are contained by US, so we use | |
236 // that to format NANPA numbers. The same applies to Russian Fed regions - | |
237 // rules are contained by Russia. French Indian Ocean country rules are | |
238 - // contained by Réunion. | |
239 + // contained by R\xC3\xA9union. | |
240 string region_code; | |
241 GetRegionCodeForCountryCode(country_calling_code, &region_code); | |
242 if (!HasValidRegionCode(region_code, country_calling_code, | |
243 @@ -1015,7 +1015,7 @@ | |
244 // For regions that share a country calling code, the country calling code | |
245 // need not be dialled. This also applies when dialling within a region, so | |
246 // this if clause covers both these cases. | |
247 - // Technically this is the case for dialling from la Réunion to other | |
248 + // Technically this is the case for dialling from la R\xC3\xA9union to othe
r | |
249 // overseas departments of France (French Guiana, Martinique, Guadeloupe), | |
250 // but not vice versa - so we don't cover this edge case for now and for | |
251 // those cases return the version including country calling code. | |
OLD | NEW |