| OLD | NEW |
| (Empty) |
| 1 Index: D:/src/src/third_party/libphonenumber/cpp/src/phonenumberutil_test.cc | |
| 2 =================================================================== | |
| 3 --- phonenumberutil_test.cc (revision 186) | |
| 4 +++ phonenumberutil_test.cc (working copy) | |
| 5 @@ -1101,11 +1101,11 @@ | |
| 6 ExtractPossibleNumber("Tel:+800-345-600", &extracted_number); | |
| 7 EXPECT_EQ("+800-345-600", extracted_number); | |
| 8 // Should recognise wide digits as possible start values. | |
| 9 - ExtractPossibleNumber("０２３", &extracted_number); | |
| 10 - EXPECT_EQ("０２３", extracted_number); | |
| 11 + ExtractPossibleNumber("\xEF\xBC\x90\xEF\xBC\x92\xEF\xBC\x93", &extracted_numb
er); | |
| 12 + EXPECT_EQ("\xEF\xBC\x90\xEF\xBC\x92\xEF\xBC\x93", extracted_number); | |
| 13 // Dashes are not possible start values and should be removed. | |
| 14 - ExtractPossibleNumber("Num-１２３", &extracted_number); | |
| 15 - EXPECT_EQ("１２３", extracted_number); | |
| 16 + ExtractPossibleNumber("Num-\xEF\xBC\x91\xEF\xBC\x92\xEF\xBC\x93", &extracted_
number); | |
| 17 + EXPECT_EQ("\xEF\xBC\x91\xEF\xBC\x92\xEF\xBC\x93", extracted_number); | |
| 18 // If not possible number present, return empty string. | |
| 19 ExtractPossibleNumber("Num-....", &extracted_number); | |
| 20 EXPECT_EQ("", extracted_number); | |
| 21 @@ -1119,7 +1119,7 @@ | |
| 22 ExtractPossibleNumber("(650) 253-0000.", &extracted_number); | |
| 23 EXPECT_EQ("650) 253-0000", extracted_number); | |
| 24 // This case has a trailing RTL char. | |
| 25 - ExtractPossibleNumber("(650) 253-0000", &extracted_number); | |
| 26 + ExtractPossibleNumber("(650) 253-0000\xE2\x80\x8F", &extracted_number); | |
| 27 EXPECT_EQ("650) 253-0000", extracted_number); | |
| 28 } | |
| 29 | |
| 30 @@ -1163,7 +1163,7 @@ | |
| 31 // This number is no longer valid. | |
| 32 EXPECT_FALSE(phone_util_.IsValidNumber(bs_number)); | |
| 33 | |
| 34 - // La Mayotte and Réunion use 'leadingDigits' to differentiate them. | |
| 35 + // La Mayotte and R\xC3\xA9union use 'leadingDigits' to differentiate them. | |
| 36 PhoneNumber re_number; | |
| 37 re_number.set_country_code(262); | |
| 38 re_number.set_national_number(262123456ULL); | |
| 39 @@ -1631,13 +1631,13 @@ | |
| 40 EXPECT_TRUE(IsViablePhoneNumber("0800-4-PIZZA")); | |
| 41 // Only one or two digits before possible punctuation followed by more digits
. | |
| 42 // The punctuation used here is the unicode character u+3000. | |
| 43 - EXPECT_TRUE(IsViablePhoneNumber("1 34")); | |
| 44 - EXPECT_FALSE(IsViablePhoneNumber("1 3+4")); | |
| 45 + EXPECT_TRUE(IsViablePhoneNumber("1\xE3\x80\x80" "34")); | |
| 46 + EXPECT_FALSE(IsViablePhoneNumber("1\xE3\x80\x80" "3+4")); | |
| 47 // Unicode variants of possible starting character and other allowed | |
| 48 // punctuation/digits. | |
| 49 - EXPECT_TRUE(IsViablePhoneNumber("(1) 3456789")); | |
| 50 + EXPECT_TRUE(IsViablePhoneNumber("\xEF\xBC\x88" "1\xEF\xBC\x89\xE3\x80\x80" "3
456789")); | |
| 51 // Testing a leading + is okay. | |
| 52 - EXPECT_TRUE(IsViablePhoneNumber("+1) 3456789")); | |
| 53 + EXPECT_TRUE(IsViablePhoneNumber("+1\xEF\xBC\x89\xE3\x80\x80" "3456789")); | |
| 54 } | |
| 55 | |
| 56 TEST_F(PhoneNumberUtilTest, NormaliseRemovePunctuation) { | |
| 57 @@ -1659,13 +1659,13 @@ | |
| 58 TEST_F(PhoneNumberUtilTest, NormaliseOtherDigits) { | |
| 59 // The first digit is a full-width 2, the last digit is an Arabic-indic digit | |
| 60 // 5. | |
| 61 - string input_number("２5٥"); | |
| 62 + string input_number("\xEF\xBC\x92" "5\xD9\xA5"); | |
| 63 Normalize(&input_number); | |
| 64 static const string kExpectedOutput("255"); | |
| 65 EXPECT_EQ(kExpectedOutput, input_number) | |
| 66 << "Conversion did not correctly replace non-latin digits"; | |
| 67 // The first digit is an Eastern-Arabic 5, the latter an Eastern-Arabic 0. | |
| 68 - string eastern_arabic_input_number("۵2۰"); | |
| 69 + string eastern_arabic_input_number("\xDB\xB5" "2\xDB\xB0"); | |
| 70 Normalize(&eastern_arabic_input_number); | |
| 71 static const string kExpectedOutput2("520"); | |
| 72 EXPECT_EQ(kExpectedOutput2, eastern_arabic_input_number) | |
| 73 @@ -2321,21 +2321,21 @@ | |
| 74 // Using a full-width plus sign. | |
| 75 test_number.Clear(); | |
| 76 EXPECT_EQ(PhoneNumberUtil::NO_PARSING_ERROR, | |
| 77 - phone_util_.Parse("＋1 (650) 333-6000", | |
| 78 + phone_util_.Parse("\xEF\xBC\x8B" "1 (650) 333-6000", | |
| 79 RegionCode::SG(), &test_number)); | |
| 80 EXPECT_EQ(us_number, test_number); | |
| 81 // The whole number, including punctuation, is here represented in full-width | |
| 82 // form. | |
| 83 test_number.Clear(); | |
| 84 EXPECT_EQ(PhoneNumberUtil::NO_PARSING_ERROR, | |
| 85 - phone_util_.Parse("+1 (650) 333-6000", | |
| 86 + phone_util_.Parse("\xEF\xBC\x8B\xEF\xBC\x91\xE3\x80\x80\xEF\xBC\x88
\xEF\xBC\x96\xEF\xBC\x95\xEF\xBC\x90\xEF\xBC\x89\xE3\x80\x80\xEF\xBC\x93\xEF\xBC
\x93\xEF\xBC\x93\xEF\xBC\x8D\xEF\xBC\x96\xEF\xBC\x90\xEF\xBC\x90\xEF\xBC\x90", | |
| 87 RegionCode::SG(), &test_number)); | |
| 88 EXPECT_EQ(us_number, test_number); | |
| 89 | |
| 90 // Using the U+30FC dash. | |
| 91 test_number.Clear(); | |
| 92 EXPECT_EQ(PhoneNumberUtil::NO_PARSING_ERROR, | |
| 93 - phone_util_.Parse("+1 (650) 333ー6000", | |
| 94 + phone_util_.Parse("\xEF\xBC\x8B\xEF\xBC\x91\xE3\x80\x80\xEF\xBC\x88
\xEF\xBC\x96\xEF\xBC\x95\xEF\xBC\x90\xEF\xBC\x89\xE3\x80\x80\xEF\xBC\x93\xEF\xBC
\x93\xEF\xBC\x93\xE3\x83\xBC\xEF\xBC\x96\xEF\xBC\x90\xEF\xBC\x90\xEF\xBC\x90", | |
| 95 RegionCode::SG(), &test_number)); | |
| 96 EXPECT_EQ(us_number, test_number); | |
| 97 } | |
| 98 @@ -2575,7 +2575,7 @@ | |
| 99 // Test with full-width plus. | |
| 100 result_proto.Clear(); | |
| 101 EXPECT_EQ(PhoneNumberUtil::NO_PARSING_ERROR, | |
| 102 - phone_util_.Parse("＋64 3 331 6005", RegionCode::ZZ(), | |
| 103 + phone_util_.Parse("\xEF\xBC\x8B" "64 3 331 6005", RegionCode::ZZ(), | |
| 104 &result_proto)); | |
| 105 EXPECT_EQ(nz_number, result_proto); | |
| 106 // Test with normal plus but leading characters that need to be stripped. | |
| 107 @@ -2733,7 +2733,7 @@ | |
| 108 EXPECT_EQ(us_with_extension, test_number); | |
| 109 test_number.Clear(); | |
| 110 EXPECT_EQ(PhoneNumberUtil::NO_PARSING_ERROR, | |
| 111 - phone_util_.Parse("(800) 901-3355 ,extensión 7246433", | |
| 112 + phone_util_.Parse("(800) 901-3355 ,extensi\xC3\xB3n 7246433", | |
| 113 RegionCode::US(), | |
| 114 &test_number)); | |
| 115 EXPECT_EQ(us_with_extension, test_number); | |
| 116 @@ -2741,7 +2741,7 @@ | |
| 117 // Repeat with the small letter o with acute accent created by combining | |
| 118 // characters. | |
| 119 EXPECT_EQ(PhoneNumberUtil::NO_PARSING_ERROR, | |
| 120 - phone_util_.Parse("(800) 901-3355 ,extensión 7246433", | |
| 121 + phone_util_.Parse("(800) 901-3355 ,extensio\xCC\x81n 7246433", | |
| 122 RegionCode::US(), | |
| 123 &test_number)); | |
| 124 EXPECT_EQ(us_with_extension, test_number); | |
| 125 Index: D:/src/src/third_party/libphonenumber/cpp/src/phonenumberutil.cc | |
| 126 =================================================================== | |
| 127 --- phonenumberutil.cc (revision 186) | |
| 128 +++ phonenumberutil.cc (working copy) | |
| 129 @@ -72,7 +72,7 @@ | |
| 130 // The kPlusSign signifies the international prefix. | |
| 131 const char kPlusSign[] = "+"; | |
| 132 | |
| 133 -const char kPlusChars[] = "+＋"; | |
| 134 +const char kPlusChars[] = "+\xEF\xBC\x8B"; | |
| 135 scoped_ptr<const reg_exp::RegularExpression> plus_chars_pattern; | |
| 136 | |
| 137 const char kRfc3966ExtnPrefix[] = ";ext="; | |
| 138 @@ -88,7 +88,7 @@ | |
| 139 | |
| 140 // Digits accepted in phone numbers. | |
| 141 // Both Arabic-Indic and Eastern Arabic-Indic are supported. | |
| 142 -const char kValidDigits[] = "0-9０-９٠-٩۰-۹"; | |
| 143 +const char kValidDigits[] = "0-9\xEF\xBC\x90-\xEF\xBC\x99\xD9\xA0-\xD9\xA9\xDB\
xB0-\xDB\xB9"; | |
| 144 // We accept alpha characters in phone numbers, ASCII only. We store lower-case | |
| 145 // here only since our regular expressions are case-insensitive. | |
| 146 const char kValidAlpha[] = "a-z"; | |
| 147 @@ -140,7 +140,7 @@ | |
| 148 // itself. In emacs, you can use M-x unicode-what to query information about th
e | |
| 149 // unicode character. | |
| 150 const char kValidPunctuation[] = | |
| 151 - "-x‐-―−ー--/ ()()[].\\[\\]/~⁓∼~"; | |
| 152 + "-x\xE2\x80\x90-\xE2\x80\x95\xE2\x88\x92\xE3\x83\xBC\xEF\xBC\x8D-\xEF\xBC\x
8F \xC2\xA0\xE2\x80\x8B\xE2\x81\xA0\xE3\x80\x80()\xEF\xBC\x88\xEF\xBC\x89\xEF\xB
C\xBB\xEF\xBC\xBD.\\[\\]/~\xE2\x81\x93\xE2\x88\xBC\xEF\xBD\x9E"; | |
| 153 | |
| 154 // Regular expression of viable phone numbers. This is location independent. | |
| 155 // Checks we have at least three leading digits, and only valid punctuation, | |
| 156 @@ -454,7 +454,7 @@ | |
| 157 // defined order. | |
| 158 void CreateRegularExpressions() { | |
| 159 unique_international_prefix.reset( | |
| 160 - reg_exp::CreateRegularExpression("[\\d]+(?:[~⁓∼～][\\d]+)?")); | |
| 161 + reg_exp::CreateRegularExpression("[\\d]+(?:[~\xE2\x81\x93\xE2\x88\xBC\xEF
\xBD\x9E][\\d]+)?")); | |
| 162 first_group_capturing_pattern.reset( | |
| 163 reg_exp::CreateRegularExpression("(\\$1)")); | |
| 164 carrier_code_pattern.reset( | |
| 165 @@ -476,16 +476,16 @@ | |
| 166 StrCat("[", kPlusChars, "]*(?:[", kValidPunctuation, "]*[", kValidDigits, | |
| 167 "]){3,}[", kValidAlpha, kValidPunctuation, kValidDigits, "]*"))); | |
| 168 // Canonical-equivalence doesn't seem to be an option with RE2, so we allow | |
| 169 - // two options for representing the ó - the character itself, and one in the | |
| 170 + // two options for representing the \xC3\xB3 - the character itself, and one
in the | |
| 171 // unicode decomposed form with the combining acute accent. Note that there | |
| 172 // are currently three capturing groups for the extension itself - if this | |
| 173 // number is changed, MaybeStripExtension needs to be updated. | |
| 174 const string capturing_extn_digits = StrCat("([", kValidDigits, "]{1,7})"); | |
| 175 known_extn_patterns.reset(new string( | |
| 176 StrCat(kRfc3966ExtnPrefix, capturing_extn_digits, "|" | |
| 177 - "[ \\t,]*(?:ext(?:ensi(?:ó?|ó))?n?|extn?|[,xx##~~]|" | |
| 178 - "int|int|anexo)" | |
| 179 - "[:\\..]?[ \\t,-]*", capturing_extn_digits, "#?|" | |
| 180 + "[ \xC2\xA0\\t,]*(?:ext(?:ensi(?:o\xCC\x81?|\xC3\xB3))?n?|\xEF\xBD
\x85\xEF\xBD\x98\xEF\xBD\x94\xEF\xBD\x8E?|[,x\xEF\xBD\x98#\xEF\xBC\x83~\xEF\xBD\
x9E]|" | |
| 181 + "int|\xEF\xBD\x89\xEF\xBD\x8E\xEF\xBD\x94|anexo)" | |
| 182 + "[:\\.\xEF\xBC\x8E]?[ \xC2\xA0\\t,-]*", capturing_extn_digits, "#?
|" | |
| 183 "[- ]+([", kValidDigits, "]{1,5})#"))); | |
| 184 extn_pattern.reset(reg_exp::CreateRegularExpression( | |
| 185 StrCat("(?i)(?:", *known_extn_patterns, ")$").c_str())); | |
| 186 @@ -509,35 +509,35 @@ | |
| 187 all_plus_number_grouping_symbols->insert( | |
| 188 make_pair(ToUnicodeCodepoint("-"), '-')); | |
| 189 all_plus_number_grouping_symbols->insert( | |
| 190 - make_pair(ToUnicodeCodepoint("－"), '-')); | |
| 191 + make_pair(ToUnicodeCodepoint("\xEF\xBC\x8D"), '-')); | |
| 192 all_plus_number_grouping_symbols->insert( | |
| 193 - make_pair(ToUnicodeCodepoint("‐"), '-')); | |
| 194 + make_pair(ToUnicodeCodepoint("\xE2\x80\x90"), '-')); | |
| 195 all_plus_number_grouping_symbols->insert( | |
| 196 - make_pair(ToUnicodeCodepoint("‑"), '-')); | |
| 197 + make_pair(ToUnicodeCodepoint("\xE2\x80\x91"), '-')); | |
| 198 all_plus_number_grouping_symbols->insert( | |
| 199 - make_pair(ToUnicodeCodepoint("‒"), '-')); | |
| 200 + make_pair(ToUnicodeCodepoint("\xE2\x80\x92"), '-')); | |
| 201 all_plus_number_grouping_symbols->insert( | |
| 202 - make_pair(ToUnicodeCodepoint("–"), '-')); | |
| 203 + make_pair(ToUnicodeCodepoint("\xE2\x80\x93"), '-')); | |
| 204 all_plus_number_grouping_symbols->insert( | |
| 205 - make_pair(ToUnicodeCodepoint("—"), '-')); | |
| 206 + make_pair(ToUnicodeCodepoint("\xE2\x80\x94"), '-')); | |
| 207 all_plus_number_grouping_symbols->insert( | |
| 208 - make_pair(ToUnicodeCodepoint("―"), '-')); | |
| 209 + make_pair(ToUnicodeCodepoint("\xE2\x80\x95"), '-')); | |
| 210 all_plus_number_grouping_symbols->insert( | |
| 211 - make_pair(ToUnicodeCodepoint("−"), '-')); | |
| 212 + make_pair(ToUnicodeCodepoint("\xE2\x88\x92"), '-')); | |
| 213 all_plus_number_grouping_symbols->insert( | |
| 214 make_pair(ToUnicodeCodepoint("/"), '/')); | |
| 215 all_plus_number_grouping_symbols->insert( | |
| 216 - make_pair(ToUnicodeCodepoint("／"), '/')); | |
| 217 + make_pair(ToUnicodeCodepoint("\xEF\xBC\x8F"), '/')); | |
| 218 all_plus_number_grouping_symbols->insert( | |
| 219 make_pair(ToUnicodeCodepoint(" "), ' ')); | |
| 220 all_plus_number_grouping_symbols->insert( | |
| 221 - make_pair(ToUnicodeCodepoint(" "), ' ')); | |
| 222 + make_pair(ToUnicodeCodepoint("\xE3\x80\x80"), ' ')); | |
| 223 all_plus_number_grouping_symbols->insert( | |
| 224 - make_pair(ToUnicodeCodepoint(""), ' ')); | |
| 225 + make_pair(ToUnicodeCodepoint("\xE2\x81\xA0"), ' ')); | |
| 226 all_plus_number_grouping_symbols->insert( | |
| 227 make_pair(ToUnicodeCodepoint("."), '.')); | |
| 228 all_plus_number_grouping_symbols->insert( | |
| 229 - make_pair(ToUnicodeCodepoint("．"), '.')); | |
| 230 + make_pair(ToUnicodeCodepoint("\xEF\xBC\x8E"), '.')); | |
| 231 // Only the upper-case letters are added here - the lower-case versions are | |
| 232 // added programmatically. | |
| 233 alpha_mappings->insert(make_pair(ToUnicodeCodepoint("A"), '2')); | |
| 234 @@ -849,7 +849,7 @@ | |
| 235 // Note here that all NANPA formatting rules are contained by US, so we use | |
| 236 // that to format NANPA numbers. The same applies to Russian Fed regions - | |
| 237 // rules are contained by Russia. French Indian Ocean country rules are | |
| 238 - // contained by Réunion. | |
| 239 + // contained by R\xC3\xA9union. | |
| 240 string region_code; | |
| 241 GetRegionCodeForCountryCode(country_calling_code, &region_code); | |
| 242 if (!HasValidRegionCode(region_code, country_calling_code, | |
| 243 @@ -1015,7 +1015,7 @@ | |
| 244 // For regions that share a country calling code, the country calling code | |
| 245 // need not be dialled. This also applies when dialling within a region, so | |
| 246 // this if clause covers both these cases. | |
| 247 - // Technically this is the case for dialling from la Réunion to other | |
| 248 + // Technically this is the case for dialling from la R\xC3\xA9union to othe
r | |
| 249 // overseas departments of France (French Guiana, Martinique, Guadeloupe), | |
| 250 // but not vice versa - so we don't cover this edge case for now and for | |
| 251 // those cases return the version including country calling code. | |
| OLD | NEW |