Index: third_party/libphonenumber/patches/utf8_v186.patch |
=================================================================== |
--- third_party/libphonenumber/patches/utf8_v186.patch (revision 111991) |
+++ third_party/libphonenumber/patches/utf8_v186.patch (working copy) |
@@ -1,251 +0,0 @@ |
-Index: D:/src/src/third_party/libphonenumber/cpp/src/phonenumberutil_test.cc |
-=================================================================== |
---- phonenumberutil_test.cc (revision 186) |
-+++ phonenumberutil_test.cc (working copy) |
-@@ -1101,11 +1101,11 @@ |
- ExtractPossibleNumber("Tel:+800-345-600", &extracted_number); |
- EXPECT_EQ("+800-345-600", extracted_number); |
- // Should recognise wide digits as possible start values. |
-- ExtractPossibleNumber("023", &extracted_number); |
-- EXPECT_EQ("023", extracted_number); |
-+ ExtractPossibleNumber("\xEF\xBC\x90\xEF\xBC\x92\xEF\xBC\x93", &extracted_number); |
-+ EXPECT_EQ("\xEF\xBC\x90\xEF\xBC\x92\xEF\xBC\x93", extracted_number); |
- // Dashes are not possible start values and should be removed. |
-- ExtractPossibleNumber("Num-123", &extracted_number); |
-- EXPECT_EQ("123", extracted_number); |
-+ ExtractPossibleNumber("Num-\xEF\xBC\x91\xEF\xBC\x92\xEF\xBC\x93", &extracted_number); |
-+ EXPECT_EQ("\xEF\xBC\x91\xEF\xBC\x92\xEF\xBC\x93", extracted_number); |
- // If not possible number present, return empty string. |
- ExtractPossibleNumber("Num-....", &extracted_number); |
- EXPECT_EQ("", extracted_number); |
-@@ -1119,7 +1119,7 @@ |
- ExtractPossibleNumber("(650) 253-0000.", &extracted_number); |
- EXPECT_EQ("650) 253-0000", extracted_number); |
- // This case has a trailing RTL char. |
-- ExtractPossibleNumber("(650) 253-0000", &extracted_number); |
-+ ExtractPossibleNumber("(650) 253-0000\xE2\x80\x8F", &extracted_number); |
- EXPECT_EQ("650) 253-0000", extracted_number); |
- } |
- |
-@@ -1163,7 +1163,7 @@ |
- // This number is no longer valid. |
- EXPECT_FALSE(phone_util_.IsValidNumber(bs_number)); |
- |
-- // La Mayotte and Réunion use 'leadingDigits' to differentiate them. |
-+ // La Mayotte and R\xC3\xA9union use 'leadingDigits' to differentiate them. |
- PhoneNumber re_number; |
- re_number.set_country_code(262); |
- re_number.set_national_number(262123456ULL); |
-@@ -1631,13 +1631,13 @@ |
- EXPECT_TRUE(IsViablePhoneNumber("0800-4-PIZZA")); |
- // Only one or two digits before possible punctuation followed by more digits. |
- // The punctuation used here is the unicode character u+3000. |
-- EXPECT_TRUE(IsViablePhoneNumber("1 34")); |
-- EXPECT_FALSE(IsViablePhoneNumber("1 3+4")); |
-+ EXPECT_TRUE(IsViablePhoneNumber("1\xE3\x80\x80" "34")); |
-+ EXPECT_FALSE(IsViablePhoneNumber("1\xE3\x80\x80" "3+4")); |
- // Unicode variants of possible starting character and other allowed |
- // punctuation/digits. |
-- EXPECT_TRUE(IsViablePhoneNumber("(1) 3456789")); |
-+ EXPECT_TRUE(IsViablePhoneNumber("\xEF\xBC\x88" "1\xEF\xBC\x89\xE3\x80\x80" "3456789")); |
- // Testing a leading + is okay. |
-- EXPECT_TRUE(IsViablePhoneNumber("+1) 3456789")); |
-+ EXPECT_TRUE(IsViablePhoneNumber("+1\xEF\xBC\x89\xE3\x80\x80" "3456789")); |
- } |
- |
- TEST_F(PhoneNumberUtilTest, NormaliseRemovePunctuation) { |
-@@ -1659,13 +1659,13 @@ |
- TEST_F(PhoneNumberUtilTest, NormaliseOtherDigits) { |
- // The first digit is a full-width 2, the last digit is an Arabic-indic digit |
- // 5. |
-- string input_number("25٥"); |
-+ string input_number("\xEF\xBC\x92" "5\xD9\xA5"); |
- Normalize(&input_number); |
- static const string kExpectedOutput("255"); |
- EXPECT_EQ(kExpectedOutput, input_number) |
- << "Conversion did not correctly replace non-latin digits"; |
- // The first digit is an Eastern-Arabic 5, the latter an Eastern-Arabic 0. |
-- string eastern_arabic_input_number("۵2۰"); |
-+ string eastern_arabic_input_number("\xDB\xB5" "2\xDB\xB0"); |
- Normalize(&eastern_arabic_input_number); |
- static const string kExpectedOutput2("520"); |
- EXPECT_EQ(kExpectedOutput2, eastern_arabic_input_number) |
-@@ -2321,21 +2321,21 @@ |
- // Using a full-width plus sign. |
- test_number.Clear(); |
- EXPECT_EQ(PhoneNumberUtil::NO_PARSING_ERROR, |
-- phone_util_.Parse("+1 (650) 333-6000", |
-+ phone_util_.Parse("\xEF\xBC\x8B" "1 (650) 333-6000", |
- RegionCode::SG(), &test_number)); |
- EXPECT_EQ(us_number, test_number); |
- // The whole number, including punctuation, is here represented in full-width |
- // form. |
- test_number.Clear(); |
- EXPECT_EQ(PhoneNumberUtil::NO_PARSING_ERROR, |
-- phone_util_.Parse("+1 (650) 333-6000", |
-+ phone_util_.Parse("\xEF\xBC\x8B\xEF\xBC\x91\xE3\x80\x80\xEF\xBC\x88\xEF\xBC\x96\xEF\xBC\x95\xEF\xBC\x90\xEF\xBC\x89\xE3\x80\x80\xEF\xBC\x93\xEF\xBC\x93\xEF\xBC\x93\xEF\xBC\x8D\xEF\xBC\x96\xEF\xBC\x90\xEF\xBC\x90\xEF\xBC\x90", |
- RegionCode::SG(), &test_number)); |
- EXPECT_EQ(us_number, test_number); |
- |
- // Using the U+30FC dash. |
- test_number.Clear(); |
- EXPECT_EQ(PhoneNumberUtil::NO_PARSING_ERROR, |
-- phone_util_.Parse("+1 (650) 333ー6000", |
-+ phone_util_.Parse("\xEF\xBC\x8B\xEF\xBC\x91\xE3\x80\x80\xEF\xBC\x88\xEF\xBC\x96\xEF\xBC\x95\xEF\xBC\x90\xEF\xBC\x89\xE3\x80\x80\xEF\xBC\x93\xEF\xBC\x93\xEF\xBC\x93\xE3\x83\xBC\xEF\xBC\x96\xEF\xBC\x90\xEF\xBC\x90\xEF\xBC\x90", |
- RegionCode::SG(), &test_number)); |
- EXPECT_EQ(us_number, test_number); |
- } |
-@@ -2575,7 +2575,7 @@ |
- // Test with full-width plus. |
- result_proto.Clear(); |
- EXPECT_EQ(PhoneNumberUtil::NO_PARSING_ERROR, |
-- phone_util_.Parse("+64 3 331 6005", RegionCode::ZZ(), |
-+ phone_util_.Parse("\xEF\xBC\x8B" "64 3 331 6005", RegionCode::ZZ(), |
- &result_proto)); |
- EXPECT_EQ(nz_number, result_proto); |
- // Test with normal plus but leading characters that need to be stripped. |
-@@ -2733,7 +2733,7 @@ |
- EXPECT_EQ(us_with_extension, test_number); |
- test_number.Clear(); |
- EXPECT_EQ(PhoneNumberUtil::NO_PARSING_ERROR, |
-- phone_util_.Parse("(800) 901-3355 ,extensión 7246433", |
-+ phone_util_.Parse("(800) 901-3355 ,extensi\xC3\xB3n 7246433", |
- RegionCode::US(), |
- &test_number)); |
- EXPECT_EQ(us_with_extension, test_number); |
-@@ -2741,7 +2741,7 @@ |
- // Repeat with the small letter o with acute accent created by combining |
- // characters. |
- EXPECT_EQ(PhoneNumberUtil::NO_PARSING_ERROR, |
-- phone_util_.Parse("(800) 901-3355 ,extensión 7246433", |
-+ phone_util_.Parse("(800) 901-3355 ,extensio\xCC\x81n 7246433", |
- RegionCode::US(), |
- &test_number)); |
- EXPECT_EQ(us_with_extension, test_number); |
-Index: D:/src/src/third_party/libphonenumber/cpp/src/phonenumberutil.cc |
-=================================================================== |
---- phonenumberutil.cc (revision 186) |
-+++ phonenumberutil.cc (working copy) |
-@@ -72,7 +72,7 @@ |
- // The kPlusSign signifies the international prefix. |
- const char kPlusSign[] = "+"; |
- |
--const char kPlusChars[] = "++"; |
-+const char kPlusChars[] = "+\xEF\xBC\x8B"; |
- scoped_ptr<const reg_exp::RegularExpression> plus_chars_pattern; |
- |
- const char kRfc3966ExtnPrefix[] = ";ext="; |
-@@ -88,7 +88,7 @@ |
- |
- // Digits accepted in phone numbers. |
- // Both Arabic-Indic and Eastern Arabic-Indic are supported. |
--const char kValidDigits[] = "0-90-9٠-٩۰-۹"; |
-+const char kValidDigits[] = "0-9\xEF\xBC\x90-\xEF\xBC\x99\xD9\xA0-\xD9\xA9\xDB\xB0-\xDB\xB9"; |
- // We accept alpha characters in phone numbers, ASCII only. We store lower-case |
- // here only since our regular expressions are case-insensitive. |
- const char kValidAlpha[] = "a-z"; |
-@@ -140,7 +140,7 @@ |
- // itself. In emacs, you can use M-x unicode-what to query information about the |
- // unicode character. |
- const char kValidPunctuation[] = |
-- "-x‐-―−ー--/ ()()[].\\[\\]/~⁓∼~"; |
-+ "-x\xE2\x80\x90-\xE2\x80\x95\xE2\x88\x92\xE3\x83\xBC\xEF\xBC\x8D-\xEF\xBC\x8F \xC2\xA0\xE2\x80\x8B\xE2\x81\xA0\xE3\x80\x80()\xEF\xBC\x88\xEF\xBC\x89\xEF\xBC\xBB\xEF\xBC\xBD.\\[\\]/~\xE2\x81\x93\xE2\x88\xBC\xEF\xBD\x9E"; |
- |
- // Regular expression of viable phone numbers. This is location independent. |
- // Checks we have at least three leading digits, and only valid punctuation, |
-@@ -454,7 +454,7 @@ |
- // defined order. |
- void CreateRegularExpressions() { |
- unique_international_prefix.reset( |
-- reg_exp::CreateRegularExpression("[\\d]+(?:[~⁓∼~][\\d]+)?")); |
-+ reg_exp::CreateRegularExpression("[\\d]+(?:[~\xE2\x81\x93\xE2\x88\xBC\xEF\xBD\x9E][\\d]+)?")); |
- first_group_capturing_pattern.reset( |
- reg_exp::CreateRegularExpression("(\\$1)")); |
- carrier_code_pattern.reset( |
-@@ -476,16 +476,16 @@ |
- StrCat("[", kPlusChars, "]*(?:[", kValidPunctuation, "]*[", kValidDigits, |
- "]){3,}[", kValidAlpha, kValidPunctuation, kValidDigits, "]*"))); |
- // Canonical-equivalence doesn't seem to be an option with RE2, so we allow |
-- // two options for representing the ó - the character itself, and one in the |
-+ // two options for representing the \xC3\xB3 - the character itself, and one in the |
- // unicode decomposed form with the combining acute accent. Note that there |
- // are currently three capturing groups for the extension itself - if this |
- // number is changed, MaybeStripExtension needs to be updated. |
- const string capturing_extn_digits = StrCat("([", kValidDigits, "]{1,7})"); |
- known_extn_patterns.reset(new string( |
- StrCat(kRfc3966ExtnPrefix, capturing_extn_digits, "|" |
-- "[ \\t,]*(?:ext(?:ensi(?:ó?|ó))?n?|extn?|[,xx##~~]|" |
-- "int|int|anexo)" |
-- "[:\\..]?[ \\t,-]*", capturing_extn_digits, "#?|" |
-+ "[ \xC2\xA0\\t,]*(?:ext(?:ensi(?:o\xCC\x81?|\xC3\xB3))?n?|\xEF\xBD\x85\xEF\xBD\x98\xEF\xBD\x94\xEF\xBD\x8E?|[,x\xEF\xBD\x98#\xEF\xBC\x83~\xEF\xBD\x9E]|" |
-+ "int|\xEF\xBD\x89\xEF\xBD\x8E\xEF\xBD\x94|anexo)" |
-+ "[:\\.\xEF\xBC\x8E]?[ \xC2\xA0\\t,-]*", capturing_extn_digits, "#?|" |
- "[- ]+([", kValidDigits, "]{1,5})#"))); |
- extn_pattern.reset(reg_exp::CreateRegularExpression( |
- StrCat("(?i)(?:", *known_extn_patterns, ")$").c_str())); |
-@@ -509,35 +509,35 @@ |
- all_plus_number_grouping_symbols->insert( |
- make_pair(ToUnicodeCodepoint("-"), '-')); |
- all_plus_number_grouping_symbols->insert( |
-- make_pair(ToUnicodeCodepoint("-"), '-')); |
-+ make_pair(ToUnicodeCodepoint("\xEF\xBC\x8D"), '-')); |
- all_plus_number_grouping_symbols->insert( |
-- make_pair(ToUnicodeCodepoint("‐"), '-')); |
-+ make_pair(ToUnicodeCodepoint("\xE2\x80\x90"), '-')); |
- all_plus_number_grouping_symbols->insert( |
-- make_pair(ToUnicodeCodepoint("‑"), '-')); |
-+ make_pair(ToUnicodeCodepoint("\xE2\x80\x91"), '-')); |
- all_plus_number_grouping_symbols->insert( |
-- make_pair(ToUnicodeCodepoint("‒"), '-')); |
-+ make_pair(ToUnicodeCodepoint("\xE2\x80\x92"), '-')); |
- all_plus_number_grouping_symbols->insert( |
-- make_pair(ToUnicodeCodepoint("–"), '-')); |
-+ make_pair(ToUnicodeCodepoint("\xE2\x80\x93"), '-')); |
- all_plus_number_grouping_symbols->insert( |
-- make_pair(ToUnicodeCodepoint("—"), '-')); |
-+ make_pair(ToUnicodeCodepoint("\xE2\x80\x94"), '-')); |
- all_plus_number_grouping_symbols->insert( |
-- make_pair(ToUnicodeCodepoint("―"), '-')); |
-+ make_pair(ToUnicodeCodepoint("\xE2\x80\x95"), '-')); |
- all_plus_number_grouping_symbols->insert( |
-- make_pair(ToUnicodeCodepoint("−"), '-')); |
-+ make_pair(ToUnicodeCodepoint("\xE2\x88\x92"), '-')); |
- all_plus_number_grouping_symbols->insert( |
- make_pair(ToUnicodeCodepoint("/"), '/')); |
- all_plus_number_grouping_symbols->insert( |
-- make_pair(ToUnicodeCodepoint("/"), '/')); |
-+ make_pair(ToUnicodeCodepoint("\xEF\xBC\x8F"), '/')); |
- all_plus_number_grouping_symbols->insert( |
- make_pair(ToUnicodeCodepoint(" "), ' ')); |
- all_plus_number_grouping_symbols->insert( |
-- make_pair(ToUnicodeCodepoint(" "), ' ')); |
-+ make_pair(ToUnicodeCodepoint("\xE3\x80\x80"), ' ')); |
- all_plus_number_grouping_symbols->insert( |
-- make_pair(ToUnicodeCodepoint(""), ' ')); |
-+ make_pair(ToUnicodeCodepoint("\xE2\x81\xA0"), ' ')); |
- all_plus_number_grouping_symbols->insert( |
- make_pair(ToUnicodeCodepoint("."), '.')); |
- all_plus_number_grouping_symbols->insert( |
-- make_pair(ToUnicodeCodepoint("."), '.')); |
-+ make_pair(ToUnicodeCodepoint("\xEF\xBC\x8E"), '.')); |
- // Only the upper-case letters are added here - the lower-case versions are |
- // added programmatically. |
- alpha_mappings->insert(make_pair(ToUnicodeCodepoint("A"), '2')); |
-@@ -849,7 +849,7 @@ |
- // Note here that all NANPA formatting rules are contained by US, so we use |
- // that to format NANPA numbers. The same applies to Russian Fed regions - |
- // rules are contained by Russia. French Indian Ocean country rules are |
-- // contained by Réunion. |
-+ // contained by R\xC3\xA9union. |
- string region_code; |
- GetRegionCodeForCountryCode(country_calling_code, ®ion_code); |
- if (!HasValidRegionCode(region_code, country_calling_code, |
-@@ -1015,7 +1015,7 @@ |
- // For regions that share a country calling code, the country calling code |
- // need not be dialled. This also applies when dialling within a region, so |
- // this if clause covers both these cases. |
-- // Technically this is the case for dialling from la Réunion to other |
-+ // Technically this is the case for dialling from la R\xC3\xA9union to other |
- // overseas departments of France (French Guiana, Martinique, Guadeloupe), |
- // but not vice versa - so we don't cover this edge case for now and for |
- // those cases return the version including country calling code. |