third_party/libphonenumber/patches/utf8_v186.patch - Issue 8736001: Pull the phone library directly. Delete old version.

Side by Side Diff: third_party/libphonenumber/patches/utf8_v186.patch

Issue 8736001: Pull the phone library directly. Delete old version. (Closed) Base URL: svn://chrome-svn/chrome/trunk/src/

Patch Set: '' Created 9 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
	(Empty)
1 Index: D:/src/src/third_party/libphonenumber/cpp/src/phonenumberutil_test.cc

2 ===================================================================

3 --- phonenumberutil_test.cc (revision 186)

4 +++ phonenumberutil_test.cc (working copy)

5 @@ -1101,11 +1101,11 @@

6 ExtractPossibleNumber("Tel:+800-345-600", &extracted_number);

7 EXPECT_EQ("+800-345-600", extracted_number);

8 // Should recognise wide digits as possible start values.

9 - ExtractPossibleNumber("０２３", &extracted_number);

10 - EXPECT_EQ("０２３", extracted_number);

11 + ExtractPossibleNumber("\xEF\xBC\x90\xEF\xBC\x92\xEF\xBC\x93", &extracted_number);

12 + EXPECT_EQ("\xEF\xBC\x90\xEF\xBC\x92\xEF\xBC\x93", extracted_number);

13 // Dashes are not possible start values and should be removed.

14 - ExtractPossibleNumber("Num-１２３", &extracted_number);

15 - EXPECT_EQ("１２３", extracted_number);

16 + ExtractPossibleNumber("Num-\xEF\xBC\x91\xEF\xBC\x92\xEF\xBC\x93", &extracted_number);

17 + EXPECT_EQ("\xEF\xBC\x91\xEF\xBC\x92\xEF\xBC\x93", extracted_number);

18 // If not possible number present, return empty string.

19 ExtractPossibleNumber("Num-....", &extracted_number);

20 EXPECT_EQ("", extracted_number);

21 @@ -1119,7 +1119,7 @@

22 ExtractPossibleNumber("(650) 253-0000.", &extracted_number);

23 EXPECT_EQ("650) 253-0000", extracted_number);

24 // This case has a trailing RTL char.

25 - ExtractPossibleNumber("(650) 253-0000‏", &extracted_number);

26 + ExtractPossibleNumber("(650) 253-0000\xE2\x80\x8F", &extracted_number);

27 EXPECT_EQ("650) 253-0000", extracted_number);

28 }

29

30 @@ -1163,7 +1163,7 @@

31 // This number is no longer valid.

32 EXPECT_FALSE(phone_util_.IsValidNumber(bs_number));

33

34 - // La Mayotte and Réunion use 'leadingDigits' to differentiate them.

35 + // La Mayotte and R\xC3\xA9union use 'leadingDigits' to differentiate them.

36 PhoneNumber re_number;

37 re_number.set_country_code(262);

38 re_number.set_national_number(262123456ULL);

39 @@ -1631,13 +1631,13 @@

40 EXPECT_TRUE(IsViablePhoneNumber("0800-4-PIZZA"));

41 // Only one or two digits before possible punctuation followed by more digits.

42 // The punctuation used here is the unicode character u+3000.

43 - EXPECT_TRUE(IsViablePhoneNumber("1　34"));

44 - EXPECT_FALSE(IsViablePhoneNumber("1　3+4"));

45 + EXPECT_TRUE(IsViablePhoneNumber("1\xE3\x80\x80" "34"));

46 + EXPECT_FALSE(IsViablePhoneNumber("1\xE3\x80\x80" "3+4"));

47 // Unicode variants of possible starting character and other allowed

48 // punctuation/digits.

49 - EXPECT_TRUE(IsViablePhoneNumber("（1）　3456789"));

50 + EXPECT_TRUE(IsViablePhoneNumber("\xEF\xBC\x88" "1\xEF\xBC\x89\xE3\x80\x80" "3456789"));

51 // Testing a leading + is okay.

52 - EXPECT_TRUE(IsViablePhoneNumber("+1）　3456789"));

53 + EXPECT_TRUE(IsViablePhoneNumber("+1\xEF\xBC\x89\xE3\x80\x80" "3456789"));

54 }

55

56 TEST_F(PhoneNumberUtilTest, NormaliseRemovePunctuation) {

57 @@ -1659,13 +1659,13 @@

58 TEST_F(PhoneNumberUtilTest, NormaliseOtherDigits) {

59 // The first digit is a full-width 2, the last digit is an Arabic-indic digit

60 // 5.

61 - string input_number("２5٥");

62 + string input_number("\xEF\xBC\x92" "5\xD9\xA5");

63 Normalize(&input_number);

64 static const string kExpectedOutput("255");

65 EXPECT_EQ(kExpectedOutput, input_number)

66 << "Conversion did not correctly replace non-latin digits";

67 // The first digit is an Eastern-Arabic 5, the latter an Eastern-Arabic 0.

68 - string eastern_arabic_input_number("۵2۰");

69 + string eastern_arabic_input_number("\xDB\xB5" "2\xDB\xB0");

70 Normalize(&eastern_arabic_input_number);

71 static const string kExpectedOutput2("520");

72 EXPECT_EQ(kExpectedOutput2, eastern_arabic_input_number)

73 @@ -2321,21 +2321,21 @@

74 // Using a full-width plus sign.

75 test_number.Clear();

76 EXPECT_EQ(PhoneNumberUtil::NO_PARSING_ERROR,

77 - phone_util_.Parse("＋1 (650) 333-6000",

78 + phone_util_.Parse("\xEF\xBC\x8B" "1 (650) 333-6000",

79 RegionCode::SG(), &test_number));

80 EXPECT_EQ(us_number, test_number);

81 // The whole number, including punctuation, is here represented in full-width

82 // form.

83 test_number.Clear();

84 EXPECT_EQ(PhoneNumberUtil::NO_PARSING_ERROR,

85 - phone_util_.Parse("＋１　（６５０）　３３３－６０００",

86 + phone_util_.Parse("\xEF\xBC\x8B\xEF\xBC\x91\xE3\x80\x80\xEF\xBC\x88\xEF\xBC\x96\xEF\xBC\x95\xEF\xBC\x90\xEF\xBC\x89\xE3\x80\x80\xEF\xBC\x93\xEF\xBC\x93\xEF\xBC\x93\xEF\xBC\x8D\xEF\xBC\x96\xEF\xBC\x90\xEF\xBC\x90\xEF\xBC\x90",

87 RegionCode::SG(), &test_number));

88 EXPECT_EQ(us_number, test_number);

89

90 // Using the U+30FC dash.

91 test_number.Clear();

92 EXPECT_EQ(PhoneNumberUtil::NO_PARSING_ERROR,

93 - phone_util_.Parse("＋１　（６５０）　３３３ー６０００",

94 + phone_util_.Parse("\xEF\xBC\x8B\xEF\xBC\x91\xE3\x80\x80\xEF\xBC\x88\xEF\xBC\x96\xEF\xBC\x95\xEF\xBC\x90\xEF\xBC\x89\xE3\x80\x80\xEF\xBC\x93\xEF\xBC\x93\xEF\xBC\x93\xE3\x83\xBC\xEF\xBC\x96\xEF\xBC\x90\xEF\xBC\x90\xEF\xBC\x90",

95 RegionCode::SG(), &test_number));

96 EXPECT_EQ(us_number, test_number);

97 }

98 @@ -2575,7 +2575,7 @@

99 // Test with full-width plus.

100 result_proto.Clear();

101 EXPECT_EQ(PhoneNumberUtil::NO_PARSING_ERROR,

102 - phone_util_.Parse("＋64 3 331 6005", RegionCode::ZZ(),

103 + phone_util_.Parse("\xEF\xBC\x8B" "64 3 331 6005", RegionCode::ZZ(),

104 &result_proto));

105 EXPECT_EQ(nz_number, result_proto);

106 // Test with normal plus but leading characters that need to be stripped.

107 @@ -2733,7 +2733,7 @@

108 EXPECT_EQ(us_with_extension, test_number);

109 test_number.Clear();

110 EXPECT_EQ(PhoneNumberUtil::NO_PARSING_ERROR,

111 - phone_util_.Parse("(800) 901-3355 ,extensión 7246433",

112 + phone_util_.Parse("(800) 901-3355 ,extensi\xC3\xB3n 7246433",

113 RegionCode::US(),

114 &test_number));

115 EXPECT_EQ(us_with_extension, test_number);

116 @@ -2741,7 +2741,7 @@

117 // Repeat with the small letter o with acute accent created by combining

118 // characters.

119 EXPECT_EQ(PhoneNumberUtil::NO_PARSING_ERROR,

120 - phone_util_.Parse("(800) 901-3355 ,extensión 7246433",

121 + phone_util_.Parse("(800) 901-3355 ,extensio\xCC\x81n 7246433",

122 RegionCode::US(),

123 &test_number));

124 EXPECT_EQ(us_with_extension, test_number);

125 Index: D:/src/src/third_party/libphonenumber/cpp/src/phonenumberutil.cc

126 ===================================================================

127 --- phonenumberutil.cc (revision 186)

128 +++ phonenumberutil.cc (working copy)

129 @@ -72,7 +72,7 @@

130 // The kPlusSign signifies the international prefix.

131 const char kPlusSign[] = "+";

132

133 -const char kPlusChars[] = "+＋";

134 +const char kPlusChars[] = "+\xEF\xBC\x8B";

135 scoped_ptr<const reg_exp::RegularExpression> plus_chars_pattern;

136

137 const char kRfc3966ExtnPrefix[] = ";ext=";

138 @@ -88,7 +88,7 @@

139

140 // Digits accepted in phone numbers.

141 // Both Arabic-Indic and Eastern Arabic-Indic are supported.

142 -const char kValidDigits[] = "0-9０-９٠-٩۰-۹";

143 +const char kValidDigits[] = "0-9\xEF\xBC\x90-\xEF\xBC\x99\xD9\xA0-\xD9\xA9\xDB\xB0-\xDB\xB9";

144 // We accept alpha characters in phone numbers, ASCII only. We store lower-case

145 // here only since our regular expressions are case-insensitive.

146 const char kValidAlpha[] = "a-z";

147 @@ -140,7 +140,7 @@

148 // itself. In emacs, you can use M-x unicode-what to query information about the

149 // unicode character.

150 const char kValidPunctuation[] =

151 - "-x‐-―−ー－-／ ⁠　()（）［］.\\[\\]/~⁓∼～";

152 + "-x\xE2\x80\x90-\xE2\x80\x95\xE2\x88\x92\xE3\x83\xBC\xEF\xBC\x8D-\xEF\xBC\x8F \xC2\xA0\xE2\x80\x8B\xE2\x81\xA0\xE3\x80\x80()\xEF\xBC\x88\xEF\xBC\x89\xEF\xBC\xBB\xEF\xBC\xBD.\\[\\]/~\xE2\x81\x93\xE2\x88\xBC\xEF\xBD\x9E";

153

154 // Regular expression of viable phone numbers. This is location independent.

155 // Checks we have at least three leading digits, and only valid punctuation,

156 @@ -454,7 +454,7 @@

157 // defined order.

158 void CreateRegularExpressions() {

159 unique_international_prefix.reset(

160 - reg_exp::CreateRegularExpression("[\\d]+(?:[~⁓∼～][\\d]+)?"));

161 + reg_exp::CreateRegularExpression("[\\d]+(?:[~\xE2\x81\x93\xE2\x88\xBC\xEF\xBD\x9E][\\d]+)?"));

162 first_group_capturing_pattern.reset(

163 reg_exp::CreateRegularExpression("(\\$1)"));

164 carrier_code_pattern.reset(

165 @@ -476,16 +476,16 @@

166 StrCat("[", kPlusChars, "](?:[", kValidPunctuation, "][", kValidDigits,

167 "]){3,}[", kValidAlpha, kValidPunctuation, kValidDigits, "]*")));

168 // Canonical-equivalence doesn't seem to be an option with RE2, so we allow

169 - // two options for representing the ó - the character itself, and one in the

170 + // two options for representing the \xC3\xB3 - the character itself, and one in the

171 // unicode decomposed form with the combining acute accent. Note that there

172 // are currently three capturing groups for the extension itself - if this

173 // number is changed, MaybeStripExtension needs to be updated.

174 const string capturing_extn_digits = StrCat("([", kValidDigits, "]{1,7})");

175 known_extn_patterns.reset(new string(

176 StrCat(kRfc3966ExtnPrefix, capturing_extn_digits, "\|"

177 - "[ \\t,]*(?:ext(?:ensi(?:ó?\|ó))?n?\|ｅｘｔｎ?\|[,xｘ#＃~～]\|"

178 - "int\|ｉｎｔ\|anexo)"

179 - "[:\\.．]?[ \\t,-]*", capturing_extn_digits, "#?\|"

180 + "[ \xC2\xA0\\t,]*(?:ext(?:ensi(?:o\xCC\x81?\|\xC3\xB3))?n?\|\xEF\xBD\x85\xEF\xBD\x98\xEF\xBD\x94\xEF\xBD\x8E?\|[,x\xEF\xBD\x98#\xEF\xBC\x83~\xEF\xBD\x9E]\|"

181 + "int\|\xEF\xBD\x89\xEF\xBD\x8E\xEF\xBD\x94\|anexo)"

182 + "[:\\.\xEF\xBC\x8E]?[ \xC2\xA0\\t,-]*", capturing_extn_digits, "#?\|"

183 "[- ]+([", kValidDigits, "]{1,5})#")));

184 extn_pattern.reset(reg_exp::CreateRegularExpression(

185 StrCat("(?i)(?:", *known_extn_patterns, ")$").c_str()));

186 @@ -509,35 +509,35 @@

187 all_plus_number_grouping_symbols->insert(

188 make_pair(ToUnicodeCodepoint("-"), '-'));

189 all_plus_number_grouping_symbols->insert(

190 - make_pair(ToUnicodeCodepoint("－"), '-'));

191 + make_pair(ToUnicodeCodepoint("\xEF\xBC\x8D"), '-'));

192 all_plus_number_grouping_symbols->insert(

193 - make_pair(ToUnicodeCodepoint("‐"), '-'));

194 + make_pair(ToUnicodeCodepoint("\xE2\x80\x90"), '-'));

195 all_plus_number_grouping_symbols->insert(

196 - make_pair(ToUnicodeCodepoint("‑"), '-'));

197 + make_pair(ToUnicodeCodepoint("\xE2\x80\x91"), '-'));

198 all_plus_number_grouping_symbols->insert(

199 - make_pair(ToUnicodeCodepoint("‒"), '-'));

200 + make_pair(ToUnicodeCodepoint("\xE2\x80\x92"), '-'));

201 all_plus_number_grouping_symbols->insert(

202 - make_pair(ToUnicodeCodepoint("–"), '-'));

203 + make_pair(ToUnicodeCodepoint("\xE2\x80\x93"), '-'));

204 all_plus_number_grouping_symbols->insert(

205 - make_pair(ToUnicodeCodepoint("—"), '-'));

206 + make_pair(ToUnicodeCodepoint("\xE2\x80\x94"), '-'));

207 all_plus_number_grouping_symbols->insert(

208 - make_pair(ToUnicodeCodepoint("―"), '-'));

209 + make_pair(ToUnicodeCodepoint("\xE2\x80\x95"), '-'));

210 all_plus_number_grouping_symbols->insert(

211 - make_pair(ToUnicodeCodepoint("−"), '-'));

212 + make_pair(ToUnicodeCodepoint("\xE2\x88\x92"), '-'));

213 all_plus_number_grouping_symbols->insert(

214 make_pair(ToUnicodeCodepoint("/"), '/'));

215 all_plus_number_grouping_symbols->insert(

216 - make_pair(ToUnicodeCodepoint("／"), '/'));

217 + make_pair(ToUnicodeCodepoint("\xEF\xBC\x8F"), '/'));

218 all_plus_number_grouping_symbols->insert(

219 make_pair(ToUnicodeCodepoint(" "), ' '));

220 all_plus_number_grouping_symbols->insert(

221 - make_pair(ToUnicodeCodepoint("　"), ' '));

222 + make_pair(ToUnicodeCodepoint("\xE3\x80\x80"), ' '));

223 all_plus_number_grouping_symbols->insert(

224 - make_pair(ToUnicodeCodepoint("⁠"), ' '));

225 + make_pair(ToUnicodeCodepoint("\xE2\x81\xA0"), ' '));

226 all_plus_number_grouping_symbols->insert(

227 make_pair(ToUnicodeCodepoint("."), '.'));

228 all_plus_number_grouping_symbols->insert(

229 - make_pair(ToUnicodeCodepoint("．"), '.'));

230 + make_pair(ToUnicodeCodepoint("\xEF\xBC\x8E"), '.'));

231 // Only the upper-case letters are added here - the lower-case versions are

232 // added programmatically.

233 alpha_mappings->insert(make_pair(ToUnicodeCodepoint("A"), '2'));

234 @@ -849,7 +849,7 @@

235 // Note here that all NANPA formatting rules are contained by US, so we use

236 // that to format NANPA numbers. The same applies to Russian Fed regions -

237 // rules are contained by Russia. French Indian Ocean country rules are

238 - // contained by Réunion.

239 + // contained by R\xC3\xA9union.

240 string region_code;

241 GetRegionCodeForCountryCode(country_calling_code, &region_code);

242 if (!HasValidRegionCode(region_code, country_calling_code,

243 @@ -1015,7 +1015,7 @@

244 // For regions that share a country calling code, the country calling code

245 // need not be dialled. This also applies when dialling within a region, so

246 // this if clause covers both these cases.

247 - // Technically this is the case for dialling from la Réunion to other

248 + // Technically this is the case for dialling from la R\xC3\xA9union to other

249 // overseas departments of France (French Guiana, Martinique, Guadeloupe),

250 // but not vice versa - so we don't cover this edge case for now and for

251 // those cases return the version including country calling code.

OLD	NEW