OLD | NEW |
(Empty) | |
| 1 // Copyright (C) 2009 Google Inc. |
| 2 // |
| 3 // Licensed under the Apache License, Version 2.0 (the "License"); |
| 4 // you may not use this file except in compliance with the License. |
| 5 // You may obtain a copy of the License at |
| 6 // |
| 7 // http://www.apache.org/licenses/LICENSE-2.0 |
| 8 // |
| 9 // Unless required by applicable law or agreed to in writing, software |
| 10 // distributed under the License is distributed on an "AS IS" BASIS, |
| 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 12 // See the License for the specific language governing permissions and |
| 13 // limitations under the License. |
| 14 |
| 15 // Author: Shaopeng Jia |
| 16 // Open-sourced by: Philippe Liard |
| 17 |
| 18 #include "phonenumberutil.h" |
| 19 |
| 20 #include <algorithm> |
| 21 #include <cctype> |
| 22 #include <cstddef> |
| 23 #include <fstream> |
| 24 #include <iostream> |
| 25 #include <iterator> |
| 26 #include <map> |
| 27 #include <sstream> |
| 28 #include <utility> |
| 29 #include <vector> |
| 30 |
| 31 #include <google/protobuf/message_lite.h> |
| 32 #include <unicode/uchar.h> |
| 33 #include <unicode/utf8.h> |
| 34 |
| 35 #include "base/logging.h" |
| 36 #include "base/memory/singleton.h" |
| 37 #include "default_logger.h" |
| 38 #include "encoding_utils.h" |
| 39 #include "metadata.h" |
| 40 #include "normalize_utf8.h" |
| 41 #include "phonemetadata.pb.h" |
| 42 #include "phonenumber.h" |
| 43 #include "phonenumber.pb.h" |
| 44 #include "regexp_adapter.h" |
| 45 #include "regexp_cache.h" |
| 46 #include "region_code.h" |
| 47 #include "stl_util.h" |
| 48 #include "stringutil.h" |
| 49 #include "utf/unicodetext.h" |
| 50 #include "utf/utf.h" |
| 51 |
| 52 namespace i18n { |
| 53 namespace phonenumbers { |
| 54 |
| 55 using std::cerr; |
| 56 using std::endl; |
| 57 using std::ifstream; |
| 58 using std::make_pair; |
| 59 using std::sort; |
| 60 using std::stringstream; |
| 61 |
| 62 using google::protobuf::RepeatedPtrField; |
| 63 |
| 64 // static |
| 65 const char PhoneNumberUtil::kPlusChars[] = "+\xEF\xBC\x8B"; |
| 66 // To find out the unicode code-point of the characters below in vim, highlight |
| 67 // the character and type 'ga'. Note that the - is used to express ranges of |
| 68 // full-width punctuation below, as well as being present in the expression |
| 69 // itself. In emacs, you can use M-x unicode-what to query information about the |
| 70 // unicode character. |
| 71 // static |
| 72 const char PhoneNumberUtil::kValidPunctuation[] = |
| 73 "-x\xE2\x80\x90-\xE2\x80\x95\xE2\x88\x92\xE3\x83\xBC\xEF\xBC\x8D-\xEF\xBC\x8
F \xC2\xA0\xE2\x80\x8B\xE2\x81\xA0\xE3\x80\x80()\xEF\xBC\x88\xEF\xBC\x89\xEF\xBC
\xBB\xEF\xBC\xBD.\\[\\]/~\xE2\x81\x93\xE2\x88\xBC"; |
| 74 |
| 75 namespace { |
| 76 |
// Logger instance created and installed by the PhoneNumberUtil constructor.
scoped_ptr<Logger> logger_;
// Cache of compiled regular expressions, created in
// InitializeStaticMapsAndSets.
scoped_ptr<RegExpCache> regexp_cache;

// These objects are created in the function InitializeStaticMapsAndSets.

// These mappings map a character (key) to a specific digit that should replace
// it for normalization purposes.
scoped_ptr<map<char32, char> > alpha_mappings;
// For performance reasons, store a map of combining alpha_mappings with ASCII
// digits.
scoped_ptr<map<char32, char> > alpha_phone_mappings;
// Separate map of all symbols that we wish to retain when formatting alpha
// numbers. This includes digits, ascii letters and number grouping symbols such
// as "-" and " ".
scoped_ptr<map<char32, char> > all_plus_number_grouping_symbols;

// The kPlusSign signifies the international prefix.
const char kPlusSign[] = "+";

// Compiled pattern for plus characters. NOTE(review): it is initialised
// outside the visible part of this file, presumably from kPlusChars - confirm.
scoped_ptr<const RegExp> plus_chars_pattern;

// Prefix that introduces the extension in RFC 3966 formatted numbers.
const char kRfc3966ExtnPrefix[] = ";ext=";

// Pattern that makes it easy to distinguish whether a region has a unique
// international dialing prefix or not. If a region has a unique international
// prefix (e.g. 011 in USA), it will be represented as a string that contains a
// sequence of ASCII digits. If there are multiple available international
// prefixes in a region, they will be represented as a regex string that always
// contains character(s) other than ASCII digits.
// Note this regex also includes tilde, which signals waiting for the tone.
// Created in PhoneNumberUtil::CreateRegularExpressions.
scoped_ptr<const RegExp> unique_international_prefix;

// Regular expression class matching any Unicode decimal digit (category Nd).
const char kDigits[] = "\\p{Nd}";
scoped_ptr<const RegExp> digits_pattern;
// We accept alpha characters in phone numbers, ASCII only. We store lower-case
// here only since our regular expressions are case-insensitive.
const char kValidAlpha[] = "a-z";
// Used by ParsePrefixAsIdd to extract the first digit that follows a stripped
// international dialing prefix.
scoped_ptr<const RegExp> capturing_digit_pattern;
scoped_ptr<const RegExp> capturing_ascii_digits_pattern;

// Regular expression of acceptable characters that may start a phone number
// for the purposes of parsing. This allows us to strip away meaningless
// prefixes to phone numbers that may be mistakenly given to us. This
// consists of digits, the plus symbol and arabic-indic digits. This does
// not contain alpha characters, although they may be used later in the
// number. It also does not include other punctuation, as this will be
// stripped later during parsing and is of no information value when parsing
// a number. The string starting with this valid character is captured.
// This corresponds to VALID_START_CHAR in the java version.
scoped_ptr<const string> valid_start_char;
scoped_ptr<const RegExp> valid_start_char_pattern;

// Regular expression of characters typically used to start a second phone
// number for the purposes of parsing. This allows us to strip off parts of
// the number that are actually the start of another number, such as for:
// (530) 583-6985 x302/x2303 -> the second extension here makes this actually
// two phone numbers, (530) 583-6985 x302 and (530) 583-6985 x2303. We remove
// the second extension so that the first number is parsed correctly. The string
// preceding this is captured.
// This corresponds to SECOND_NUMBER_START in the java version.
const char kCaptureUpToSecondNumberStart[] = "(.*)[\\\\/] *x";
scoped_ptr<const RegExp> capture_up_to_second_number_start_pattern;

// Regular expression of trailing characters that we want to remove. We remove
// all characters that are not alpha or numerical characters. The hash
// character is retained here, as it may signify the previous block was an
// extension. Note the capturing block at the start to capture the rest of the
// number if this was a match.
// This corresponds to UNWANTED_END_CHARS in the java version.
const char kUnwantedEndChar[] = "[^\\p{N}\\p{L}#]";
scoped_ptr<const RegExp> unwanted_end_char_pattern;

// Regular expression of groups of valid punctuation characters.
scoped_ptr<const RegExp> separator_pattern;

// Regular expression of viable phone numbers. This is location independent.
// Checks we have at least three leading digits, and only valid punctuation,
// alpha characters and digits in the phone number. Does not include extension
// data. The symbol 'x' is allowed here as valid punctuation since it is often
// used as a placeholder for carrier codes, for example in Brazilian phone
// numbers. We also allow multiple plus-signs at the start.
// Corresponds to the following:
// plus_sign*([punctuation]*[digits]){3,}([punctuation]|[digits]|[alpha])*
scoped_ptr<const string> valid_phone_number;

// Default extension prefix to use when formatting. This will be put in front of
// any extension component of the number, after the main national number is
// formatted. For example, if you wish the default extension formatting to be "
// extn: 3456", then you should specify " extn: " here as the default extension
// prefix. This can be overridden by region-specific preferences.
const char kDefaultExtnPrefix[] = " ext. ";

// Regexp of all possible ways to write extensions, for use when parsing. This
// will be run as a case-insensitive regexp match. Wide character versions are
// also provided after each ascii version. There are three regular expressions
// here. The first covers RFC 3966 format, where the extension is added using
// ";ext=". The second more generic one starts with optional white space and
// ends with an optional full stop (.), followed by zero or more spaces/tabs and
// then the numbers themselves. The third one covers the special case of
// American numbers where the extension is written with a hash at the end, such
// as "- 503#".
// Note that the only capturing groups should be around the digits that you want
// to capture as part of the extension, or else parsing will fail!
scoped_ptr<const string> known_extn_patterns;
// Regexp of all known extension prefixes used by different regions followed
// by 1 or more valid digits, for use when parsing.
scoped_ptr<const RegExp> extn_pattern;

// We append optionally the extension pattern to the end here, as a valid phone
// number may have an extension prefix appended, followed by 1 or more digits.
scoped_ptr<const RegExp> valid_phone_number_pattern;

// We use this pattern to check if the phone number has at least three letters
// in it - if so, then we treat it as a number where some phone-number digits
// are represented by letters.
scoped_ptr<const RegExp> valid_alpha_phone_pattern;

// Matches a "$<digit>" group reference inside a formatting pattern. Created in
// PhoneNumberUtil::CreateRegularExpressions as "(\\$\\d)".
scoped_ptr<const RegExp> first_group_capturing_pattern;

// Matches the "$CC" carrier-code placeholder of a carrier code formatting
// rule. Created in PhoneNumberUtil::CreateRegularExpressions.
scoped_ptr<const RegExp> carrier_code_pattern;
| 197 |
| 198 bool LoadCompiledInMetadata(PhoneMetadataCollection* metadata) { |
| 199 if (!metadata->ParseFromArray(metadata_get(), metadata_size())) { |
| 200 cerr << "Could not parse binary data." << endl; |
| 201 return false; |
| 202 } |
| 203 return true; |
| 204 } |
| 205 |
| 206 // Returns a pointer to the description inside the metadata of the appropriate |
| 207 // type. |
| 208 const PhoneNumberDesc* GetNumberDescByType( |
| 209 const PhoneMetadata& metadata, |
| 210 PhoneNumberUtil::PhoneNumberType type) { |
| 211 switch (type) { |
| 212 case PhoneNumberUtil::PREMIUM_RATE: |
| 213 return &metadata.premium_rate(); |
| 214 case PhoneNumberUtil::TOLL_FREE: |
| 215 return &metadata.toll_free(); |
| 216 case PhoneNumberUtil::MOBILE: |
| 217 return &metadata.mobile(); |
| 218 case PhoneNumberUtil::FIXED_LINE: |
| 219 case PhoneNumberUtil::FIXED_LINE_OR_MOBILE: |
| 220 return &metadata.fixed_line(); |
| 221 case PhoneNumberUtil::SHARED_COST: |
| 222 return &metadata.shared_cost(); |
| 223 case PhoneNumberUtil::VOIP: |
| 224 return &metadata.voip(); |
| 225 case PhoneNumberUtil::PERSONAL_NUMBER: |
| 226 return &metadata.personal_number(); |
| 227 case PhoneNumberUtil::PAGER: |
| 228 return &metadata.pager(); |
| 229 case PhoneNumberUtil::UAN: |
| 230 return &metadata.uan(); |
| 231 default: |
| 232 return &metadata.general_desc(); |
| 233 } |
| 234 } |
| 235 |
| 236 // A helper function that is used by Format and FormatByPattern. |
| 237 void FormatNumberByFormat(int country_calling_code, |
| 238 PhoneNumberUtil::PhoneNumberFormat number_format, |
| 239 const string& formatted_national_number, |
| 240 const string& formatted_extension, |
| 241 string* formatted_number) { |
| 242 switch (number_format) { |
| 243 case PhoneNumberUtil::E164: |
| 244 formatted_number->assign(StrCat(kPlusSign, |
| 245 SimpleItoa(country_calling_code), |
| 246 formatted_national_number, |
| 247 formatted_extension)); |
| 248 return; |
| 249 case PhoneNumberUtil::INTERNATIONAL: |
| 250 formatted_number->assign(StrCat(kPlusSign, |
| 251 SimpleItoa(country_calling_code), |
| 252 " ", |
| 253 formatted_national_number, |
| 254 formatted_extension)); |
| 255 return; |
| 256 case PhoneNumberUtil::RFC3966: |
| 257 formatted_number->assign(StrCat(kPlusSign, |
| 258 SimpleItoa(country_calling_code), |
| 259 "-", |
| 260 formatted_national_number, |
| 261 formatted_extension)); |
| 262 return; |
| 263 case PhoneNumberUtil::NATIONAL: |
| 264 default: |
| 265 formatted_number->assign(StrCat(formatted_national_number, |
| 266 formatted_extension)); |
| 267 } |
| 268 } |
| 269 |
| 270 // The number_for_leading_digits_match is a separate parameter, because for |
| 271 // alpha numbers we want to pass in the numeric version to select the right |
| 272 // formatting rule, but then we actually apply the formatting pattern to the |
| 273 // national_number (which in this case has alpha characters in it). |
| 274 // |
| 275 // Note that carrier_code is optional - if an empty string, no carrier code |
| 276 // replacement will take place. |
| 277 void FormatAccordingToFormatsWithCarrier( |
| 278 const string& number_for_leading_digits_match, |
| 279 const RepeatedPtrField<NumberFormat>& available_formats, |
| 280 PhoneNumberUtil::PhoneNumberFormat number_format, |
| 281 const string& national_number, |
| 282 const string& carrier_code, |
| 283 string* formatted_number) { |
| 284 DCHECK(formatted_number); |
| 285 for (RepeatedPtrField<NumberFormat>::const_iterator |
| 286 it = available_formats.begin(); it != available_formats.end(); ++it) { |
| 287 int size = it->leading_digits_pattern_size(); |
| 288 if (size > 0) { |
| 289 const scoped_ptr<RegExpInput> number_copy( |
| 290 RegExpInput::Create(number_for_leading_digits_match)); |
| 291 // We always use the last leading_digits_pattern, as it is the most |
| 292 // detailed. |
| 293 if (!regexp_cache->GetRegExp(it->leading_digits_pattern(size - 1)) |
| 294 .Consume(number_copy.get())) { |
| 295 continue; |
| 296 } |
| 297 } |
| 298 const RegExp& pattern_to_match(regexp_cache->GetRegExp(it->pattern())); |
| 299 if (pattern_to_match.FullMatch(national_number)) { |
| 300 string formatting_pattern(it->format()); |
| 301 if (number_format == PhoneNumberUtil::NATIONAL && |
| 302 carrier_code.length() > 0 && |
| 303 it->domestic_carrier_code_formatting_rule().length() > 0) { |
| 304 // Replace the $CC in the formatting rule with the desired carrier code. |
| 305 string carrier_code_formatting_rule = |
| 306 it->domestic_carrier_code_formatting_rule(); |
| 307 carrier_code_pattern->Replace(&carrier_code_formatting_rule, |
| 308 carrier_code); |
| 309 first_group_capturing_pattern->Replace(&formatting_pattern, |
| 310 carrier_code_formatting_rule); |
| 311 } else { |
| 312 // Use the national prefix formatting rule instead. |
| 313 string national_prefix_formatting_rule = |
| 314 it->national_prefix_formatting_rule(); |
| 315 if (number_format == PhoneNumberUtil::NATIONAL && |
| 316 national_prefix_formatting_rule.length() > 0) { |
| 317 // Apply the national_prefix_formatting_rule as the formatting_pattern |
| 318 // contains only information on how the national significant number |
| 319 // should be formatted at this point. |
| 320 first_group_capturing_pattern->Replace( |
| 321 &formatting_pattern, national_prefix_formatting_rule); |
| 322 } |
| 323 } |
| 324 formatted_number->assign(national_number); |
| 325 pattern_to_match.GlobalReplace(formatted_number, formatting_pattern); |
| 326 return; |
| 327 } |
| 328 } |
| 329 // If no pattern above is matched, we format the number as a whole. |
| 330 formatted_number->assign(national_number); |
| 331 } |
| 332 |
| 333 // Simple wrapper of FormatAccordingToFormatsWithCarrier for the common case of |
| 334 // no carrier code. |
| 335 void FormatAccordingToFormats( |
| 336 const string& number_for_leading_digits_match, |
| 337 const RepeatedPtrField<NumberFormat>& available_formats, |
| 338 PhoneNumberUtil::PhoneNumberFormat number_format, |
| 339 const string& national_number, |
| 340 string* formatted_number) { |
| 341 DCHECK(formatted_number); |
| 342 FormatAccordingToFormatsWithCarrier(number_for_leading_digits_match, |
| 343 available_formats, number_format, |
| 344 national_number, "", formatted_number); |
| 345 } |
| 346 |
| 347 // Returns true when one national number is the suffix of the other or both are |
| 348 // the same. |
| 349 bool IsNationalNumberSuffixOfTheOther(const PhoneNumber& first_number, |
| 350 const PhoneNumber& second_number) { |
| 351 const string& first_number_national_number = |
| 352 SimpleItoa(static_cast<uint64>(first_number.national_number())); |
| 353 const string& second_number_national_number = |
| 354 SimpleItoa(static_cast<uint64>(second_number.national_number())); |
| 355 // Note that HasSuffixString returns true if the numbers are equal. |
| 356 return HasSuffixString(first_number_national_number, |
| 357 second_number_national_number) || |
| 358 HasSuffixString(second_number_national_number, |
| 359 first_number_national_number); |
| 360 } |
| 361 |
| 362 bool IsNumberMatchingDesc(const string& national_number, |
| 363 const PhoneNumberDesc& number_desc) { |
| 364 return regexp_cache->GetRegExp(number_desc.possible_number_pattern()) |
| 365 .FullMatch(national_number) && |
| 366 regexp_cache->GetRegExp(number_desc.national_number_pattern()) |
| 367 .FullMatch(national_number); |
| 368 } |
| 369 |
| 370 PhoneNumberUtil::PhoneNumberType GetNumberTypeHelper( |
| 371 const string& national_number, const PhoneMetadata& metadata) { |
| 372 const PhoneNumberDesc& general_desc = metadata.general_desc(); |
| 373 if (!general_desc.has_national_number_pattern() || |
| 374 !IsNumberMatchingDesc(national_number, general_desc)) { |
| 375 VLOG(4) << "Number type unknown - doesn't match general national number" |
| 376 << " pattern."; |
| 377 return PhoneNumberUtil::UNKNOWN; |
| 378 } |
| 379 if (IsNumberMatchingDesc(national_number, metadata.premium_rate())) { |
| 380 VLOG(4) << "Number is a premium number."; |
| 381 return PhoneNumberUtil::PREMIUM_RATE; |
| 382 } |
| 383 if (IsNumberMatchingDesc(national_number, metadata.toll_free())) { |
| 384 VLOG(4) << "Number is a toll-free number."; |
| 385 return PhoneNumberUtil::TOLL_FREE; |
| 386 } |
| 387 if (IsNumberMatchingDesc(national_number, metadata.shared_cost())) { |
| 388 VLOG(4) << "Number is a shared cost number."; |
| 389 return PhoneNumberUtil::SHARED_COST; |
| 390 } |
| 391 if (IsNumberMatchingDesc(national_number, metadata.voip())) { |
| 392 VLOG(4) << "Number is a VOIP (Voice over IP) number."; |
| 393 return PhoneNumberUtil::VOIP; |
| 394 } |
| 395 if (IsNumberMatchingDesc(national_number, metadata.personal_number())) { |
| 396 VLOG(4) << "Number is a personal number."; |
| 397 return PhoneNumberUtil::PERSONAL_NUMBER; |
| 398 } |
| 399 if (IsNumberMatchingDesc(national_number, metadata.pager())) { |
| 400 VLOG(4) << "Number is a pager number."; |
| 401 return PhoneNumberUtil::PAGER; |
| 402 } |
| 403 if (IsNumberMatchingDesc(national_number, metadata.uan())) { |
| 404 VLOG(4) << "Number is a UAN."; |
| 405 return PhoneNumberUtil::UAN; |
| 406 } |
| 407 |
| 408 bool is_fixed_line = |
| 409 IsNumberMatchingDesc(national_number, metadata.fixed_line()); |
| 410 if (is_fixed_line) { |
| 411 if (metadata.same_mobile_and_fixed_line_pattern()) { |
| 412 VLOG(4) << "Fixed-line and mobile patterns equal, number is fixed-line" |
| 413 << " or mobile"; |
| 414 return PhoneNumberUtil::FIXED_LINE_OR_MOBILE; |
| 415 } else if (IsNumberMatchingDesc(national_number, metadata.mobile())) { |
| 416 VLOG(4) << "Fixed-line and mobile patterns differ, but number is " |
| 417 << "still fixed-line or mobile"; |
| 418 return PhoneNumberUtil::FIXED_LINE_OR_MOBILE; |
| 419 } |
| 420 VLOG(4) << "Number is a fixed line number."; |
| 421 return PhoneNumberUtil::FIXED_LINE; |
| 422 } |
| 423 // Otherwise, test to see if the number is mobile. Only do this if certain |
| 424 // that the patterns for mobile and fixed line aren't the same. |
| 425 if (!metadata.same_mobile_and_fixed_line_pattern() && |
| 426 IsNumberMatchingDesc(national_number, metadata.mobile())) { |
| 427 VLOG(4) << "Number is a mobile number."; |
| 428 return PhoneNumberUtil::MOBILE; |
| 429 } |
| 430 VLOG(4) << "Number type unknown - doesn\'t match any specific number type" |
| 431 << " pattern."; |
| 432 return PhoneNumberUtil::UNKNOWN; |
| 433 } |
| 434 |
| 435 char32 ToUnicodeCodepoint(const char* unicode_char) { |
| 436 char32 codepoint; |
| 437 EncodingUtils::DecodeUTF8Char(unicode_char, &codepoint); |
| 438 return codepoint; |
| 439 } |
| 440 |
| 441 void InitializeStaticMapsAndSets() { |
| 442 // Create global objects. |
| 443 regexp_cache.reset(new RegExpCache(128)); |
| 444 all_plus_number_grouping_symbols.reset(new map<char32, char>); |
| 445 alpha_mappings.reset(new map<char32, char>); |
| 446 alpha_phone_mappings.reset(new map<char32, char>); |
| 447 |
| 448 // Punctuation that we wish to respect in alpha numbers, as they show number |
| 449 // groupings are mapped here. |
| 450 all_plus_number_grouping_symbols->insert( |
| 451 make_pair(ToUnicodeCodepoint("-"), '-')); |
| 452 all_plus_number_grouping_symbols->insert( |
| 453 make_pair(ToUnicodeCodepoint("\xEF\xBC\x8D"), '-')); |
| 454 all_plus_number_grouping_symbols->insert( |
| 455 make_pair(ToUnicodeCodepoint("\xE2\x80\x90"), '-')); |
| 456 all_plus_number_grouping_symbols->insert( |
| 457 make_pair(ToUnicodeCodepoint("\xE2\x80\x91"), '-')); |
| 458 all_plus_number_grouping_symbols->insert( |
| 459 make_pair(ToUnicodeCodepoint("\xE2\x80\x92"), '-')); |
| 460 all_plus_number_grouping_symbols->insert( |
| 461 make_pair(ToUnicodeCodepoint("\xE2\x80\x93"), '-')); |
| 462 all_plus_number_grouping_symbols->insert( |
| 463 make_pair(ToUnicodeCodepoint("\xE2\x80\x94"), '-')); |
| 464 all_plus_number_grouping_symbols->insert( |
| 465 make_pair(ToUnicodeCodepoint("\xE2\x80\x95"), '-')); |
| 466 all_plus_number_grouping_symbols->insert( |
| 467 make_pair(ToUnicodeCodepoint("\xE2\x88\x92"), '-')); |
| 468 all_plus_number_grouping_symbols->insert( |
| 469 make_pair(ToUnicodeCodepoint("/"), '/')); |
| 470 all_plus_number_grouping_symbols->insert( |
| 471 make_pair(ToUnicodeCodepoint("\xEF\xBC\x8F"), '/')); |
| 472 all_plus_number_grouping_symbols->insert( |
| 473 make_pair(ToUnicodeCodepoint(" "), ' ')); |
| 474 all_plus_number_grouping_symbols->insert( |
| 475 make_pair(ToUnicodeCodepoint("\xE3\x80\x80"), ' ')); |
| 476 all_plus_number_grouping_symbols->insert( |
| 477 make_pair(ToUnicodeCodepoint("\xE2\x81\xA0"), ' ')); |
| 478 all_plus_number_grouping_symbols->insert( |
| 479 make_pair(ToUnicodeCodepoint("."), '.')); |
| 480 all_plus_number_grouping_symbols->insert( |
| 481 make_pair(ToUnicodeCodepoint("\xEF\xBC\x8E"), '.')); |
| 482 // Only the upper-case letters are added here - the lower-case versions are |
| 483 // added programmatically. |
| 484 alpha_mappings->insert(make_pair(ToUnicodeCodepoint("A"), '2')); |
| 485 alpha_mappings->insert(make_pair(ToUnicodeCodepoint("B"), '2')); |
| 486 alpha_mappings->insert(make_pair(ToUnicodeCodepoint("C"), '2')); |
| 487 alpha_mappings->insert(make_pair(ToUnicodeCodepoint("D"), '3')); |
| 488 alpha_mappings->insert(make_pair(ToUnicodeCodepoint("E"), '3')); |
| 489 alpha_mappings->insert(make_pair(ToUnicodeCodepoint("F"), '3')); |
| 490 alpha_mappings->insert(make_pair(ToUnicodeCodepoint("G"), '4')); |
| 491 alpha_mappings->insert(make_pair(ToUnicodeCodepoint("H"), '4')); |
| 492 alpha_mappings->insert(make_pair(ToUnicodeCodepoint("I"), '4')); |
| 493 alpha_mappings->insert(make_pair(ToUnicodeCodepoint("J"), '5')); |
| 494 alpha_mappings->insert(make_pair(ToUnicodeCodepoint("K"), '5')); |
| 495 alpha_mappings->insert(make_pair(ToUnicodeCodepoint("L"), '5')); |
| 496 alpha_mappings->insert(make_pair(ToUnicodeCodepoint("M"), '6')); |
| 497 alpha_mappings->insert(make_pair(ToUnicodeCodepoint("N"), '6')); |
| 498 alpha_mappings->insert(make_pair(ToUnicodeCodepoint("O"), '6')); |
| 499 alpha_mappings->insert(make_pair(ToUnicodeCodepoint("P"), '7')); |
| 500 alpha_mappings->insert(make_pair(ToUnicodeCodepoint("Q"), '7')); |
| 501 alpha_mappings->insert(make_pair(ToUnicodeCodepoint("R"), '7')); |
| 502 alpha_mappings->insert(make_pair(ToUnicodeCodepoint("S"), '7')); |
| 503 alpha_mappings->insert(make_pair(ToUnicodeCodepoint("T"), '8')); |
| 504 alpha_mappings->insert(make_pair(ToUnicodeCodepoint("U"), '8')); |
| 505 alpha_mappings->insert(make_pair(ToUnicodeCodepoint("V"), '8')); |
| 506 alpha_mappings->insert(make_pair(ToUnicodeCodepoint("W"), '9')); |
| 507 alpha_mappings->insert(make_pair(ToUnicodeCodepoint("X"), '9')); |
| 508 alpha_mappings->insert(make_pair(ToUnicodeCodepoint("Y"), '9')); |
| 509 alpha_mappings->insert(make_pair(ToUnicodeCodepoint("Z"), '9')); |
| 510 map<char32, char> lower_case_mappings; |
| 511 map<char32, char> alpha_letters; |
| 512 for (map<char32, char>::const_iterator it = alpha_mappings->begin(); |
| 513 it != alpha_mappings->end(); |
| 514 ++it) { |
| 515 // Convert all the upper-case ASCII letters to lower-case. |
| 516 if (it->first < 128) { |
| 517 char letter_as_upper = static_cast<char>(it->first); |
| 518 char32 letter_as_lower = static_cast<char32>(tolower(letter_as_upper)); |
| 519 lower_case_mappings.insert(make_pair(letter_as_lower, it->second)); |
| 520 // Add the letters in both variants to the alpha_letters map. This just |
| 521 // pairs each letter with its upper-case representation so that it can be |
| 522 // retained when normalising alpha numbers. |
| 523 alpha_letters.insert(make_pair(letter_as_lower, letter_as_upper)); |
| 524 alpha_letters.insert(make_pair(it->first, letter_as_upper)); |
| 525 } |
| 526 } |
| 527 // In the Java version we don't insert the lower-case mappings in the map, |
| 528 // because we convert to upper case on the fly. Doing this here would involve |
| 529 // pulling in all of ICU, which we don't want to do if we don't have to. |
| 530 alpha_mappings->insert(lower_case_mappings.begin(), |
| 531 lower_case_mappings.end()); |
| 532 alpha_phone_mappings->insert(alpha_mappings->begin(), |
| 533 alpha_mappings->end()); |
| 534 all_plus_number_grouping_symbols->insert(alpha_letters.begin(), |
| 535 alpha_letters.end()); |
| 536 // Add the ASCII digits so that they don't get deleted by NormalizeHelper(). |
| 537 for (char c = '0'; c <= '9'; ++c) { |
| 538 alpha_phone_mappings->insert(make_pair(c, c)); |
| 539 all_plus_number_grouping_symbols->insert(make_pair(c, c)); |
| 540 } |
| 541 } |
| 542 |
| 543 // Normalizes a string of characters representing a phone number by replacing |
| 544 // all characters found in the accompanying map with the values therein, and |
| 545 // stripping all other characters if remove_non_matches is true. |
| 546 // Parameters: |
| 547 // number - a pointer to a string of characters representing a phone number to |
| 548 // be normalized. |
| 549 // normalization_replacements - a mapping of characters to what they should be |
| 550 // replaced by in the normalized version of the phone number |
| 551 // remove_non_matches - indicates whether characters that are not able to be |
| 552 // replaced should be stripped from the number. If this is false, they will be |
| 553 // left unchanged in the number. |
| 554 void NormalizeHelper(const map<char32, char>& normalization_replacements, |
| 555 bool remove_non_matches, |
| 556 string* number) { |
| 557 DCHECK(number); |
| 558 UnicodeText number_as_unicode; |
| 559 number_as_unicode.PointToUTF8(number->data(), number->size()); |
| 560 string normalized_number; |
| 561 char unicode_char[5]; |
| 562 for (UnicodeText::const_iterator it = number_as_unicode.begin(); |
| 563 it != number_as_unicode.end(); |
| 564 ++it) { |
| 565 map<char32, char>::const_iterator found_glyph_pair = |
| 566 normalization_replacements.find(*it); |
| 567 if (found_glyph_pair != normalization_replacements.end()) { |
| 568 normalized_number.push_back(found_glyph_pair->second); |
| 569 } else if (!remove_non_matches) { |
| 570 // Find out how long this unicode char is so we can append it all. |
| 571 int char_len = it.get_utf8(unicode_char); |
| 572 normalized_number.append(unicode_char, char_len); |
| 573 } |
| 574 // If neither of the above are true, we remove this character. |
| 575 } |
| 576 number->assign(normalized_number); |
| 577 } |
| 578 |
| 579 // Strips the IDD from the start of the number if present. Helper function used |
| 580 // by MaybeStripInternationalPrefixAndNormalize. |
| 581 bool ParsePrefixAsIdd(const RegExp& idd_pattern, string* number) { |
| 582 DCHECK(number); |
| 583 const scoped_ptr<RegExpInput> number_copy(RegExpInput::Create(*number)); |
| 584 // First attempt to strip the idd_pattern at the start, if present. We make a |
| 585 // copy so that we can revert to the original string if necessary. |
| 586 if (idd_pattern.Consume(number_copy.get())) { |
| 587 // Only strip this if the first digit after the match is not a 0, since |
| 588 // country calling codes cannot begin with 0. |
| 589 string extracted_digit; |
| 590 if (capturing_digit_pattern->PartialMatch(number_copy->ToString(), |
| 591 &extracted_digit)) { |
| 592 PhoneNumberUtil::NormalizeDigitsOnly(&extracted_digit); |
| 593 if (extracted_digit == "0") { |
| 594 return false; |
| 595 } |
| 596 } |
| 597 number->assign(number_copy->ToString()); |
| 598 return true; |
| 599 } |
| 600 return false; |
| 601 } |
| 602 |
| 603 PhoneNumberUtil::ValidationResult TestNumberLengthAgainstPattern( |
| 604 const RegExp& number_pattern, const string& number) { |
| 605 string extracted_number; |
| 606 if (number_pattern.FullMatch(number, &extracted_number)) { |
| 607 return PhoneNumberUtil::IS_POSSIBLE; |
| 608 } |
| 609 if (number_pattern.PartialMatch(number, &extracted_number)) { |
| 610 return PhoneNumberUtil::TOO_LONG; |
| 611 } else { |
| 612 return PhoneNumberUtil::TOO_SHORT; |
| 613 } |
| 614 } |
| 615 |
| 616 } // namespace |
| 617 |
// Installs |logger| as the logger implementation by forwarding it to
// Logger::set_logger_impl. Note that the PhoneNumberUtil constructor installs
// its own StdoutLogger through the same call.
void PhoneNumberUtil::SetLogger(Logger* logger) {
  Logger::set_logger_impl(logger);
}
| 621 |
| 622 // Private constructor. Also takes care of initialisation. |
| 623 PhoneNumberUtil::PhoneNumberUtil() |
| 624 : country_calling_code_to_region_code_map_(new vector<IntRegionsPair>()), |
| 625 nanpa_regions_(new set<string>()), |
| 626 region_to_metadata_map_(new map<string, PhoneMetadata>()) { |
| 627 logger_.reset(new StdoutLogger()); |
| 628 Logger::set_logger_impl(logger_.get()); |
| 629 PhoneMetadataCollection metadata_collection; |
| 630 if (!LoadCompiledInMetadata(&metadata_collection)) { |
| 631 LOG(DFATAL) << "Could not parse compiled-in metadata."; |
| 632 return; |
| 633 } |
| 634 // Storing data in a temporary map to make it easier to find other regions |
| 635 // that share a country calling code when inserting data. |
| 636 map<int, list<string>* > country_calling_code_to_region_map; |
| 637 for (RepeatedPtrField<PhoneMetadata>::const_iterator it = |
| 638 metadata_collection.metadata().begin(); |
| 639 it != metadata_collection.metadata().end(); |
| 640 ++it) { |
| 641 const PhoneMetadata& phone_metadata = *it; |
| 642 const string& region_code = phone_metadata.id(); |
| 643 region_to_metadata_map_->insert(make_pair(region_code, *it)); |
| 644 int country_calling_code = it->country_code(); |
| 645 map<int, list<string>*>::iterator calling_code_in_map = |
| 646 country_calling_code_to_region_map.find(country_calling_code); |
| 647 if (calling_code_in_map != country_calling_code_to_region_map.end()) { |
| 648 if (it->main_country_for_code()) { |
| 649 calling_code_in_map->second->push_front(region_code); |
| 650 } else { |
| 651 calling_code_in_map->second->push_back(region_code); |
| 652 } |
| 653 } else { |
| 654 // For most country calling codes, there will be only one region code. |
| 655 list<string>* list_with_region_code = new list<string>(); |
| 656 list_with_region_code->push_back(region_code); |
| 657 country_calling_code_to_region_map.insert( |
| 658 make_pair(country_calling_code, list_with_region_code)); |
| 659 } |
| 660 if (country_calling_code == kNanpaCountryCode) { |
| 661 nanpa_regions_->insert(region_code); |
| 662 } |
| 663 } |
| 664 |
| 665 country_calling_code_to_region_code_map_->insert( |
| 666 country_calling_code_to_region_code_map_->begin(), |
| 667 country_calling_code_to_region_map.begin(), |
| 668 country_calling_code_to_region_map.end()); |
| 669 // Sort all the pairs in ascending order according to country calling code. |
| 670 sort(country_calling_code_to_region_code_map_->begin(), |
| 671 country_calling_code_to_region_code_map_->end(), |
| 672 OrderByFirst()); |
| 673 |
| 674 InitializeStaticMapsAndSets(); |
| 675 CreateRegularExpressions(); |
| 676 } |
| 677 |
| 678 PhoneNumberUtil::~PhoneNumberUtil() { |
| 679 STLDeleteContainerPairSecondPointers( |
| 680 country_calling_code_to_region_code_map_->begin(), |
| 681 country_calling_code_to_region_code_map_->end()); |
| 682 } |
| 683 |
| 684 // Public wrapper function to get a PhoneNumberUtil instance with the default |
| 685 // metadata file. |
| 686 // static |
| 687 PhoneNumberUtil* PhoneNumberUtil::GetInstance() { |
| 688 return Singleton<PhoneNumberUtil>::get(); |
| 689 } |
| 690 |
| 691 void PhoneNumberUtil::CreateRegularExpressions() const { |
| 692 unique_international_prefix.reset(RegExp::Create( |
| 693 "[\\d]+(?:[~\xE2\x81\x93\xE2\x88\xBC\xEF\xBD\x9E][\\d]+)?")); |
| 694 // The first_group_capturing_pattern was originally set to $1 but there are |
| 695 // some countries for which the first group is not used in the national |
| 696 // pattern (e.g. Argentina) so the $1 group does not match correctly. |
| 697 // Therefore, we use \d, so that the first group actually used in the pattern |
| 698 // will be matched. |
| 699 first_group_capturing_pattern.reset(RegExp::Create("(\\$\\d)")); |
| 700 carrier_code_pattern.reset(RegExp::Create("\\$CC")); |
| 701 digits_pattern.reset(RegExp::Create(StrCat("[", kDigits, "]*"))); |
| 702 capturing_digit_pattern.reset(RegExp::Create(StrCat("([", kDigits, "])"))); |
| 703 capturing_ascii_digits_pattern.reset(RegExp::Create("(\\d+)")); |
| 704 valid_start_char.reset(new string(StrCat("[", kPlusChars, kDigits, "]"))); |
| 705 valid_start_char_pattern.reset(RegExp::Create(*valid_start_char)); |
| 706 capture_up_to_second_number_start_pattern.reset(RegExp::Create( |
| 707 kCaptureUpToSecondNumberStart)); |
| 708 unwanted_end_char_pattern.reset(RegExp::Create(kUnwantedEndChar)); |
| 709 separator_pattern.reset(RegExp::Create(StrCat("[", kValidPunctuation, "]+"))); |
| 710 valid_phone_number.reset(new string( |
| 711 StrCat("[", kPlusChars, "]*(?:[", kValidPunctuation, "]*[", kDigits, |
| 712 "]){3,}[", kValidAlpha, kValidPunctuation, kDigits, "]*"))); |
| 713 // Canonical-equivalence doesn't seem to be an option with RE2, so we allow |
| 714 // two options for representing the \xC3\xB3 - the character itself, and one i
n the |
| 715 // unicode decomposed form with the combining acute accent. Note that there |
| 716 // are currently three capturing groups for the extension itself - if this |
| 717 // number is changed, MaybeStripExtension needs to be updated. |
| 718 const string capturing_extn_digits = StrCat("([", kDigits, "]{1,7})"); |
| 719 known_extn_patterns.reset(new string( |
| 720 StrCat(kRfc3966ExtnPrefix, capturing_extn_digits, "|" |
| 721 "[ \xC2\xA0\\t,]*(?:ext(?:ensi(?:o\xCC\x81?|\xC3\xB3))?n?|\xEF\xBD\
x85\xEF\xBD\x98\xEF\xBD\x94\xEF\xBD\x8E?|[,x\xEF\xBD\x98#\xEF\xBC\x83~\xEF\xBD\x
9E]|" |
| 722 "int|\xEF\xBD\x89\xEF\xBD\x8E\xEF\xBD\x94|anexo)" |
| 723 "[:\\.\xEF\xBC\x8E]?[ \xC2\xA0\\t,-]*", capturing_extn_digits, "#?|
" |
| 724 "[- ]+([", kDigits, "]{1,5})#"))); |
| 725 extn_pattern.reset(RegExp::Create( |
| 726 StrCat("(?i)(?:", *known_extn_patterns, ")$"))); |
| 727 valid_phone_number_pattern.reset(RegExp::Create( |
| 728 StrCat("(?i)", *valid_phone_number, "(?:", *known_extn_patterns, ")?"))); |
| 729 valid_alpha_phone_pattern.reset(RegExp::Create( |
| 730 StrCat("(?i)(?:.*?[", kValidAlpha, "]){3}"))); |
| 731 plus_chars_pattern.reset(RegExp::Create(StrCat("[", kPlusChars, "]+"))); |
| 732 } |
| 733 |
| 734 const string& PhoneNumberUtil::GetExtnPatterns() const { |
| 735 return *(known_extn_patterns.get()); |
| 736 } |
| 737 |
| 738 void PhoneNumberUtil::TrimUnwantedEndChars(string* number) const { |
| 739 DCHECK(number); |
| 740 UnicodeText number_as_unicode; |
| 741 number_as_unicode.PointToUTF8(number->data(), number->size()); |
| 742 char current_char[5]; |
| 743 int len; |
| 744 UnicodeText::const_reverse_iterator reverse_it(number_as_unicode.end()); |
| 745 for (; reverse_it.base() != number_as_unicode.begin(); ++reverse_it) { |
| 746 len = reverse_it.get_utf8(current_char); |
| 747 current_char[len] = '\0'; |
| 748 if (!unwanted_end_char_pattern->FullMatch(current_char)) { |
| 749 break; |
| 750 } |
| 751 } |
| 752 |
| 753 number->assign(UnicodeText::UTF8Substring(number_as_unicode.begin(), |
| 754 reverse_it.base())); |
| 755 } |
| 756 |
| 757 void PhoneNumberUtil::GetSupportedRegions(set<string>* regions) const { |
| 758 DCHECK(regions); |
| 759 for (map<string, PhoneMetadata>::const_iterator it = |
| 760 region_to_metadata_map_->begin(); it != region_to_metadata_map_->end(); |
| 761 ++it) { |
| 762 regions->insert(it->first); |
| 763 } |
| 764 } |
| 765 |
| 766 void PhoneNumberUtil::GetNddPrefixForRegion(const string& region_code, |
| 767 bool strip_non_digits, |
| 768 string* national_prefix) const { |
| 769 DCHECK(national_prefix); |
| 770 if (!IsValidRegionCode(region_code)) { |
| 771 LOG(ERROR) << "Invalid region code provided."; |
| 772 return; |
| 773 } |
| 774 const PhoneMetadata* metadata = GetMetadataForRegion(region_code); |
| 775 national_prefix->assign(metadata->national_prefix()); |
| 776 if (strip_non_digits) { |
| 777 // Note: if any other non-numeric symbols are ever used in national |
| 778 // prefixes, these would have to be removed here as well. |
| 779 strrmm(national_prefix, "~"); |
| 780 } |
| 781 } |
| 782 |
| 783 bool PhoneNumberUtil::IsValidRegionCode(const string& region_code) const { |
| 784 return (region_to_metadata_map_->find(region_code) != |
| 785 region_to_metadata_map_->end()); |
| 786 } |
| 787 |
| 788 bool PhoneNumberUtil::HasValidRegionCode(const string& region_code, |
| 789 int country_calling_code, |
| 790 const string& number) const { |
| 791 if (!IsValidRegionCode(region_code)) { |
| 792 VLOG(1) << "Number " << number |
| 793 << " has invalid or missing country calling code (" |
| 794 << country_calling_code |
| 795 << ")"; |
| 796 return false; |
| 797 } |
| 798 return true; |
| 799 } |
| 800 |
| 801 // Returns a pointer to the phone metadata for the appropriate region. |
| 802 const PhoneMetadata* PhoneNumberUtil::GetMetadataForRegion( |
| 803 const string& region_code) const { |
| 804 map<string, PhoneMetadata>::const_iterator it = |
| 805 region_to_metadata_map_->find(region_code); |
| 806 if (it != region_to_metadata_map_->end()) { |
| 807 return &it->second; |
| 808 } |
| 809 return NULL; |
| 810 } |
| 811 |
| 812 void PhoneNumberUtil::Format(const PhoneNumber& number, |
| 813 PhoneNumberFormat number_format, |
| 814 string* formatted_number) const { |
| 815 DCHECK(formatted_number); |
| 816 int country_calling_code = number.country_code(); |
| 817 string national_significant_number; |
| 818 GetNationalSignificantNumber(number, &national_significant_number); |
| 819 if (number_format == E164) { |
| 820 // Early exit for E164 case since no formatting of the national number needs |
| 821 // to be applied. Extensions are not formatted. |
| 822 FormatNumberByFormat(country_calling_code, E164, |
| 823 national_significant_number, "", formatted_number); |
| 824 return; |
| 825 } |
| 826 // Note here that all NANPA formatting rules are contained by US, so we use |
| 827 // that to format NANPA numbers. The same applies to Russian Fed regions - |
| 828 // rules are contained by Russia. French Indian Ocean country rules are |
| 829 // contained by R\xC3\xA9union. |
| 830 string region_code; |
| 831 GetRegionCodeForCountryCode(country_calling_code, ®ion_code); |
| 832 if (!HasValidRegionCode(region_code, country_calling_code, |
| 833 national_significant_number)) { |
| 834 formatted_number->assign(national_significant_number); |
| 835 return; |
| 836 } |
| 837 string formatted_extension; |
| 838 MaybeGetFormattedExtension(number, region_code, number_format, |
| 839 &formatted_extension); |
| 840 string formatted_national_number; |
| 841 FormatNationalNumber(national_significant_number, region_code, number_format, |
| 842 &formatted_national_number); |
| 843 FormatNumberByFormat(country_calling_code, number_format, |
| 844 formatted_national_number, |
| 845 formatted_extension, formatted_number); |
| 846 } |
| 847 |
| 848 void PhoneNumberUtil::FormatByPattern( |
| 849 const PhoneNumber& number, |
| 850 PhoneNumberFormat number_format, |
| 851 const RepeatedPtrField<NumberFormat>& user_defined_formats, |
| 852 string* formatted_number) const { |
| 853 DCHECK(formatted_number); |
| 854 int country_calling_code = number.country_code(); |
| 855 // Note GetRegionCodeForCountryCode() is used because formatting information |
| 856 // for regions which share a country calling code is contained by only one |
| 857 // region for performance reasons. For example, for NANPA regions it will be |
| 858 // contained in the metadata for US. |
| 859 string region_code; |
| 860 GetRegionCodeForCountryCode(country_calling_code, ®ion_code); |
| 861 string national_significant_number; |
| 862 GetNationalSignificantNumber(number, &national_significant_number); |
| 863 if (!HasValidRegionCode(region_code, country_calling_code, |
| 864 national_significant_number)) { |
| 865 formatted_number->assign(national_significant_number); |
| 866 return; |
| 867 } |
| 868 RepeatedPtrField<NumberFormat> user_defined_formats_copy; |
| 869 for (RepeatedPtrField<NumberFormat>::const_iterator it = |
| 870 user_defined_formats.begin(); |
| 871 it != user_defined_formats.end(); |
| 872 ++it) { |
| 873 string national_prefix_formatting_rule( |
| 874 it->national_prefix_formatting_rule()); |
| 875 if (!national_prefix_formatting_rule.empty()) { |
| 876 const string& national_prefix = |
| 877 GetMetadataForRegion(region_code)->national_prefix(); |
| 878 NumberFormat* num_format_copy = user_defined_formats_copy.Add(); |
| 879 num_format_copy->MergeFrom(*it); |
| 880 if (!national_prefix.empty()) { |
| 881 // Replace $NP with national prefix and $FG with the first group ($1). |
| 882 GlobalReplaceSubstring("$NP", national_prefix, |
| 883 &national_prefix_formatting_rule); |
| 884 GlobalReplaceSubstring("$FG", "$1", |
| 885 &national_prefix_formatting_rule); |
| 886 num_format_copy->set_national_prefix_formatting_rule( |
| 887 national_prefix_formatting_rule); |
| 888 } else { |
| 889 // We don't want to have a rule for how to format the national prefix if |
| 890 // there isn't one. |
| 891 num_format_copy->clear_national_prefix_formatting_rule(); |
| 892 } |
| 893 } else { |
| 894 user_defined_formats_copy.Add()->MergeFrom(*it); |
| 895 } |
| 896 } |
| 897 string formatted_number_without_extension; |
| 898 FormatAccordingToFormats(national_significant_number, |
| 899 user_defined_formats_copy, |
| 900 number_format, national_significant_number, |
| 901 &formatted_number_without_extension); |
| 902 string formatted_extension; |
| 903 MaybeGetFormattedExtension(number, region_code, NATIONAL, |
| 904 &formatted_extension); |
| 905 FormatNumberByFormat(country_calling_code, number_format, |
| 906 formatted_number_without_extension, formatted_extension, |
| 907 formatted_number); |
| 908 } |
| 909 |
| 910 void PhoneNumberUtil::FormatNationalNumberWithCarrierCode( |
| 911 const PhoneNumber& number, |
| 912 const string& carrier_code, |
| 913 string* formatted_number) const { |
| 914 int country_calling_code = number.country_code(); |
| 915 string national_significant_number; |
| 916 GetNationalSignificantNumber(number, &national_significant_number); |
| 917 // Note GetRegionCodeForCountryCode() is used because formatting information |
| 918 // for regions which share a country calling code is contained by only one |
| 919 // region for performance reasons. For example, for NANPA regions it will be |
| 920 // contained in the metadata for US. |
| 921 string region_code; |
| 922 GetRegionCodeForCountryCode(country_calling_code, ®ion_code); |
| 923 if (!HasValidRegionCode(region_code, country_calling_code, |
| 924 national_significant_number)) { |
| 925 formatted_number->assign(national_significant_number); |
| 926 } |
| 927 string formatted_extension; |
| 928 MaybeGetFormattedExtension(number, region_code, NATIONAL, |
| 929 &formatted_extension); |
| 930 string formatted_national_number; |
| 931 FormatNationalNumberWithCarrier(national_significant_number, region_code, |
| 932 NATIONAL, carrier_code, |
| 933 &formatted_national_number); |
| 934 FormatNumberByFormat(country_calling_code, NATIONAL, |
| 935 formatted_national_number, formatted_extension, |
| 936 formatted_number); |
| 937 } |
| 938 |
| 939 void PhoneNumberUtil::FormatNationalNumberWithPreferredCarrierCode( |
| 940 const PhoneNumber& number, |
| 941 const string& fallback_carrier_code, |
| 942 string* formatted_number) const { |
| 943 FormatNationalNumberWithCarrierCode( |
| 944 number, |
| 945 number.has_preferred_domestic_carrier_code() |
| 946 ? number.preferred_domestic_carrier_code() |
| 947 : fallback_carrier_code, |
| 948 formatted_number); |
| 949 } |
| 950 |
| 951 void PhoneNumberUtil::FormatOutOfCountryCallingNumber( |
| 952 const PhoneNumber& number, |
| 953 const string& calling_from, |
| 954 string* formatted_number) const { |
| 955 DCHECK(formatted_number); |
| 956 if (!IsValidRegionCode(calling_from)) { |
| 957 VLOG(1) << "Trying to format number from invalid region. International" |
| 958 << " formatting applied."; |
| 959 Format(number, INTERNATIONAL, formatted_number); |
| 960 return; |
| 961 } |
| 962 int country_code = number.country_code(); |
| 963 string region_code; |
| 964 GetRegionCodeForCountryCode(country_code, ®ion_code); |
| 965 string national_significant_number; |
| 966 GetNationalSignificantNumber(number, &national_significant_number); |
| 967 if (!HasValidRegionCode(region_code, country_code, |
| 968 national_significant_number)) { |
| 969 formatted_number->assign(national_significant_number); |
| 970 return; |
| 971 } |
| 972 if (country_code == kNanpaCountryCode) { |
| 973 if (IsNANPACountry(calling_from)) { |
| 974 // For NANPA regions, return the national format for these regions but |
| 975 // prefix it with the country calling code. |
| 976 string national_number; |
| 977 Format(number, NATIONAL, &national_number); |
| 978 formatted_number->assign(StrCat(country_code, " ", national_number)); |
| 979 return; |
| 980 } |
| 981 } else if (country_code == GetCountryCodeForRegion(calling_from)) { |
| 982 // If neither region is a NANPA region, then we check to see if the |
| 983 // country calling code of the number and the country calling code of the |
| 984 // region we are calling from are the same. |
| 985 // For regions that share a country calling code, the country calling code |
| 986 // need not be dialled. This also applies when dialling within a region, so |
| 987 // this if clause covers both these cases. |
| 988 // Technically this is the case for dialling from la R\xC3\xA9union to other |
| 989 // overseas departments of France (French Guiana, Martinique, Guadeloupe), |
| 990 // but not vice versa - so we don't cover this edge case for now and for |
| 991 // those cases return the version including country calling code. |
| 992 // Details here: |
| 993 // http://www.petitfute.com/voyage/225-info-pratiques-reunion |
| 994 Format(number, NATIONAL, formatted_number); |
| 995 return; |
| 996 } |
| 997 string formatted_national_number; |
| 998 FormatNationalNumber(national_significant_number, region_code, INTERNATIONAL, |
| 999 &formatted_national_number); |
| 1000 const PhoneMetadata* metadata = GetMetadataForRegion(calling_from); |
| 1001 const string& international_prefix = metadata->international_prefix(); |
| 1002 string formatted_extension; |
| 1003 MaybeGetFormattedExtension(number, region_code, INTERNATIONAL, |
| 1004 &formatted_extension); |
| 1005 // For regions that have multiple international prefixes, the international |
| 1006 // format of the number is returned, unless there is a preferred international |
| 1007 // prefix. |
| 1008 const string international_prefix_for_formatting( |
| 1009 unique_international_prefix->FullMatch(international_prefix) |
| 1010 ? international_prefix |
| 1011 : metadata->preferred_international_prefix()); |
| 1012 if (!international_prefix_for_formatting.empty()) { |
| 1013 formatted_number->assign( |
| 1014 StrCat(international_prefix_for_formatting, " ", country_code, " ", |
| 1015 formatted_national_number, formatted_extension)); |
| 1016 } else { |
| 1017 FormatNumberByFormat(country_code, INTERNATIONAL, formatted_national_number, |
| 1018 formatted_extension, formatted_number); |
| 1019 } |
| 1020 } |
| 1021 |
| 1022 void PhoneNumberUtil::FormatInOriginalFormat(const PhoneNumber& number, |
| 1023 const string& region_calling_from, |
| 1024 string* formatted_number) const { |
| 1025 DCHECK(formatted_number); |
| 1026 |
| 1027 if (!number.has_country_code_source()) { |
| 1028 Format(number, NATIONAL, formatted_number); |
| 1029 return; |
| 1030 } |
| 1031 switch (number.country_code_source()) { |
| 1032 case PhoneNumber::FROM_NUMBER_WITH_PLUS_SIGN: |
| 1033 Format(number, INTERNATIONAL, formatted_number); |
| 1034 return; |
| 1035 case PhoneNumber::FROM_NUMBER_WITH_IDD: |
| 1036 FormatOutOfCountryCallingNumber(number, region_calling_from, |
| 1037 formatted_number); |
| 1038 return; |
| 1039 case PhoneNumber::FROM_NUMBER_WITHOUT_PLUS_SIGN: |
| 1040 Format(number, INTERNATIONAL, formatted_number); |
| 1041 formatted_number->erase(formatted_number->begin()); |
| 1042 return; |
| 1043 case PhoneNumber::FROM_DEFAULT_COUNTRY: |
| 1044 default: |
| 1045 Format(number, NATIONAL, formatted_number); |
| 1046 } |
| 1047 } |
| 1048 |
| 1049 void PhoneNumberUtil::FormatOutOfCountryKeepingAlphaChars( |
| 1050 const PhoneNumber& number, |
| 1051 const string& calling_from, |
| 1052 string* formatted_number) const { |
| 1053 // If there is no raw input, then we can't keep alpha characters because there |
| 1054 // aren't any. In this case, we return FormatOutOfCountryCallingNumber. |
| 1055 if (number.raw_input().empty()) { |
| 1056 FormatOutOfCountryCallingNumber(number, calling_from, formatted_number); |
| 1057 return; |
| 1058 } |
| 1059 string region_code; |
| 1060 GetRegionCodeForCountryCode(number.country_code(), ®ion_code); |
| 1061 if (!HasValidRegionCode(region_code, number.country_code(), |
| 1062 number.raw_input())) { |
| 1063 formatted_number->assign(number.raw_input()); |
| 1064 return; |
| 1065 } |
| 1066 // Strip any prefix such as country calling code, IDD, that was present. We do |
| 1067 // this by comparing the number in raw_input with the parsed number. |
| 1068 string raw_input_copy(number.raw_input()); |
| 1069 // Normalize punctuation. We retain number grouping symbols such as " " only. |
| 1070 NormalizeHelper(*all_plus_number_grouping_symbols, true, &raw_input_copy); |
| 1071 // Now we trim everything before the first three digits in the parsed number. |
| 1072 // We choose three because all valid alpha numbers have 3 digits at the start |
| 1073 // - if it does not, then we don't trim anything at all. Similarly, if the |
| 1074 // national number was less than three digits, we don't trim anything at all. |
| 1075 string national_number; |
| 1076 GetNationalSignificantNumber(number, &national_number); |
| 1077 if (national_number.length() > 3) { |
| 1078 size_t first_national_number_digit = |
| 1079 raw_input_copy.find(national_number.substr(0, 3)); |
| 1080 if (first_national_number_digit != string::npos) { |
| 1081 raw_input_copy = raw_input_copy.substr(first_national_number_digit); |
| 1082 } |
| 1083 } |
| 1084 const PhoneMetadata* metadata = GetMetadataForRegion(calling_from); |
| 1085 if (number.country_code() == kNanpaCountryCode) { |
| 1086 if (IsNANPACountry(calling_from)) { |
| 1087 formatted_number->assign(StrCat(number.country_code(), " ", |
| 1088 raw_input_copy)); |
| 1089 return; |
| 1090 } |
| 1091 } else if (number.country_code() == GetCountryCodeForRegion(calling_from)) { |
| 1092 // Here we copy the formatting rules so we can modify the pattern we expect |
| 1093 // to match against. |
| 1094 RepeatedPtrField<NumberFormat> available_formats = |
| 1095 metadata->number_format(); |
| 1096 for (RepeatedPtrField<NumberFormat>::iterator |
| 1097 it = available_formats.begin(); it != available_formats.end(); ++it) { |
| 1098 // The first group is the first group of digits that the user determined. |
| 1099 it->set_pattern("(\\d+)(.*)"); |
| 1100 // Here we just concatenate them back together after the national prefix |
| 1101 // has been fixed. |
| 1102 it->set_format("$1$2"); |
| 1103 } |
| 1104 // Now we format using these patterns instead of the default pattern, but |
| 1105 // with the national prefix prefixed if necessary, by choosing the format |
| 1106 // rule based on the leading digits present in the unformatted national |
| 1107 // number. |
| 1108 // This will not work in the cases where the pattern (and not the |
| 1109 // leading digits) decide whether a national prefix needs to be used, since |
| 1110 // we have overridden the pattern to match anything, but that is not the |
| 1111 // case in the metadata to date. |
| 1112 FormatAccordingToFormats(national_number, available_formats, |
| 1113 NATIONAL, raw_input_copy, formatted_number); |
| 1114 return; |
| 1115 } |
| 1116 |
| 1117 const string& international_prefix = metadata->international_prefix(); |
| 1118 // For regions that have multiple international prefixes, the international |
| 1119 // format of the number is returned, unless there is a preferred international |
| 1120 // prefix. |
| 1121 const string international_prefix_for_formatting( |
| 1122 unique_international_prefix->FullMatch(international_prefix) |
| 1123 ? international_prefix |
| 1124 : metadata->preferred_international_prefix()); |
| 1125 if (!international_prefix_for_formatting.empty()) { |
| 1126 formatted_number->assign( |
| 1127 StrCat(international_prefix_for_formatting, " ", number.country_code(), |
| 1128 " ", raw_input_copy)); |
| 1129 } else { |
| 1130 FormatNumberByFormat(number.country_code(), INTERNATIONAL, raw_input_copy, |
| 1131 "", formatted_number); |
| 1132 } |
| 1133 } |
| 1134 |
| 1135 void PhoneNumberUtil::FormatNationalNumber( |
| 1136 const string& number, |
| 1137 const string& region_code, |
| 1138 PhoneNumberFormat number_format, |
| 1139 string* formatted_number) const { |
| 1140 DCHECK(formatted_number); |
| 1141 FormatNationalNumberWithCarrier(number, region_code, number_format, "", |
| 1142 formatted_number); |
| 1143 } |
| 1144 |
| 1145 // Note in some regions, the national number can be written in two completely |
| 1146 // different ways depending on whether it forms part of the NATIONAL format or |
| 1147 // INTERNATIONAL format. The number_format parameter here is used to specify |
| 1148 // which format to use for those cases. If a carrier_code is specified, this |
| 1149 // will be inserted into the formatted string to replace $CC. |
| 1150 void PhoneNumberUtil::FormatNationalNumberWithCarrier( |
| 1151 const string& number, |
| 1152 const string& region_code, |
| 1153 PhoneNumberFormat number_format, |
| 1154 const string& carrier_code, |
| 1155 string* formatted_number) const { |
| 1156 DCHECK(formatted_number); |
| 1157 const PhoneMetadata* metadata = GetMetadataForRegion(region_code); |
| 1158 // When the intl_number_formats exists, we use that to format national number |
| 1159 // for the INTERNATIONAL format instead of using the number_formats. |
| 1160 const RepeatedPtrField<NumberFormat> available_formats = |
| 1161 (metadata->intl_number_format_size() == 0 || number_format == NATIONAL) |
| 1162 ? metadata->number_format() |
| 1163 : metadata->intl_number_format(); |
| 1164 FormatAccordingToFormatsWithCarrier(number, available_formats, number_format, |
| 1165 number, carrier_code, formatted_number); |
| 1166 if (number_format == RFC3966) { |
| 1167 // Replace all separators with a "-". |
| 1168 separator_pattern->GlobalReplace(formatted_number, "-"); |
| 1169 } |
| 1170 } |
| 1171 |
| 1172 // Gets the formatted extension of a phone number, if the phone number had an |
| 1173 // extension specified. If not, it returns an empty string. |
| 1174 void PhoneNumberUtil::MaybeGetFormattedExtension( |
| 1175 const PhoneNumber& number, |
| 1176 const string& region_code, |
| 1177 PhoneNumberFormat number_format, |
| 1178 string* extension) const { |
| 1179 DCHECK(extension); |
| 1180 if (!number.has_extension() || number.extension().length() == 0) { |
| 1181 extension->assign(""); |
| 1182 } else { |
| 1183 if (number_format == RFC3966) { |
| 1184 StrAppend(extension, kRfc3966ExtnPrefix, number.extension()); |
| 1185 return; |
| 1186 } |
| 1187 FormatExtension(number.extension(), region_code, extension); |
| 1188 } |
| 1189 } |
| 1190 |
| 1191 // Formats the extension part of the phone number by prefixing it with the |
| 1192 // appropriate extension prefix. This will be the default extension prefix, |
| 1193 // unless overridden by a preferred extension prefix for this region. |
| 1194 void PhoneNumberUtil::FormatExtension(const string& extension_digits, |
| 1195 const string& region_code, |
| 1196 string* extension) const { |
| 1197 DCHECK(extension); |
| 1198 const PhoneMetadata* metadata = GetMetadataForRegion(region_code); |
| 1199 if (metadata->has_preferred_extn_prefix()) { |
| 1200 extension->assign(StrCat(metadata->preferred_extn_prefix(), |
| 1201 extension_digits)); |
| 1202 } else { |
| 1203 extension->assign(StrCat(kDefaultExtnPrefix, extension_digits)); |
| 1204 } |
| 1205 } |
| 1206 |
| 1207 bool PhoneNumberUtil::IsNANPACountry(const string& region_code) const { |
| 1208 return nanpa_regions_->find(region_code) != nanpa_regions_->end(); |
| 1209 } |
| 1210 |
| 1211 // Returns the region codes that matches the specific country calling code. In |
| 1212 // the case of no region code being found, region_codes will be left empty. |
| 1213 void PhoneNumberUtil::GetRegionCodesForCountryCallingCode( |
| 1214 int country_calling_code, |
| 1215 list<string>* region_codes) const { |
| 1216 DCHECK(region_codes); |
| 1217 // Create a IntRegionsPair with the country_code passed in, and use it to |
| 1218 // locate the pair with the same country_code in the sorted vector. |
| 1219 IntRegionsPair target_pair; |
| 1220 target_pair.first = country_calling_code; |
| 1221 typedef vector<IntRegionsPair>::const_iterator ConstIterator; |
| 1222 pair<ConstIterator, ConstIterator> range = equal_range( |
| 1223 country_calling_code_to_region_code_map_->begin(), |
| 1224 country_calling_code_to_region_code_map_->end(), |
| 1225 target_pair, OrderByFirst()); |
| 1226 if (range.first != range.second) { |
| 1227 region_codes->insert(region_codes->begin(), |
| 1228 range.first->second->begin(), |
| 1229 range.first->second->end()); |
| 1230 } |
| 1231 } |
| 1232 |
| 1233 // Returns the region code that matches the specific country calling code. In |
| 1234 // the case of no region code being found, ZZ will be returned. |
| 1235 void PhoneNumberUtil::GetRegionCodeForCountryCode( |
| 1236 int country_calling_code, |
| 1237 string* region_code) const { |
| 1238 DCHECK(region_code); |
| 1239 list<string> region_codes; |
| 1240 |
| 1241 GetRegionCodesForCountryCallingCode(country_calling_code, ®ion_codes); |
| 1242 *region_code = (region_codes.size() > 0) |
| 1243 ? region_codes.front() : RegionCode::GetUnknown(); |
| 1244 } |
| 1245 |
| 1246 void PhoneNumberUtil::GetRegionCodeForNumber(const PhoneNumber& number, |
| 1247 string* region_code) const { |
| 1248 DCHECK(region_code); |
| 1249 int country_calling_code = number.country_code(); |
| 1250 list<string> region_codes; |
| 1251 GetRegionCodesForCountryCallingCode(country_calling_code, ®ion_codes); |
| 1252 if (region_codes.size() == 0) { |
| 1253 string number_string; |
| 1254 GetNationalSignificantNumber(number, &number_string); |
| 1255 LOG(WARNING) << "Missing/invalid country calling code (" |
| 1256 << country_calling_code |
| 1257 << ") for number " << number_string; |
| 1258 *region_code = RegionCode::GetUnknown(); |
| 1259 return; |
| 1260 } |
| 1261 if (region_codes.size() == 1) { |
| 1262 *region_code = region_codes.front(); |
| 1263 } else { |
| 1264 GetRegionCodeForNumberFromRegionList(number, region_codes, region_code); |
| 1265 } |
| 1266 } |
| 1267 |
| 1268 void PhoneNumberUtil::GetRegionCodeForNumberFromRegionList( |
| 1269 const PhoneNumber& number, const list<string>& region_codes, |
| 1270 string* region_code) const { |
| 1271 DCHECK(region_code); |
| 1272 string national_number; |
| 1273 GetNationalSignificantNumber(number, &national_number); |
| 1274 for (list<string>::const_iterator it = region_codes.begin(); |
| 1275 it != region_codes.end(); ++it) { |
| 1276 const PhoneMetadata* metadata = GetMetadataForRegion(*it); |
| 1277 if (metadata->has_leading_digits()) { |
| 1278 const scoped_ptr<RegExpInput> number( |
| 1279 RegExpInput::Create(national_number)); |
| 1280 if (regexp_cache->GetRegExp(metadata->leading_digits()).Consume( |
| 1281 number.get())) { |
| 1282 *region_code = *it; |
| 1283 return; |
| 1284 } |
| 1285 } else if (GetNumberTypeHelper(national_number, *metadata) != UNKNOWN) { |
| 1286 *region_code = *it; |
| 1287 return; |
| 1288 } |
| 1289 } |
| 1290 *region_code = RegionCode::GetUnknown(); |
| 1291 } |
| 1292 |
| 1293 int PhoneNumberUtil::GetCountryCodeForRegion(const string& region_code) const { |
| 1294 if (!IsValidRegionCode(region_code)) { |
| 1295 LOG(ERROR) << "Invalid or unknown region code provided."; |
| 1296 return 0; |
| 1297 } |
| 1298 const PhoneMetadata* metadata = GetMetadataForRegion(region_code); |
| 1299 return metadata->country_code(); |
| 1300 } |
| 1301 |
| 1302 // Gets a valid fixed-line number for the specified region_code. Returns false |
| 1303 // if the country was unknown or if no number exists. |
| 1304 bool PhoneNumberUtil::GetExampleNumber(const string& region_code, |
| 1305 PhoneNumber* number) const { |
| 1306 DCHECK(number); |
| 1307 return GetExampleNumberForType(region_code, |
| 1308 FIXED_LINE, |
| 1309 number); |
| 1310 } |
| 1311 |
| 1312 // Gets a valid number for the specified region_code and type. Returns false if |
| 1313 // the country was unknown or if no number exists. |
| 1314 bool PhoneNumberUtil::GetExampleNumberForType( |
| 1315 const string& region_code, |
| 1316 PhoneNumberUtil::PhoneNumberType type, |
| 1317 PhoneNumber* number) const { |
| 1318 DCHECK(number); |
| 1319 if (!IsValidRegionCode(region_code)) { |
| 1320 LOG(WARNING) << "Invalid or unknown region code provided."; |
| 1321 return false; |
| 1322 } |
| 1323 const PhoneMetadata* region_metadata = GetMetadataForRegion(region_code); |
| 1324 const PhoneNumberDesc* description = |
| 1325 GetNumberDescByType(*region_metadata, type); |
| 1326 if (description && description->has_example_number()) { |
| 1327 return (Parse(description->example_number(), |
| 1328 region_code, |
| 1329 number) == NO_PARSING_ERROR); |
| 1330 } |
| 1331 return false; |
| 1332 } |
| 1333 |
| 1334 PhoneNumberUtil::ErrorType PhoneNumberUtil::Parse(const string& number_to_parse, |
| 1335 const string& default_region, |
| 1336 PhoneNumber* number) const { |
| 1337 DCHECK(number); |
| 1338 return ParseHelper(number_to_parse, default_region, false, true, number); |
| 1339 } |
| 1340 |
| 1341 PhoneNumberUtil::ErrorType PhoneNumberUtil::ParseAndKeepRawInput( |
| 1342 const string& number_to_parse, |
| 1343 const string& default_region, |
| 1344 PhoneNumber* number) const { |
| 1345 DCHECK(number); |
| 1346 return ParseHelper(number_to_parse, default_region, true, true, number); |
| 1347 } |
| 1348 |
| 1349 // Checks to see that the region code used is valid, or if it is not valid, that |
| 1350 // the number to parse starts with a + symbol so that we can attempt to infer |
| 1351 // the country from the number. Returns false if it cannot use the region |
| 1352 // provided and the region cannot be inferred. |
| 1353 bool PhoneNumberUtil::CheckRegionForParsing( |
| 1354 const string& number_to_parse, |
| 1355 const string& default_region) const { |
| 1356 if (!IsValidRegionCode(default_region) && !number_to_parse.empty()) { |
| 1357 const scoped_ptr<RegExpInput> number(RegExpInput::Create(number_to_parse)); |
| 1358 if (!plus_chars_pattern->Consume(number.get())) { |
| 1359 return false; |
| 1360 } |
| 1361 } |
| 1362 return true; |
| 1363 } |
| 1364 |
| 1365 PhoneNumberUtil::ErrorType PhoneNumberUtil::ParseHelper( |
| 1366 const string& number_to_parse, |
| 1367 const string& default_region, |
| 1368 bool keep_raw_input, |
| 1369 bool check_region, |
| 1370 PhoneNumber* phone_number) const { |
| 1371 DCHECK(phone_number); |
| 1372 // Extract a possible number from the string passed in (this strips leading |
| 1373 // characters that could not be the start of a phone number.) |
| 1374 string national_number; |
| 1375 ExtractPossibleNumber(number_to_parse, &national_number); |
| 1376 if (!IsViablePhoneNumber(national_number)) { |
| 1377 VLOG(2) << "The string supplied did not seem to be a phone number."; |
| 1378 return NOT_A_NUMBER; |
| 1379 } |
| 1380 |
| 1381 if (check_region && |
| 1382 !CheckRegionForParsing(national_number, default_region)) { |
| 1383 VLOG(1) << "Missing or invalid default country."; |
| 1384 return INVALID_COUNTRY_CODE_ERROR; |
| 1385 } |
| 1386 PhoneNumber temp_number; |
| 1387 if (keep_raw_input) { |
| 1388 temp_number.set_raw_input(number_to_parse); |
| 1389 } |
| 1390 // Attempt to parse extension first, since it doesn't require country-specific |
| 1391 // data and we want to have the non-normalised number here. |
| 1392 string extension; |
| 1393 MaybeStripExtension(&national_number, &extension); |
| 1394 if (!extension.empty()) { |
| 1395 temp_number.set_extension(extension); |
| 1396 } |
| 1397 const PhoneMetadata* country_metadata = GetMetadataForRegion(default_region); |
| 1398 // Check to see if the number is given in international format so we know |
| 1399 // whether this number is from the default country or not. |
| 1400 string normalized_national_number(national_number); |
| 1401 ErrorType country_code_error = |
| 1402 MaybeExtractCountryCode(country_metadata, keep_raw_input, |
| 1403 &normalized_national_number, &temp_number); |
| 1404 int country_code = temp_number.country_code(); |
| 1405 if (country_code_error != NO_PARSING_ERROR) { |
| 1406 return country_code_error; |
| 1407 } |
| 1408 if (country_code != 0) { |
| 1409 string phone_number_region; |
| 1410 GetRegionCodeForCountryCode(country_code, &phone_number_region); |
| 1411 if (phone_number_region != default_region) { |
| 1412 country_metadata = GetMetadataForRegion(phone_number_region); |
| 1413 } |
| 1414 } else if (country_metadata) { |
| 1415 // If no extracted country calling code, use the region supplied instead. |
| 1416 // Note that the national number was already normalized by |
| 1417 // MaybeExtractCountryCode. |
| 1418 country_code = country_metadata->country_code(); |
| 1419 } |
| 1420 if (normalized_national_number.length() < kMinLengthForNsn) { |
| 1421 VLOG(2) << "The string supplied is too short to be a phone number."; |
| 1422 return TOO_SHORT_NSN; |
| 1423 } |
| 1424 if (country_metadata) { |
| 1425 string* carrier_code = keep_raw_input ? |
| 1426 temp_number.mutable_preferred_domestic_carrier_code() : NULL; |
| 1427 MaybeStripNationalPrefixAndCarrierCode(*country_metadata, |
| 1428 &normalized_national_number, |
| 1429 carrier_code); |
| 1430 } |
| 1431 size_t normalized_national_number_length = |
| 1432 normalized_national_number.length(); |
| 1433 if (normalized_national_number_length < kMinLengthForNsn) { |
| 1434 VLOG(2) << "The string supplied is too short to be a phone number."; |
| 1435 return TOO_SHORT_NSN; |
| 1436 } |
| 1437 if (normalized_national_number_length > kMaxLengthForNsn) { |
| 1438 VLOG(2) << "The string supplied is too long to be a phone number."; |
| 1439 return TOO_LONG_NSN; |
| 1440 } |
| 1441 temp_number.set_country_code(country_code); |
| 1442 if (country_metadata && |
| 1443 country_metadata->leading_zero_possible() && |
| 1444 normalized_national_number[0] == '0') { |
| 1445 temp_number.set_italian_leading_zero(true); |
| 1446 } |
| 1447 uint64 number_as_int; |
| 1448 safe_strtou64(normalized_national_number, &number_as_int); |
| 1449 temp_number.set_national_number(number_as_int); |
| 1450 phone_number->MergeFrom(temp_number); |
| 1451 return NO_PARSING_ERROR; |
| 1452 } |
| 1453 |
| 1454 // Attempts to extract a possible number from the string passed in. This |
| 1455 // currently strips all leading characters that could not be used to start a |
| 1456 // phone number. Characters that can be used to start a phone number are |
| 1457 // defined in the valid_start_char_pattern. If none of these characters are |
| 1458 // found in the number passed in, an empty string is returned. This function |
| 1459 // also attempts to strip off any alternative extensions or endings if two or |
| 1460 // more are present, such as in the case of: (530) 583-6985 x302/x2303. The |
| 1461 // second extension here makes this actually two phone numbers, (530) 583-6985 |
| 1462 // x302 and (530) 583-6985 x2303. We remove the second extension so that the |
| 1463 // first number is parsed correctly. |
| 1464 void PhoneNumberUtil::ExtractPossibleNumber(const string& number, |
| 1465 string* extracted_number) const { |
| 1466 DCHECK(extracted_number); |
| 1467 |
| 1468 UnicodeText number_as_unicode; |
| 1469 number_as_unicode.PointToUTF8(number.data(), number.size()); |
| 1470 char current_char[5]; |
| 1471 int len; |
| 1472 UnicodeText::const_iterator it; |
| 1473 for (it = number_as_unicode.begin(); it != number_as_unicode.end(); ++it) { |
| 1474 len = it.get_utf8(current_char); |
| 1475 current_char[len] = '\0'; |
| 1476 if (valid_start_char_pattern->FullMatch(current_char)) { |
| 1477 break; |
| 1478 } |
| 1479 } |
| 1480 |
| 1481 if (it == number_as_unicode.end()) { |
| 1482 // No valid start character was found. extracted_number should be set to |
| 1483 // empty string. |
| 1484 extracted_number->assign(""); |
| 1485 return; |
| 1486 } |
| 1487 |
| 1488 extracted_number->assign( |
| 1489 UnicodeText::UTF8Substring(it, number_as_unicode.end())); |
| 1490 TrimUnwantedEndChars(extracted_number); |
| 1491 if (extracted_number->length() == 0) { |
| 1492 return; |
| 1493 } |
| 1494 |
| 1495 VLOG(3) << "After stripping starting and trailing characters, left with: " |
| 1496 << *extracted_number; |
| 1497 |
| 1498 // Now remove any extra numbers at the end. |
| 1499 capture_up_to_second_number_start_pattern->PartialMatch(*extracted_number, |
| 1500 extracted_number); |
| 1501 } |
| 1502 |
| 1503 bool PhoneNumberUtil::IsPossibleNumber(const PhoneNumber& number) const { |
| 1504 return IsPossibleNumberWithReason(number) == IS_POSSIBLE; |
| 1505 } |
| 1506 |
| 1507 bool PhoneNumberUtil::IsPossibleNumberForString( |
| 1508 const string& number, |
| 1509 const string& region_dialing_from) const { |
| 1510 PhoneNumber number_proto; |
| 1511 if (Parse(number, region_dialing_from, &number_proto) == NO_PARSING_ERROR) { |
| 1512 return IsPossibleNumber(number_proto); |
| 1513 } else { |
| 1514 return false; |
| 1515 } |
| 1516 } |
| 1517 |
| 1518 PhoneNumberUtil::ValidationResult PhoneNumberUtil::IsPossibleNumberWithReason( |
| 1519 const PhoneNumber& number) const { |
| 1520 string national_number; |
| 1521 GetNationalSignificantNumber(number, &national_number); |
| 1522 int country_code = number.country_code(); |
| 1523 // Note: For Russian Fed and NANPA numbers, we just use the rules from the |
| 1524 // default region (US or Russia) since the GetRegionCodeForNumber will not |
| 1525 // work if the number is possible but not valid. This would need to be |
| 1526 // revisited if the possible number pattern ever differed between various |
| 1527 // regions within those plans. |
| 1528 string region_code; |
| 1529 GetRegionCodeForCountryCode(country_code, ®ion_code); |
| 1530 if (!HasValidRegionCode(region_code, country_code, national_number)) { |
| 1531 return INVALID_COUNTRY_CODE; |
| 1532 } |
| 1533 const PhoneNumberDesc& general_num_desc = |
| 1534 GetMetadataForRegion(region_code)->general_desc(); |
| 1535 // Handling case of numbers with no metadata. |
| 1536 if (!general_num_desc.has_national_number_pattern()) { |
| 1537 size_t number_length = national_number.length(); |
| 1538 if (number_length < kMinLengthForNsn) { |
| 1539 return TOO_SHORT; |
| 1540 } else if (number_length > kMaxLengthForNsn) { |
| 1541 return TOO_LONG; |
| 1542 } else { |
| 1543 return IS_POSSIBLE; |
| 1544 } |
| 1545 } |
| 1546 const RegExp& possible_number_pattern = regexp_cache->GetRegExp( |
| 1547 StrCat("(", general_num_desc.possible_number_pattern(), ")")); |
| 1548 return TestNumberLengthAgainstPattern(possible_number_pattern, |
| 1549 national_number); |
| 1550 } |
| 1551 |
| 1552 bool PhoneNumberUtil::TruncateTooLongNumber(PhoneNumber* number) const { |
| 1553 if (IsValidNumber(*number)) { |
| 1554 return true; |
| 1555 } |
| 1556 PhoneNumber number_copy(*number); |
| 1557 uint64 national_number = number->national_number(); |
| 1558 do { |
| 1559 national_number /= 10; |
| 1560 number_copy.set_national_number(national_number); |
| 1561 if (IsPossibleNumberWithReason(number_copy) == TOO_SHORT || |
| 1562 national_number == 0) { |
| 1563 return false; |
| 1564 } |
| 1565 } while (!IsValidNumber(number_copy)); |
| 1566 number->set_national_number(national_number); |
| 1567 return true; |
| 1568 } |
| 1569 |
| 1570 PhoneNumberUtil::PhoneNumberType PhoneNumberUtil::GetNumberType( |
| 1571 const PhoneNumber& number) const { |
| 1572 string region_code; |
| 1573 GetRegionCodeForNumber(number, ®ion_code); |
| 1574 if (!IsValidRegionCode(region_code)) { |
| 1575 return UNKNOWN; |
| 1576 } |
| 1577 string national_significant_number; |
| 1578 GetNationalSignificantNumber(number, &national_significant_number); |
| 1579 return GetNumberTypeHelper(national_significant_number, |
| 1580 *GetMetadataForRegion(region_code)); |
| 1581 } |
| 1582 |
| 1583 bool PhoneNumberUtil::IsValidNumber(const PhoneNumber& number) const { |
| 1584 string region_code; |
| 1585 GetRegionCodeForNumber(number, ®ion_code); |
| 1586 return IsValidRegionCode(region_code) && |
| 1587 IsValidNumberForRegion(number, region_code); |
| 1588 } |
| 1589 |
| 1590 bool PhoneNumberUtil::IsValidNumberForRegion(const PhoneNumber& number, |
| 1591 const string& region_code) const { |
| 1592 if (number.country_code() != GetCountryCodeForRegion(region_code)) { |
| 1593 return false; |
| 1594 } |
| 1595 const PhoneMetadata* metadata = GetMetadataForRegion(region_code); |
| 1596 const PhoneNumberDesc& general_desc = metadata->general_desc(); |
| 1597 string national_number; |
| 1598 GetNationalSignificantNumber(number, &national_number); |
| 1599 |
| 1600 // For regions where we don't have metadata for PhoneNumberDesc, we treat |
| 1601 // any number passed in as a valid number if its national significant number |
| 1602 // is between the minimum and maximum lengths defined by ITU for a national |
| 1603 // significant number. |
| 1604 if (!general_desc.has_national_number_pattern()) { |
| 1605 VLOG(3) << "Validating number with incomplete metadata."; |
| 1606 size_t number_length = national_number.length(); |
| 1607 return number_length > kMinLengthForNsn && |
| 1608 number_length <= kMaxLengthForNsn; |
| 1609 } |
| 1610 return GetNumberTypeHelper(national_number, *metadata) != UNKNOWN; |
| 1611 } |
| 1612 |
| 1613 bool PhoneNumberUtil::IsLeadingZeroPossible(int country_calling_code) const { |
| 1614 string region_code; |
| 1615 GetRegionCodeForCountryCode(country_calling_code, ®ion_code); |
| 1616 const PhoneMetadata* main_metadata_for_calling_code = |
| 1617 GetMetadataForRegion(region_code); |
| 1618 if (!main_metadata_for_calling_code) return false; |
| 1619 return main_metadata_for_calling_code->leading_zero_possible(); |
| 1620 } |
| 1621 |
| 1622 void PhoneNumberUtil::GetNationalSignificantNumber( |
| 1623 const PhoneNumber& number, |
| 1624 string* national_number) const { |
| 1625 // The leading zero in the national (significant) number of an Italian phone |
| 1626 // number has a special meaning. Unlike the rest of the world, it indicates |
| 1627 // the number is a landline number. There have been plans to migrate landline |
| 1628 // numbers to start with the digit two since December 2000, but it has not yet |
| 1629 // happened. |
| 1630 // See http://en.wikipedia.org/wiki/%2B39 for more details. |
| 1631 // Other regions such as Cote d'Ivoire and Gabon use this for their mobile |
| 1632 // numbers. |
| 1633 DCHECK(national_number); |
| 1634 StrAppend(national_number, |
| 1635 (IsLeadingZeroPossible(number.country_code()) && |
| 1636 number.has_italian_leading_zero() && |
| 1637 number.italian_leading_zero()) |
| 1638 ? "0" |
| 1639 : ""); |
| 1640 StrAppend(national_number, number.national_number()); |
| 1641 } |
| 1642 |
| 1643 int PhoneNumberUtil::GetLengthOfGeographicalAreaCode( |
| 1644 const PhoneNumber& number) const { |
| 1645 string region_code; |
| 1646 GetRegionCodeForNumber(number, ®ion_code); |
| 1647 if (!IsValidRegionCode(region_code)) { |
| 1648 return 0; |
| 1649 } |
| 1650 const PhoneMetadata* metadata = GetMetadataForRegion(region_code); |
| 1651 DCHECK(metadata); |
| 1652 if (!metadata->has_national_prefix()) { |
| 1653 return 0; |
| 1654 } |
| 1655 |
| 1656 string national_significant_number; |
| 1657 GetNationalSignificantNumber(number, &national_significant_number); |
| 1658 PhoneNumberType type = GetNumberTypeHelper(national_significant_number, |
| 1659 *metadata); |
| 1660 // Most numbers other than the two types below have to be dialled in full. |
| 1661 if (type != FIXED_LINE && type != FIXED_LINE_OR_MOBILE) { |
| 1662 return 0; |
| 1663 } |
| 1664 |
| 1665 return GetLengthOfNationalDestinationCode(number); |
| 1666 } |
| 1667 |
| 1668 int PhoneNumberUtil::GetLengthOfNationalDestinationCode( |
| 1669 const PhoneNumber& number) const { |
| 1670 PhoneNumber copied_proto(number); |
| 1671 if (number.has_extension()) { |
| 1672 // Clear the extension so it's not included when formatting. |
| 1673 copied_proto.clear_extension(); |
| 1674 } |
| 1675 |
| 1676 string formatted_number; |
| 1677 Format(copied_proto, INTERNATIONAL, &formatted_number); |
| 1678 const scoped_ptr<RegExpInput> i18n_number( |
| 1679 RegExpInput::Create(formatted_number)); |
| 1680 string digit_group; |
| 1681 string ndc; |
| 1682 string third_group; |
| 1683 for (int i = 0; i < 3; ++i) { |
| 1684 if (!capturing_ascii_digits_pattern->FindAndConsume(i18n_number.get(), |
| 1685 &digit_group)) { |
| 1686 // We should find at least three groups. |
| 1687 return 0; |
| 1688 } |
| 1689 if (i == 1) { |
| 1690 ndc = digit_group; |
| 1691 } else if (i == 2) { |
| 1692 third_group = digit_group; |
| 1693 } |
| 1694 } |
| 1695 string region_code; |
| 1696 GetRegionCodeForNumber(number, ®ion_code); |
| 1697 if (region_code == "AR" && |
| 1698 GetNumberType(number) == MOBILE) { |
| 1699 // Argentinian mobile numbers, when formatted in the international format, |
| 1700 // are in the form of +54 9 NDC XXXX.... As a result, we take the length of |
| 1701 // the third group (NDC) and add 1 for the digit 9, which also forms part of |
| 1702 // the national significant number. |
| 1703 return third_group.size() + 1; |
| 1704 } |
| 1705 return ndc.size(); |
| 1706 } |
| 1707 |
| 1708 // static |
| 1709 void PhoneNumberUtil::NormalizeDigitsOnly(string* number) { |
| 1710 DCHECK(number); |
| 1711 static const scoped_ptr<const RegExp> non_digits_pattern(RegExp::Create( |
| 1712 StrCat("[^", kDigits, "]"))); |
| 1713 // Delete everything that isn't valid digits. |
| 1714 non_digits_pattern->GlobalReplace(number, ""); |
| 1715 // Normalize all decimal digits to ASCII digits. |
| 1716 number->assign(NormalizeUTF8::NormalizeDecimalDigits(*number)); |
| 1717 } |
| 1718 |
| 1719 bool PhoneNumberUtil::IsAlphaNumber(const string& number) const { |
| 1720 if (!IsViablePhoneNumber(number)) { |
| 1721 // Number is too short, or doesn't match the basic phone number pattern. |
| 1722 return false; |
| 1723 } |
| 1724 // Copy the number, since we are going to try and strip the extension from it. |
| 1725 string number_copy(number); |
| 1726 string extension; |
| 1727 MaybeStripExtension(&number_copy, &extension); |
| 1728 return valid_alpha_phone_pattern->FullMatch(number_copy); |
| 1729 } |
| 1730 |
| 1731 void PhoneNumberUtil::ConvertAlphaCharactersInNumber(string* number) const { |
| 1732 DCHECK(number); |
| 1733 NormalizeHelper(*alpha_phone_mappings, false, number); |
| 1734 } |
| 1735 |
| 1736 // Normalizes a string of characters representing a phone number. This performs |
| 1737 // the following conversions: |
| 1738 // - Punctuation is stripped. |
| 1739 // For ALPHA/VANITY numbers: |
| 1740 // - Letters are converted to their numeric representation on a telephone |
| 1741 // keypad. The keypad used here is the one defined in ITU Recommendation |
| 1742 // E.161. This is only done if there are 3 or more letters in the number, to |
| 1743 // lessen the risk that such letters are typos. |
| 1744 // For other numbers: |
| 1745 // - Wide-ascii digits are converted to normal ASCII (European) digits. |
| 1746 // - Arabic-Indic numerals are converted to European numerals. |
| 1747 // - Spurious alpha characters are stripped. |
| 1748 void PhoneNumberUtil::Normalize(string* number) const { |
| 1749 DCHECK(number); |
| 1750 if (valid_alpha_phone_pattern->PartialMatch(*number)) { |
| 1751 NormalizeHelper(*alpha_phone_mappings, true, number); |
| 1752 } |
| 1753 NormalizeDigitsOnly(number); |
| 1754 } |
| 1755 |
| 1756 // Checks to see if the string of characters could possibly be a phone number at |
| 1757 // all. At the moment, checks to see that the string begins with at least 3 |
| 1758 // digits, ignoring any punctuation commonly found in phone numbers. This |
| 1759 // method does not require the number to be normalized in advance - but does |
| 1760 // assume that leading non-number symbols have been removed, such as by the |
| 1761 // method ExtractPossibleNumber. |
| 1762 bool PhoneNumberUtil::IsViablePhoneNumber(const string& number) const { |
| 1763 if (number.length() < kMinLengthForNsn) { |
| 1764 VLOG(2) << "Number too short to be viable:" << number; |
| 1765 return false; |
| 1766 } |
| 1767 return valid_phone_number_pattern->FullMatch(number); |
| 1768 } |
| 1769 |
| 1770 // Strips any international prefix (such as +, 00, 011) present in the number |
| 1771 // provided, normalizes the resulting number, and indicates if an international |
| 1772 // prefix was present. |
| 1773 // |
| 1774 // possible_idd_prefix represents the international direct dialing prefix from |
| 1775 // the region we think this number may be dialed in. |
| 1776 // Returns true if an international dialing prefix could be removed from the |
| 1777 // number, otherwise false if the number did not seem to be in international |
| 1778 // format. |
| 1779 PhoneNumber::CountryCodeSource |
| 1780 PhoneNumberUtil::MaybeStripInternationalPrefixAndNormalize( |
| 1781 const string& possible_idd_prefix, |
| 1782 string* number) const { |
| 1783 DCHECK(number); |
| 1784 if (number->empty()) { |
| 1785 return PhoneNumber::FROM_DEFAULT_COUNTRY; |
| 1786 } |
| 1787 const scoped_ptr<RegExpInput> number_string_piece( |
| 1788 RegExpInput::Create(*number)); |
| 1789 if (plus_chars_pattern->Consume(number_string_piece.get())) { |
| 1790 number->assign(number_string_piece->ToString()); |
| 1791 // Can now normalize the rest of the number since we've consumed the "+" |
| 1792 // sign at the start. |
| 1793 Normalize(number); |
| 1794 return PhoneNumber::FROM_NUMBER_WITH_PLUS_SIGN; |
| 1795 } |
| 1796 // Attempt to parse the first digits as an international prefix. |
| 1797 const RegExp& idd_pattern = regexp_cache->GetRegExp(possible_idd_prefix); |
| 1798 if (ParsePrefixAsIdd(idd_pattern, number)) { |
| 1799 Normalize(number); |
| 1800 return PhoneNumber::FROM_NUMBER_WITH_IDD; |
| 1801 } |
| 1802 // If still not found, then try and normalize the number and then try again. |
| 1803 // This shouldn't be done before, since non-numeric characters (+ and ~) may |
| 1804 // legally be in the international prefix. |
| 1805 Normalize(number); |
| 1806 return ParsePrefixAsIdd(idd_pattern, number) |
| 1807 ? PhoneNumber::FROM_NUMBER_WITH_IDD |
| 1808 : PhoneNumber::FROM_DEFAULT_COUNTRY; |
| 1809 } |
| 1810 |
| 1811 // Strips any national prefix (such as 0, 1) present in the number provided. |
| 1812 // The number passed in should be the normalized telephone number that we wish |
| 1813 // to strip any national dialing prefix from. The metadata should be for the |
| 1814 // region that we think this number is from. |
| 1815 void PhoneNumberUtil::MaybeStripNationalPrefixAndCarrierCode( |
| 1816 const PhoneMetadata& metadata, |
| 1817 string* number, |
| 1818 string* carrier_code) const { |
| 1819 DCHECK(number); |
| 1820 string carrier_code_temp; |
| 1821 const string& possible_national_prefix = |
| 1822 metadata.national_prefix_for_parsing(); |
| 1823 if (number->empty() || possible_national_prefix.empty()) { |
| 1824 // Early return for numbers of zero length or with no national prefix |
| 1825 // possible. |
| 1826 return; |
| 1827 } |
| 1828 // We use two copies here since Consume modifies the phone number, and if the |
| 1829 // first if-clause fails the number will already be changed. |
| 1830 const scoped_ptr<RegExpInput> number_copy(RegExpInput::Create(*number)); |
| 1831 const scoped_ptr<RegExpInput> number_copy_without_transform( |
| 1832 RegExpInput::Create(*number)); |
| 1833 string number_string_copy(*number); |
| 1834 string captured_part_of_prefix; |
| 1835 const RegExp& national_number_rule = regexp_cache->GetRegExp( |
| 1836 metadata.general_desc().national_number_pattern()); |
| 1837 // Attempt to parse the first digits as a national prefix. We make a |
| 1838 // copy so that we can revert to the original string if necessary. |
| 1839 const string& transform_rule = metadata.national_prefix_transform_rule(); |
| 1840 const RegExp& possible_national_prefix_pattern = |
| 1841 regexp_cache->GetRegExp(possible_national_prefix); |
| 1842 if (!transform_rule.empty() && |
| 1843 (possible_national_prefix_pattern.Consume( |
| 1844 number_copy.get(), &carrier_code_temp, &captured_part_of_prefix) || |
| 1845 possible_national_prefix_pattern.Consume( |
| 1846 number_copy.get(), &captured_part_of_prefix)) && |
| 1847 !captured_part_of_prefix.empty()) { |
| 1848 // If this succeeded, then we must have had a transform rule and there must |
| 1849 // have been some part of the prefix that we captured. |
| 1850 // We make the transformation and check that the resultant number is viable. |
| 1851 // If so, replace the number and return. |
| 1852 possible_national_prefix_pattern.Replace(&number_string_copy, |
| 1853 transform_rule); |
| 1854 if (national_number_rule.FullMatch(number_string_copy)) { |
| 1855 number->assign(number_string_copy); |
| 1856 if (carrier_code) { |
| 1857 carrier_code->assign(carrier_code_temp); |
| 1858 } |
| 1859 } |
| 1860 } else if (possible_national_prefix_pattern.Consume( |
| 1861 number_copy_without_transform.get(), &carrier_code_temp) || |
| 1862 possible_national_prefix_pattern.Consume( |
| 1863 number_copy_without_transform.get())) { |
| 1864 VLOG(4) << "Parsed the first digits as a national prefix."; |
| 1865 // If captured_part_of_prefix is empty, this implies nothing was captured by |
| 1866 // the capturing groups in possible_national_prefix; therefore, no |
| 1867 // transformation is necessary, and we just remove the national prefix. |
| 1868 const string number_copy_as_string = |
| 1869 number_copy_without_transform->ToString(); |
| 1870 if (national_number_rule.FullMatch(number_copy_as_string)) { |
| 1871 number->assign(number_copy_as_string); |
| 1872 if (carrier_code) { |
| 1873 carrier_code->assign(carrier_code_temp); |
| 1874 } |
| 1875 } |
| 1876 } else { |
| 1877 VLOG(4) << "The first digits did not match the national prefix."; |
| 1878 } |
| 1879 } |
| 1880 |
| 1881 // Strips any extension (as in, the part of the number dialled after the call is |
| 1882 // connected, usually indicated with extn, ext, x or similar) from the end of |
| 1883 // the number, and returns it. The number passed in should be non-normalized. |
| 1884 bool PhoneNumberUtil::MaybeStripExtension(string* number, string* extension) |
| 1885 const { |
| 1886 DCHECK(number); |
| 1887 DCHECK(extension); |
| 1888 // There are three extension capturing groups in the regular expression. |
| 1889 string possible_extension_one; |
| 1890 string possible_extension_two; |
| 1891 string possible_extension_three; |
| 1892 string number_copy(*number); |
| 1893 const scoped_ptr<RegExpInput> number_copy_as_regexp_input( |
| 1894 RegExpInput::Create(number_copy)); |
| 1895 if (extn_pattern->Consume(number_copy_as_regexp_input.get(), |
| 1896 false, |
| 1897 &possible_extension_one, |
| 1898 &possible_extension_two, |
| 1899 &possible_extension_three)) { |
| 1900 // Replace the extensions in the original string here. |
| 1901 extn_pattern->Replace(&number_copy, ""); |
| 1902 VLOG(4) << "Found an extension. Possible extension one: " |
| 1903 << possible_extension_one |
| 1904 << ". Possible extension two: " << possible_extension_two |
| 1905 << ". Possible extension three: " << possible_extension_three |
| 1906 << ". Remaining number: " << number_copy; |
| 1907 // If we find a potential extension, and the number preceding this is a |
| 1908 // viable number, we assume it is an extension. |
| 1909 if ((!possible_extension_one.empty() || !possible_extension_two.empty() || |
| 1910 !possible_extension_three.empty()) && |
| 1911 IsViablePhoneNumber(number_copy)) { |
| 1912 number->assign(number_copy); |
| 1913 if (!possible_extension_one.empty()) { |
| 1914 extension->assign(possible_extension_one); |
| 1915 } else if (!possible_extension_two.empty()) { |
| 1916 extension->assign(possible_extension_two); |
| 1917 } else if (!possible_extension_three.empty()) { |
| 1918 extension->assign(possible_extension_three); |
| 1919 } |
| 1920 return true; |
| 1921 } |
| 1922 } |
| 1923 return false; |
| 1924 } |
| 1925 |
| 1926 // Extracts country calling code from national_number, and returns it. It |
| 1927 // assumes that the leading plus sign or IDD has already been removed. Returns 0 |
| 1928 // if national_number doesn't start with a valid country calling code, and |
| 1929 // leaves national_number unmodified. Assumes the national_number is at least 3 |
| 1930 // characters long. |
| 1931 int PhoneNumberUtil::ExtractCountryCode(string* national_number) const { |
| 1932 int potential_country_code; |
| 1933 for (size_t i = 1; i <= kMaxLengthCountryCode; ++i) { |
| 1934 safe_strto32(national_number->substr(0, i), &potential_country_code); |
| 1935 string region_code; |
| 1936 GetRegionCodeForCountryCode(potential_country_code, ®ion_code); |
| 1937 if (region_code != RegionCode::GetUnknown()) { |
| 1938 national_number->erase(0, i); |
| 1939 return potential_country_code; |
| 1940 } |
| 1941 } |
| 1942 return 0; |
| 1943 } |
| 1944 |
| 1945 // Tries to extract a country calling code from a number. Country calling codes |
| 1946 // are extracted in the following ways: |
| 1947 // - by stripping the international dialing prefix of the region the person |
| 1948 // is dialing from, if this is present in the number, and looking at the next |
| 1949 // digits |
| 1950 // - by stripping the '+' sign if present and then looking at the next digits |
| 1951 // - by comparing the start of the number and the country calling code of the |
| 1952 // default region. If the number is not considered possible for the numbering |
| 1953 // plan of the default region initially, but starts with the country calling |
| 1954 // code of this region, validation will be reattempted after stripping this |
| 1955 // country calling code. If this number is considered a possible number, then |
| 1956 // the first digits will be considered the country calling code and removed as |
| 1957 // such. |
| 1958 // |
| 1959 // Returns NO_PARSING_ERROR if a country calling code was successfully |
| 1960 // extracted or none was present, or the appropriate error otherwise, such as |
| 1961 // if a + was present but it was not followed by a valid country calling code. |
| 1962 // If NO_PARSING_ERROR is returned, the national_number without the country |
| 1963 // calling code is populated, and the country_code passed in is set to the |
| 1964 // country calling code if found, otherwise to 0. |
| 1965 PhoneNumberUtil::ErrorType PhoneNumberUtil::MaybeExtractCountryCode( |
| 1966 const PhoneMetadata* default_region_metadata, |
| 1967 bool keep_raw_input, |
| 1968 string* national_number, |
| 1969 PhoneNumber* phone_number) const { |
| 1970 DCHECK(national_number); |
| 1971 DCHECK(phone_number); |
| 1972 // Set the default prefix to be something that will never match if there is no |
| 1973 // default region. |
| 1974 string possible_country_idd_prefix = default_region_metadata |
| 1975 ? default_region_metadata->international_prefix() |
| 1976 : "NonMatch"; |
| 1977 PhoneNumber::CountryCodeSource country_code_source = |
| 1978 MaybeStripInternationalPrefixAndNormalize(possible_country_idd_prefix, |
| 1979 national_number); |
| 1980 if (keep_raw_input) { |
| 1981 phone_number->set_country_code_source(country_code_source); |
| 1982 } |
| 1983 if (country_code_source != PhoneNumber::FROM_DEFAULT_COUNTRY) { |
| 1984 if (national_number->length() < kMinLengthForNsn) { |
| 1985 VLOG(2) << "Phone number had an IDD, but after this was not " |
| 1986 << "long enough to be a viable phone number."; |
| 1987 return TOO_SHORT_AFTER_IDD; |
| 1988 } |
| 1989 int potential_country_code = ExtractCountryCode(national_number); |
| 1990 if (potential_country_code != 0) { |
| 1991 phone_number->set_country_code(potential_country_code); |
| 1992 return NO_PARSING_ERROR; |
| 1993 } |
| 1994 // If this fails, they must be using a strange country calling code that we |
| 1995 // don't recognize, or that doesn't exist. |
| 1996 return INVALID_COUNTRY_CODE_ERROR; |
| 1997 } else if (default_region_metadata) { |
| 1998 // Check to see if the number starts with the country calling code for the |
| 1999 // default region. If so, we remove the country calling code, and do some |
| 2000 // checks on the validity of the number before and after. |
| 2001 int default_country_code = default_region_metadata->country_code(); |
| 2002 string default_country_code_string(SimpleItoa(default_country_code)); |
| 2003 VLOG(4) << "Possible country calling code: " << default_country_code_string; |
| 2004 string potential_national_number; |
| 2005 if (TryStripPrefixString(*national_number, |
| 2006 default_country_code_string, |
| 2007 &potential_national_number)) { |
| 2008 const PhoneNumberDesc& general_num_desc = |
| 2009 default_region_metadata->general_desc(); |
| 2010 const RegExp& valid_number_pattern = |
| 2011 regexp_cache->GetRegExp(general_num_desc.national_number_pattern()); |
| 2012 MaybeStripNationalPrefixAndCarrierCode(*default_region_metadata, |
| 2013 &potential_national_number, |
| 2014 NULL); |
| 2015 VLOG(4) << "Number without country calling code prefix: " |
| 2016 << potential_national_number; |
| 2017 const RegExp& possible_number_pattern = regexp_cache->GetRegExp( |
| 2018 StrCat("(", general_num_desc.possible_number_pattern(), ")")); |
| 2019 // If the number was not valid before but is valid now, or if it was too |
| 2020 // long before, we consider the number with the country code stripped to |
| 2021 // be a better result and keep that instead. |
| 2022 if ((!valid_number_pattern.FullMatch(*national_number) && |
| 2023 valid_number_pattern.FullMatch(potential_national_number)) || |
| 2024 TestNumberLengthAgainstPattern(possible_number_pattern, |
| 2025 *national_number) == TOO_LONG) { |
| 2026 national_number->assign(potential_national_number); |
| 2027 if (keep_raw_input) { |
| 2028 phone_number->set_country_code_source( |
| 2029 PhoneNumber::FROM_NUMBER_WITHOUT_PLUS_SIGN); |
| 2030 } |
| 2031 phone_number->set_country_code(default_country_code); |
| 2032 return NO_PARSING_ERROR; |
| 2033 } |
| 2034 } |
| 2035 } |
| 2036 // No country calling code present. Set the country_code to 0. |
| 2037 phone_number->set_country_code(0); |
| 2038 return NO_PARSING_ERROR; |
| 2039 } |
| 2040 |
| 2041 PhoneNumberUtil::MatchType PhoneNumberUtil::IsNumberMatch( |
| 2042 const PhoneNumber& first_number_in, |
| 2043 const PhoneNumber& second_number_in) const { |
| 2044 // Make copies of the phone number so that the numbers passed in are not |
| 2045 // edited. |
| 2046 PhoneNumber first_number(first_number_in); |
| 2047 PhoneNumber second_number(second_number_in); |
| 2048 // First clear raw_input and country_code_source and |
| 2049 // preferred_domestic_carrier_code fields and any empty-string extensions so |
| 2050 // that we can use the proto-buffer equality method. |
| 2051 first_number.clear_raw_input(); |
| 2052 first_number.clear_country_code_source(); |
| 2053 first_number.clear_preferred_domestic_carrier_code(); |
| 2054 second_number.clear_raw_input(); |
| 2055 second_number.clear_country_code_source(); |
| 2056 second_number.clear_preferred_domestic_carrier_code(); |
| 2057 if (first_number.extension().empty()) { |
| 2058 first_number.clear_extension(); |
| 2059 } |
| 2060 if (second_number.extension().empty()) { |
| 2061 second_number.clear_extension(); |
| 2062 } |
| 2063 // Early exit if both had extensions and these are different. |
| 2064 if (first_number.has_extension() && second_number.has_extension() && |
| 2065 first_number.extension() != second_number.extension()) { |
| 2066 return NO_MATCH; |
| 2067 } |
| 2068 int first_number_country_code = first_number.country_code(); |
| 2069 int second_number_country_code = second_number.country_code(); |
| 2070 // Both had country calling code specified. |
| 2071 if (first_number_country_code != 0 && second_number_country_code != 0) { |
| 2072 if (ExactlySameAs(first_number, second_number)) { |
| 2073 return EXACT_MATCH; |
| 2074 } else if (first_number_country_code == second_number_country_code && |
| 2075 IsNationalNumberSuffixOfTheOther(first_number, second_number)) { |
| 2076 // A SHORT_NSN_MATCH occurs if there is a difference because of the |
| 2077 // presence or absence of an 'Italian leading zero', the presence or |
| 2078 // absence of an extension, or one NSN being a shorter variant of the |
| 2079 // other. |
| 2080 return SHORT_NSN_MATCH; |
| 2081 } |
| 2082 // This is not a match. |
| 2083 return NO_MATCH; |
| 2084 } |
| 2085 // Checks cases where one or both country calling codes were not specified. To |
| 2086 // make equality checks easier, we first set the country_code fields to be |
| 2087 // equal. |
| 2088 first_number.set_country_code(second_number_country_code); |
| 2089 // If all else was the same, then this is an NSN_MATCH. |
| 2090 if (ExactlySameAs(first_number, second_number)) { |
| 2091 return NSN_MATCH; |
| 2092 } |
| 2093 if (IsNationalNumberSuffixOfTheOther(first_number, second_number)) { |
| 2094 return SHORT_NSN_MATCH; |
| 2095 } |
| 2096 return NO_MATCH; |
| 2097 } |
| 2098 |
| 2099 PhoneNumberUtil::MatchType PhoneNumberUtil::IsNumberMatchWithTwoStrings( |
| 2100 const string& first_number, |
| 2101 const string& second_number) const { |
| 2102 PhoneNumber first_number_as_proto; |
| 2103 ErrorType error_type = |
| 2104 Parse(first_number, RegionCode::GetUnknown(), &first_number_as_proto); |
| 2105 if (error_type == NO_PARSING_ERROR) { |
| 2106 return IsNumberMatchWithOneString(first_number_as_proto, second_number); |
| 2107 } |
| 2108 if (error_type == INVALID_COUNTRY_CODE_ERROR) { |
| 2109 PhoneNumber second_number_as_proto; |
| 2110 ErrorType error_type = Parse(second_number, RegionCode::GetUnknown(), |
| 2111 &second_number_as_proto); |
| 2112 if (error_type == NO_PARSING_ERROR) { |
| 2113 return IsNumberMatchWithOneString(second_number_as_proto, first_number); |
| 2114 } |
| 2115 if (error_type == INVALID_COUNTRY_CODE_ERROR) { |
| 2116 error_type = ParseHelper(first_number, RegionCode::GetUnknown(), false, |
| 2117 false, &first_number_as_proto); |
| 2118 if (error_type == NO_PARSING_ERROR) { |
| 2119 error_type = ParseHelper(second_number, RegionCode::GetUnknown(), false, |
| 2120 false, &second_number_as_proto); |
| 2121 if (error_type == NO_PARSING_ERROR) { |
| 2122 return IsNumberMatch(first_number_as_proto, second_number_as_proto); |
| 2123 } |
| 2124 } |
| 2125 } |
| 2126 } |
| 2127 // One or more of the phone numbers we are trying to match is not a viable |
| 2128 // phone number. |
| 2129 return INVALID_NUMBER; |
| 2130 } |
| 2131 |
| 2132 PhoneNumberUtil::MatchType PhoneNumberUtil::IsNumberMatchWithOneString( |
| 2133 const PhoneNumber& first_number, |
| 2134 const string& second_number) const { |
| 2135 // First see if the second number has an implicit country calling code, by |
| 2136 // attempting to parse it. |
| 2137 PhoneNumber second_number_as_proto; |
| 2138 ErrorType error_type = |
| 2139 Parse(second_number, RegionCode::GetUnknown(), &second_number_as_proto); |
| 2140 if (error_type == NO_PARSING_ERROR) { |
| 2141 return IsNumberMatch(first_number, second_number_as_proto); |
| 2142 } |
| 2143 if (error_type == INVALID_COUNTRY_CODE_ERROR) { |
| 2144 // The second number has no country calling code. EXACT_MATCH is no longer |
| 2145 // possible. We parse it as if the region was the same as that for the |
| 2146 // first number, and if EXACT_MATCH is returned, we replace this with |
| 2147 // NSN_MATCH. |
| 2148 string first_number_region; |
| 2149 GetRegionCodeForCountryCode(first_number.country_code(), |
| 2150 &first_number_region); |
| 2151 if (first_number_region != RegionCode::GetUnknown()) { |
| 2152 PhoneNumber second_number_with_first_number_region; |
| 2153 Parse(second_number, first_number_region, |
| 2154 &second_number_with_first_number_region); |
| 2155 MatchType match = IsNumberMatch(first_number, |
| 2156 second_number_with_first_number_region); |
| 2157 if (match == EXACT_MATCH) { |
| 2158 return NSN_MATCH; |
| 2159 } |
| 2160 return match; |
| 2161 } else { |
| 2162 // If the first number didn't have a valid country calling code, then we |
| 2163 // parse the second number without one as well. |
| 2164 error_type = ParseHelper(second_number, RegionCode::GetUnknown(), false, |
| 2165 false, &second_number_as_proto); |
| 2166 if (error_type == NO_PARSING_ERROR) { |
| 2167 return IsNumberMatch(first_number, second_number_as_proto); |
| 2168 } |
| 2169 } |
| 2170 } |
| 2171 // One or more of the phone numbers we are trying to match is not a viable |
| 2172 // phone number. |
| 2173 return INVALID_NUMBER; |
| 2174 } |
| 2175 |
| 2176 } // namespace phonenumbers |
| 2177 } // namespace i18n |
OLD | NEW |