OLD | NEW |
(Empty) | |
| 1 Index: regexp_adapter.h |
| 2 =================================================================== |
| 3 --- regexp_adapter.h (revision 0) |
| 4 +++ regexp_adapter.h (revision 0) |
| 5 @@ -0,0 +1,96 @@ |
| 6 +// Copyright (C) 2011 Google Inc. |
| 7 +// |
| 8 +// Licensed under the Apache License, Version 2.0 (the "License"); |
| 9 +// you may not use this file except in compliance with the License. |
| 10 +// You may obtain a copy of the License at |
| 11 +// |
| 12 +// http://www.apache.org/licenses/LICENSE-2.0 |
| 13 +// |
| 14 +// Unless required by applicable law or agreed to in writing, software |
| 15 +// distributed under the License is distributed on an "AS IS" BASIS, |
| 16 +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 17 +// See the License for the specific language governing permissions and |
| 18 +// limitations under the License. |
| 19 + |
| 20 +// Author: George Yakovlev |
| 21 + |
| 22 +#ifndef I18N_PHONENUMBERS_REGEXP_ADAPTER_H_ |
| 23 +#define I18N_PHONENUMBERS_REGEXP_ADAPTER_H_ |
| 24 + |
| 25 +#include <string> |
| 26 + |
| 27 +// Regexp adapter to allow pluggable regexp engine, as it is external to |
| 28 +// libphonenumber. |
| 29 + |
| 30 +namespace reg_exp { |
| 31 + |
| 32 +// The reg exp input class. |
| 33 +// It supports only functions used in phonelibrary. |
| 34 +class RegularExpressionInput { |
| 35 + public: |
| 36 + virtual ~RegularExpressionInput() {}; |
| 37 + |
| 38 + // Matches string to regular expression, returns true if expression was |
| 39 + // matched, false otherwise, advances position in the match. |
| 40 + // |reg_exp| - expression to be matched. |
| 41 + // |beginning_only| - if true match would be successfull only if appears at |
| 42 + // the beginning of the tested region of the string. |
| 43 + // |matched_string1| - successfully matched first string. Can be NULL. |
| 44 + // |matched_string2| - successfully matched second string. Can be NULL. |
| 45 + virtual bool ConsumeRegExp(std::string const& reg_exp, |
| 46 + bool beginning_only, |
| 47 + std::string* matched_string1, |
| 48 + std::string* matched_string2) = 0; |
| 49 + // Convert unmatched input to a string. |
| 50 + virtual std::string ToString() const = 0; |
| 51 +}; |
| 52 + |
| 53 +// The regular expression class. |
| 54 +// It supports only functions used in phonelibrary. |
| 55 +class RegularExpression { |
| 56 + public: |
| 57 + RegularExpression() {} |
| 58 + virtual ~RegularExpression() {} |
| 59 + |
| 60 + // Matches string to regular expression, returns true if expression was |
| 61 + // matched, false otherwise, advances position in the match. |
| 62 + // |input_string| - string to be searched. |
| 63 + // |beginning_only| - if true match would be successfull only if appears at |
| 64 + // the beginning of the tested region of the string. |
| 65 + // |matched_string1| - successfully matched first string. Can be NULL. |
| 66 + // |matched_string2| - successfully matched second string. Can be NULL. |
| 67 + // |matched_string3| - successfully matched third string. Can be NULL. |
| 68 + virtual bool Consume(RegularExpressionInput* input_string, |
| 69 + bool beginning_only, |
| 70 + std::string* matched_string1 = NULL, |
| 71 + std::string* matched_string2 = NULL, |
| 72 + std::string* matched_string3 = NULL) const = 0; |
| 73 + |
| 74 + |
| 75 + // Matches string to regular expression, returns true if expression was |
| 76 + // matched, false otherwise. |
| 77 + // |input_string| - string to be searched. |
| 78 + // |full_match| - if true match would be successfull only if it matches the |
| 79 + // complete string. |
| 80 + // |matched_string| - successfully matched string. Can be NULL. |
| 81 + virtual bool Match(const char* input_string, |
| 82 + bool full_match, |
| 83 + std::string* matched_string) const = 0; |
| 84 + |
| 85 + // Replaces match(es) in the |string_to_process|. if |global| is true, |
| 86 + // replaces all the matches, only the first match otherwise. |
| 87 + // |replacement_string| - text the matches are replaced with. |
| 88 + // Returns true if expression successfully processed through the string, |
| 89 + // even if no actual replacements were made. Returns false in case of an |
| 90 + // error. |
| 91 + virtual bool Replace(std::string* string_to_process, |
| 92 + bool global, |
| 93 + const char* replacement_string) const = 0; |
| 94 +}; |
| 95 + |
| 96 +RegularExpressionInput* CreateRegularExpressionInput(const char* utf8_input); |
| 97 +RegularExpression* CreateRegularExpression(const char* utf8_regexp); |
| 98 + |
| 99 +} // namespace reg_exp |
| 100 + |
| 101 +#endif // I18N_PHONENUMBERS_REGEXP_ADAPTER_H_ |
| 102 |
| 103 Property changes on: regexp_adapter.h |
| 104 ___________________________________________________________________ |
| 105 Added: svn:eol-style |
| 106 + LF |
| 107 |
| 108 Index: regexp_adapter_re2.cc |
| 109 =================================================================== |
| 110 --- regexp_adapter_re2.cc (revision 0) |
| 111 +++ regexp_adapter_re2.cc (revision 0) |
| 112 @@ -0,0 +1,192 @@ |
| 113 +// Copyright (C) 2011 Google Inc. |
| 114 +// |
| 115 +// Licensed under the Apache License, Version 2.0 (the "License"); |
| 116 +// you may not use this file except in compliance with the License. |
| 117 +// You may obtain a copy of the License at |
| 118 +// |
| 119 +// http://www.apache.org/licenses/LICENSE-2.0 |
| 120 +// |
| 121 +// Unless required by applicable law or agreed to in writing, software |
| 122 +// distributed under the License is distributed on an "AS IS" BASIS, |
| 123 +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 124 +// See the License for the specific language governing permissions and |
| 125 +// limitations under the License. |
| 126 + |
| 127 +// Author: George Yakovlev |
| 128 +#include "regexp_adapter.h" |
| 129 + |
| 130 +#include <re2/re2.h> |
| 131 +#include <re2/stringpiece.h> |
| 132 +#include <re2/re2.h> |
| 133 + |
| 134 +namespace { |
| 135 +scoped_ptr<RE2Cache> re2_cache; |
| 136 +} // namespace |
| 137 + |
| 138 +class RE2RegularExpressionInput : public RegularExpressionInput { |
| 139 + public: |
| 140 + RE2RegularExpressionInput(const char* utf8_input); |
| 141 + |
| 142 + virtual bool ConsumeRegExp(std::string const& reg_exp, |
| 143 + bool beginning_only, |
| 144 + std::string* matched_string1, |
| 145 + std::string* matched_string2); |
| 146 + virtual std::string ToString() const; |
| 147 + private: |
| 148 + StringPiece utf8_input_; |
| 149 +}; |
| 150 + |
| 151 + |
| 152 +class RE2RegularExpression : public reg_exp::RegularExpression { |
| 153 + public: |
| 154 + RE2RegularExpression(const char* utf8_regexp); |
| 155 + |
| 156 + virtual bool Consume(reg_exp::RegularExpressionInput* input_string, |
| 157 + bool beginning_only, |
| 158 + std::string* matched_string1, |
| 159 + std::string* matched_string2, |
| 160 + std::string* matched_string3) const; |
| 161 + |
| 162 + virtual bool Match(const char* input_string, |
| 163 + bool full_match, |
| 164 + std::string* matched_string) const; |
| 165 + |
| 166 + virtual bool Replace(std::string* string_to_process, |
| 167 + bool global, |
| 168 + const char* replacement_string) const; |
| 169 + private: |
| 170 + RE2 utf8_regexp_; |
| 171 +}; |
| 172 + |
| 173 +RE2RegularExpressionInput::RE2RegularExpressionInput(const char* utf8_input) |
| 174 + : utf8_input_(utf8_input) { |
| 175 + DCHECK(utf8_input); |
| 176 +} |
| 177 + |
| 178 +bool RE2RegularExpressionInput::ConsumeRegExp(std::string const& reg_exp, |
| 179 + bool beginning_only, |
| 180 + std::string* matched_string1, |
| 181 + std::string* matched_string2) { |
| 182 + if (beginning_only) { |
| 183 + if (matched_string2) |
| 184 + return RE2::Consume(&utf8_input_, |
| 185 + RE2Cache::ScopedAccess(re2_cache.get(), reg_exp), |
| 186 + matched_string1, matched_string2); |
| 187 + else if (matched_string1) |
| 188 + return RE2::Consume(&utf8_input_, |
| 189 + RE2Cache::ScopedAccess(re2_cache.get(), reg_exp), |
| 190 + matched_string1); |
| 191 + else |
| 192 + return RE2::Consume(&utf8_input_, |
| 193 + RE2Cache::ScopedAccess(re2_cache.get(), reg_exp)); |
| 194 + } else { |
| 195 + if (matched_string2) |
| 196 + return RE2::FindAndConsume(&utf8_input_, |
| 197 + RE2Cache::ScopedAccess(re2_cache.get(), |
| 198 + reg_exp), |
| 199 + matched_string1, matched_string2); |
| 200 + else if (matched_string1) |
| 201 + return RE2::FindAndConsume(&utf8_input_, |
| 202 + RE2Cache::ScopedAccess(re2_cache.get(), |
| 203 + reg_exp), |
| 204 + matched_string1); |
| 205 + else |
| 206 + return RE2::FindAndConsume(&utf8_input_, |
| 207 + RE2Cache::ScopedAccess(re2_cache.get(), |
| 208 + reg_exp)); |
| 209 + } |
| 210 +} |
| 211 + |
| 212 +std::string RE2RegularExpressionInput::ToString() const { |
| 213 + utf8_input_.ToString(); |
| 214 +} |
| 215 + |
| 216 +RE2RegularExpression::RE2RegularExpression(const char* utf8_regexp) |
| 217 + : utf8_regexp_(utf8_regexp) { |
| 218 + DCHECK(utf8_regexp); |
| 219 +} |
| 220 + |
| 221 +bool RE2RegularExpression::Consume(RegularExpressionInput* input_string, |
| 222 + bool beginning_only, |
| 223 + std::string* matched_string1, |
| 224 + std::string* matched_string2, |
| 225 + std::string* matched_string3) const { |
| 226 + DCHECK(input_string); |
| 227 + // matched_string1 may be NULL |
| 228 + // matched_string2 may be NULL |
| 229 + if (beginning_only) { |
| 230 + if (matched_string3) { |
| 231 + return RE2::Consume(input_string, utf8_regexp_, |
| 232 + matched_string1, matched_string2, matched_string3); |
| 233 + } else if (matched_string2) { |
| 234 + return RE2::Consume(input_string, utf8_regexp_, |
| 235 + matched_string1, matched_string2); |
| 236 + } else if (matched_string1) { |
| 237 + return RE2::Consume(input_string, utf8_regexp_, matched_string1); |
| 238 + } else { |
| 239 + return RE2::Consume(input_string, utf8_regexp_); |
| 240 + } |
| 241 + } else { |
| 242 + if (matched_string3) { |
| 243 + return RE2::FindAndConsume(input_string, utf8_regexp_, |
| 244 + matched_string1, matched_string2, |
| 245 + matched_string3); |
| 246 + } else if (matched_string2) { |
| 247 + return RE2::FindAndConsume(input_string, utf8_regexp_, |
| 248 + matched_string1, matched_string2); |
| 249 + } else if (matched_string1) { |
| 250 + return RE2::FindAndConsume(input_string, utf8_regexp_, matched_string1); |
| 251 + } else { |
| 252 + return RE2::FindAndConsume(input_string, utf8_regexp_); |
| 253 + } |
| 254 + } |
| 255 +} |
| 256 + |
| 257 +bool RE2RegularExpression::Match(const char* input_string, |
| 258 + bool full_match, |
| 259 + std::string* matched_string) const { |
| 260 + DCHECK(input_string); |
| 261 + // matched_string may be NULL |
| 262 + if (full_match) { |
| 263 + if (matched_string) |
| 264 + return RE2::FullMatch(input_string, matched_string); |
| 265 + else |
| 266 + return RE2::FullMatch(input_string); |
| 267 + } else { |
| 268 + if (matched_string) |
| 269 + return RE2::PartialMatch(input_string, matched_string); |
| 270 + else |
| 271 + return RE2::PartialMatch(input_string); |
| 272 + } |
| 273 +} |
| 274 + |
| 275 +bool RE2RegularExpression::Replace(std::string* string_to_process, |
| 276 + bool global, |
| 277 + const char* replacement_string) const { |
| 278 + DCHECK(string_to_process); |
| 279 + DCHECK(replacement_string); |
| 280 + if (global) { |
| 281 + StringPiece str(replacement_string); |
| 282 + return RE2::GlobalReplace(string_to_process, str); |
| 283 + } else { |
| 284 + return RE2::Replace(string_to_process, replacement_string); |
| 285 + } |
| 286 +} |
| 287 + |
| 288 + |
| 289 +namespace reg_exp { |
| 290 + |
| 291 +RegularExpressionInput* CreateRegularExpressionInput(const char* utf8_input) { |
| 292 + if (!re2_cache.get()) |
| 293 + re2_cache.reset(new RE2Cache(64)); |
| 294 + return new RE2RegularExpressionInput(utf8_input); |
| 295 +} |
| 296 + |
| 297 +RegularExpression* CreateRegularExpression(const char* utf8_regexp) { |
| 298 + if (!re2_cache.get()) |
| 299 + re2_cache.reset(new RE2Cache(64)); |
| 300 + return new RE2RegularExpression(utf8_regexp); |
| 301 +} |
| 302 + |
| 303 +} // namespace reg_exp |
| 304 + |
| 305 |
| 306 Property changes on: regexp_adapter_re2.cc |
| 307 ___________________________________________________________________ |
| 308 Added: svn:eol-style |
| 309 + LF |
| 310 |
| 311 Index: phonenumberutil_test.cc |
| 312 =================================================================== |
| 313 --- phonenumberutil_test.cc (revision 186) |
| 314 +++ phonenumberutil_test.cc (working copy) |
| 315 @@ -21,12 +21,12 @@ |
| 316 #include <string> |
| 317 |
| 318 #include <gtest/gtest.h> |
| 319 -#include <re2/re2.h> |
| 320 |
| 321 #include "phonemetadata.pb.h" |
| 322 #include "phonenumber.h" |
| 323 #include "phonenumber.pb.h" |
| 324 #include "phonenumberutil.h" |
| 325 +#include "regexp_adapter.h" |
| 326 #include "test_metadata.h" |
| 327 |
| 328 namespace i18n { |
| 329 Index: phonenumberutil.cc |
| 330 =================================================================== |
| 331 --- phonenumberutil.cc (revision 186) |
| 332 +++ phonenumberutil.cc (working copy) |
| 333 @@ -25,8 +25,6 @@ |
| 334 #include <vector> |
| 335 |
| 336 #include <google/protobuf/message_lite.h> |
| 337 -#include <re2/re2.h> |
| 338 -#include <re2/stringpiece.h> |
| 339 #include <unicode/errorcode.h> |
| 340 #include <unicode/translit.h> |
| 341 |
| 342 @@ -38,7 +36,7 @@ |
| 343 #include "phonemetadata.pb.h" |
| 344 #include "phonenumber.h" |
| 345 #include "phonenumber.pb.h" |
| 346 -#include "re2_cache.h" |
| 347 +#include "regexp_adapter.h" |
| 348 #include "stringutil.h" |
| 349 #include "utf/unicodetext.h" |
| 350 #include "utf/utf.h" |
| 351 @@ -54,14 +52,11 @@ |
| 352 using std::stringstream; |
| 353 |
| 354 using google::protobuf::RepeatedPtrField; |
| 355 -using re2::StringPiece; |
| 356 |
| 357 namespace { |
| 358 |
| 359 scoped_ptr<LoggerAdapter> logger; |
| 360 |
| 361 -scoped_ptr<RE2Cache> re2_cache; |
| 362 - |
| 363 // These objects are created in the function InitializeStaticMapsAndSets. |
| 364 |
| 365 // These mappings map a character (key) to a specific digit that should replace |
| 366 @@ -78,7 +73,7 @@ |
| 367 const char kPlusSign[] = "+"; |
| 368 |
| 369 const char kPlusChars[] = "++"; |
| 370 -scoped_ptr<const RE2> plus_chars_pattern; |
| 371 +scoped_ptr<const reg_exp::RegularExpression> plus_chars_pattern; |
| 372 |
| 373 const char kRfc3966ExtnPrefix[] = ";ext="; |
| 374 |
| 375 @@ -89,7 +84,7 @@ |
| 376 // prefixes in a region, they will be represented as a regex string that always |
| 377 // contains character(s) other than ASCII digits. |
| 378 // Note this regex also includes tilde, which signals waiting for the tone. |
| 379 -scoped_ptr<const RE2> unique_international_prefix; |
| 380 +scoped_ptr<const reg_exp::RegularExpression> unique_international_prefix; |
| 381 |
| 382 // Digits accepted in phone numbers. |
| 383 // Both Arabic-Indic and Eastern Arabic-Indic are supported. |
| 384 @@ -97,8 +92,8 @@ |
| 385 // We accept alpha characters in phone numbers, ASCII only. We store lower-case |
| 386 // here only since our regular expressions are case-insensitive. |
| 387 const char kValidAlpha[] = "a-z"; |
| 388 -scoped_ptr<const RE2> capturing_digit_pattern; |
| 389 -scoped_ptr<const RE2> capturing_ascii_digits_pattern; |
| 390 +scoped_ptr<const reg_exp::RegularExpression> capturing_digit_pattern; |
| 391 +scoped_ptr<const reg_exp::RegularExpression> capturing_ascii_digits_pattern; |
| 392 |
| 393 // Regular expression of acceptable characters that may start a phone number |
| 394 // for the purposes of parsing. This allows us to strip away meaningless |
| 395 @@ -110,7 +105,7 @@ |
| 396 // a number. The string starting with this valid character is captured. |
| 397 // This corresponds to VALID_START_CHAR in the java version. |
| 398 scoped_ptr<const string> valid_start_char; |
| 399 -scoped_ptr<const RE2> valid_start_char_pattern; |
| 400 +scoped_ptr<const reg_exp::RegularExpression> valid_start_char_pattern; |
| 401 |
| 402 // Regular expression of characters typically used to start a second phone |
| 403 // number for the purposes of parsing. This allows us to strip off parts of |
| 404 @@ -121,7 +116,8 @@ |
| 405 // preceding this is captured. |
| 406 // This corresponds to SECOND_NUMBER_START in the java version. |
| 407 const char kCaptureUpToSecondNumberStart[] = "(.*)[\\\\/] *x"; |
| 408 -scoped_ptr<const RE2> capture_up_to_second_number_start_pattern; |
| 409 +scoped_ptr<const reg_exp::RegularExpression> |
| 410 + capture_up_to_second_number_start_pattern; |
| 411 |
| 412 // Regular expression of trailing characters that we want to remove. We remove |
| 413 // all characters that are not alpha or numerical characters. The hash |
| 414 @@ -130,7 +126,7 @@ |
| 415 // number if this was a match. |
| 416 // This corresponds to UNWANTED_END_CHARS in the java version. |
| 417 const char kUnwantedEndChar[] = "[^\\p{N}\\p{L}#]"; |
| 418 -scoped_ptr<const RE2> unwanted_end_char_pattern; |
| 419 +scoped_ptr<const reg_exp::RegularExpression> unwanted_end_char_pattern; |
| 420 |
| 421 // Regular expression of acceptable punctuation found in phone numbers. This |
| 422 // excludes punctuation found as a leading character only. This consists of |
| 423 @@ -177,20 +173,20 @@ |
| 424 scoped_ptr<const string> known_extn_patterns; |
| 425 // Regexp of all known extension prefixes used by different regions followed |
| 426 // by 1 or more valid digits, for use when parsing. |
| 427 -scoped_ptr<const RE2> extn_pattern; |
| 428 +scoped_ptr<const reg_exp::RegularExpression> extn_pattern; |
| 429 |
| 430 // We append optionally the extension pattern to the end here, as a valid phone |
| 431 // number may have an extension prefix appended, followed by 1 or more digits. |
| 432 -scoped_ptr<const RE2> valid_phone_number_pattern; |
| 433 +scoped_ptr<const reg_exp::RegularExpression> valid_phone_number_pattern; |
| 434 |
| 435 // We use this pattern to check if the phone number has at least three letters |
| 436 // in it - if so, then we treat it as a number where some phone-number digits |
| 437 // are represented by letters. |
| 438 -scoped_ptr<const RE2> valid_alpha_phone_pattern; |
| 439 +scoped_ptr<const reg_exp::RegularExpression> valid_alpha_phone_pattern; |
| 440 |
| 441 -scoped_ptr<const RE2> first_group_capturing_pattern; |
| 442 +scoped_ptr<const reg_exp::RegularExpression> first_group_capturing_pattern; |
| 443 |
| 444 -scoped_ptr<const RE2> carrier_code_pattern; |
| 445 +scoped_ptr<const reg_exp::RegularExpression> carrier_code_pattern; |
| 446 |
| 447 void TransformRegularExpressionToRE2Syntax(string* regex) { |
| 448 DCHECK(regex); |
| 449 @@ -280,18 +276,19 @@ |
| 450 it = available_formats.begin(); it != available_formats.end(); ++it) { |
| 451 int size = it->leading_digits_pattern_size(); |
| 452 if (size > 0) { |
| 453 - StringPiece number_copy(number_for_leading_digits_match); |
| 454 + scoped_ptr<reg_exp::RegularExpressionInput> |
| 455 + number_copy(reg_exp::CreateRegularExpressionInput( |
| 456 + number_for_leading_digits_match.c_str())); |
| 457 // We always use the last leading_digits_pattern, as it is the most |
| 458 // detailed. |
| 459 - if (!RE2::Consume(&number_copy, |
| 460 - RE2Cache::ScopedAccess( |
| 461 - re2_cache.get(), |
| 462 - it->leading_digits_pattern(size - 1)))) { |
| 463 + if (!number_copy->ConsumeRegExp(it->leading_digits_pattern(size - 1), |
| 464 + true, NULL, NULL)) { |
| 465 continue; |
| 466 } |
| 467 } |
| 468 - RE2Cache::ScopedAccess pattern_to_match(re2_cache.get(), it->pattern()); |
| 469 - if (RE2::FullMatch(national_number, pattern_to_match)) { |
| 470 + scoped_ptr<reg_exp::RegularExpression> pattern_to_match( |
| 471 + reg_exp::CreateRegularExpression(it->pattern().c_str())); |
| 472 + if (pattern_to_match->Match(national_number.c_str(), true, NULL)) { |
| 473 string formatting_pattern(it->format()); |
| 474 if (number_format == PhoneNumberUtil::NATIONAL && |
| 475 carrier_code.length() > 0 && |
| 476 @@ -299,11 +296,12 @@ |
| 477 // Replace the $CC in the formatting rule with the desired carrier code
. |
| 478 string carrier_code_formatting_rule = |
| 479 it->domestic_carrier_code_formatting_rule(); |
| 480 - RE2::Replace(&carrier_code_formatting_rule, *carrier_code_pattern, |
| 481 - carrier_code); |
| 482 + carrier_code_pattern->Replace(&carrier_code_formatting_rule, |
| 483 + false, carrier_code.c_str()); |
| 484 TransformRegularExpressionToRE2Syntax(&carrier_code_formatting_rule); |
| 485 - RE2::Replace(&formatting_pattern, *first_group_capturing_pattern, |
| 486 - carrier_code_formatting_rule); |
| 487 + first_group_capturing_pattern->Replace(&formatting_pattern, |
| 488 + false, |
| 489 + carrier_code_formatting_rule.c_str()); |
| 490 } else { |
| 491 // Use the national prefix formatting rule instead. |
| 492 string national_prefix_formatting_rule = |
| 493 @@ -315,14 +313,15 @@ |
| 494 // should be formatted at this point. |
| 495 TransformRegularExpressionToRE2Syntax( |
| 496 &national_prefix_formatting_rule); |
| 497 - RE2::Replace(&formatting_pattern, *first_group_capturing_pattern, |
| 498 - national_prefix_formatting_rule); |
| 499 + first_group_capturing_pattern->Replace(&formatting_pattern, |
| 500 + false, |
| 501 + national_prefix_formatting_rule.c_str()); |
| 502 } |
| 503 } |
| 504 TransformRegularExpressionToRE2Syntax(&formatting_pattern); |
| 505 formatted_number->assign(national_number); |
| 506 - RE2::GlobalReplace(formatted_number, pattern_to_match, |
| 507 - formatting_pattern); |
| 508 + pattern_to_match->Replace(formatted_number, true, |
| 509 + formatting_pattern.c_str()); |
| 510 return; |
| 511 } |
| 512 } |
| 513 @@ -361,12 +360,14 @@ |
| 514 |
| 515 bool IsNumberMatchingDesc(const string& national_number, |
| 516 const PhoneNumberDesc& number_desc) { |
| 517 - return (RE2::FullMatch(national_number, |
| 518 - RE2Cache::ScopedAccess(re2_cache.get(), |
| 519 - number_desc.possible_number_pattern())) && |
| 520 - RE2::FullMatch(national_number, |
| 521 - RE2Cache::ScopedAccess(re2_cache.get(), |
| 522 - number_desc.national_number_pattern()))); |
| 523 + scoped_ptr<const reg_exp::RegularExpression> |
| 524 + possible_pattern(reg_exp::CreateRegularExpression( |
| 525 + number_desc.possible_number_pattern().c_str())); |
| 526 + scoped_ptr<const reg_exp::RegularExpression> |
| 527 + national_pattern(reg_exp::CreateRegularExpression( |
| 528 + number_desc.national_number_pattern().c_str())); |
| 529 + return (possible_pattern->Match(national_number.c_str(), true, NULL) && |
| 530 + national_pattern->Match(national_number.c_str(), true, NULL)); |
| 531 } |
| 532 |
| 533 PhoneNumberUtil::PhoneNumberType GetNumberTypeHelper( |
| 534 @@ -452,18 +453,25 @@ |
| 535 // Initialisation helper function used to populate the regular expressions in a |
| 536 // defined order. |
| 537 void CreateRegularExpressions() { |
| 538 - unique_international_prefix.reset(new RE2("[\\d]+(?:[~⁓∼~][\\d]+)?")); |
| 539 - first_group_capturing_pattern.reset(new RE2("(\\$1)")); |
| 540 - carrier_code_pattern.reset(new RE2("\\$CC")); |
| 541 - capturing_digit_pattern.reset(new RE2(StrCat("([", kValidDigits, "])"))); |
| 542 - capturing_ascii_digits_pattern.reset(new RE2("(\\d+)")); |
| 543 + unique_international_prefix.reset( |
| 544 + reg_exp::CreateRegularExpression("[\\d]+(?:[~⁓∼~][\\d]+)?")); |
| 545 + first_group_capturing_pattern.reset( |
| 546 + reg_exp::CreateRegularExpression("(\\$1)")); |
| 547 + carrier_code_pattern.reset( |
| 548 + reg_exp::CreateRegularExpression("\\$CC")); |
| 549 + capturing_digit_pattern.reset( |
| 550 + reg_exp::CreateRegularExpression( |
| 551 + StrCat("([", kValidDigits, "])").c_str())); |
| 552 + capturing_ascii_digits_pattern.reset( |
| 553 + reg_exp::CreateRegularExpression("(\\d+)")); |
| 554 valid_start_char.reset(new string(StrCat( |
| 555 "[", kPlusChars, kValidDigits, "]"))); |
| 556 - valid_start_char_pattern.reset(new RE2(*valid_start_char)); |
| 557 - capture_up_to_second_number_start_pattern.reset(new RE2( |
| 558 - kCaptureUpToSecondNumberStart)); |
| 559 - unwanted_end_char_pattern.reset(new RE2( |
| 560 - kUnwantedEndChar)); |
| 561 + valid_start_char_pattern.reset( |
| 562 + reg_exp::CreateRegularExpression(valid_start_char->c_str())); |
| 563 + capture_up_to_second_number_start_pattern.reset( |
| 564 + reg_exp::CreateRegularExpression(kCaptureUpToSecondNumberStart)); |
| 565 + unwanted_end_char_pattern.reset( |
| 566 + reg_exp::CreateRegularExpression(kUnwantedEndChar)); |
| 567 valid_phone_number.reset(new string( |
| 568 StrCat("[", kPlusChars, "]*(?:[", kValidPunctuation, "]*[", kValidDigits, |
| 569 "]){3,}[", kValidAlpha, kValidPunctuation, kValidDigits, "]*"))); |
| 570 @@ -479,17 +487,19 @@ |
| 571 "int|int|anexo)" |
| 572 "[:\\..]?[ \\t,-]*", capturing_extn_digits, "#?|" |
| 573 "[- ]+([", kValidDigits, "]{1,5})#"))); |
| 574 - extn_pattern.reset(new RE2(StrCat("(?i)(?:", *known_extn_patterns, ")$"))); |
| 575 - valid_phone_number_pattern.reset(new RE2( |
| 576 - StrCat("(?i)", *valid_phone_number, "(?:", *known_extn_patterns, ")?"))); |
| 577 - valid_alpha_phone_pattern.reset(new RE2( |
| 578 - StrCat("(?i)(?:.*?[", kValidAlpha, "]){3}"))); |
| 579 - plus_chars_pattern.reset(new RE2(StrCat("[", kPlusChars, "]+"))); |
| 580 + extn_pattern.reset(reg_exp::CreateRegularExpression( |
| 581 + StrCat("(?i)(?:", *known_extn_patterns, ")$").c_str())); |
| 582 + valid_phone_number_pattern.reset(reg_exp::CreateRegularExpression( |
| 583 + StrCat("(?i)", *valid_phone_number, "(?:", *known_extn_patterns, |
| 584 + ")?").c_str())); |
| 585 + valid_alpha_phone_pattern.reset(reg_exp::CreateRegularExpression( |
| 586 + StrCat("(?i)(?:.*?[", kValidAlpha, "]){3}").c_str())); |
| 587 + plus_chars_pattern.reset(reg_exp::CreateRegularExpression( |
| 588 + StrCat("[", kPlusChars, "]+").c_str())); |
| 589 } |
| 590 |
| 591 void InitializeStaticMapsAndSets() { |
| 592 // Create global objects. |
| 593 - re2_cache.reset(new RE2Cache(64)); |
| 594 all_plus_number_grouping_symbols.reset(new map<char32, char>); |
| 595 alpha_mappings.reset(new map<char32, char>); |
| 596 all_normalization_mappings.reset(new map<char32, char>); |
| 597 @@ -625,36 +635,37 @@ |
| 598 |
| 599 // Strips the IDD from the start of the number if present. Helper function used |
| 600 // by MaybeStripInternationalPrefixAndNormalize. |
| 601 -bool ParsePrefixAsIdd(const RE2& idd_pattern, string* number) { |
| 602 +bool ParsePrefixAsIdd(const reg_exp::RegularExpression* idd_pattern, |
| 603 + string* number) { |
| 604 DCHECK(number); |
| 605 - StringPiece number_copy(*number); |
| 606 + scoped_ptr<reg_exp::RegularExpressionInput> number_copy( |
| 607 + reg_exp::CreateRegularExpressionInput(number->c_str())); |
| 608 // First attempt to strip the idd_pattern at the start, if present. We make a |
| 609 // copy so that we can revert to the original string if necessary. |
| 610 - if (RE2::Consume(&number_copy, idd_pattern)) { |
| 611 + if (idd_pattern->Consume(number_copy.get(), true, NULL, NULL)) { |
| 612 // Only strip this if the first digit after the match is not a 0, since |
| 613 // country calling codes cannot begin with 0. |
| 614 string extracted_digit; |
| 615 - if (RE2::PartialMatch(number_copy, |
| 616 - *capturing_digit_pattern, |
| 617 - &extracted_digit)) { |
| 618 + if (capturing_digit_pattern->Match(number_copy->ToString().c_str(), false, |
| 619 + &extracted_digit)) { |
| 620 PhoneNumberUtil::NormalizeDigitsOnly(&extracted_digit); |
| 621 if (extracted_digit == "0") { |
| 622 return false; |
| 623 } |
| 624 } |
| 625 - number->assign(number_copy.ToString()); |
| 626 + number->assign(number_copy->ToString()); |
| 627 return true; |
| 628 } |
| 629 return false; |
| 630 } |
| 631 |
| 632 PhoneNumberUtil::ValidationResult TestNumberLengthAgainstPattern( |
| 633 - const RE2& number_pattern, const string& number) { |
| 634 + const reg_exp::RegularExpression* number_pattern, const string& number) { |
| 635 string extracted_number; |
| 636 - if (RE2::FullMatch(number, number_pattern, &extracted_number)) { |
| 637 + if (number_pattern->Match(number.c_str(), true, &extracted_number)) { |
| 638 return PhoneNumberUtil::IS_POSSIBLE; |
| 639 } |
| 640 - if (RE2::PartialMatch(number, number_pattern, &extracted_number)) { |
| 641 + if (number_pattern->Match(number.c_str(), false, &extracted_number)) { |
| 642 return PhoneNumberUtil::TOO_LONG; |
| 643 } else { |
| 644 return PhoneNumberUtil::TOO_SHORT; |
| 645 @@ -862,8 +873,10 @@ |
| 646 PhoneNumberFormat number_format, |
| 647 const RepeatedPtrField<NumberFormat>& user_defined_formats, |
| 648 string* formatted_number) const { |
| 649 - static const RE2 national_prefix_pattern("\\$NP"); |
| 650 - static const RE2 first_group_pattern("\\$FG"); |
| 651 + static scoped_ptr<const reg_exp::RegularExpression> |
| 652 + national_prefix_pattern(reg_exp::CreateRegularExpression("\\$NP")); |
| 653 + static scoped_ptr<const reg_exp::RegularExpression> |
| 654 + first_group_pattern(reg_exp::CreateRegularExpression("\\$FG")); |
| 655 DCHECK(formatted_number); |
| 656 int country_calling_code = number.country_code(); |
| 657 // Note GetRegionCodeForCountryCode() is used because formatting information |
| 658 @@ -893,10 +906,12 @@ |
| 659 num_format_copy->MergeFrom(*it); |
| 660 if (!national_prefix.empty()) { |
| 661 // Replace $NP with national prefix and $FG with the first group ($1). |
| 662 - RE2::Replace(&national_prefix_formatting_rule, national_prefix_pattern, |
| 663 - national_prefix); |
| 664 - RE2::Replace(&national_prefix_formatting_rule, first_group_pattern, |
| 665 - "$1"); |
| 666 + national_prefix_pattern->Replace(&national_prefix_formatting_rule, |
| 667 + false, |
| 668 + national_prefix.c_str()); |
| 669 + first_group_pattern->Replace(&national_prefix_formatting_rule, |
| 670 + false, |
| 671 + "$1"); |
| 672 num_format_copy->set_national_prefix_formatting_rule( |
| 673 national_prefix_formatting_rule); |
| 674 } else { |
| 675 @@ -1021,7 +1036,8 @@ |
| 676 // format of the number is returned, unless there is a preferred internationa
l |
| 677 // prefix. |
| 678 string international_prefix_for_formatting( |
| 679 - RE2::FullMatch(international_prefix, *unique_international_prefix) |
| 680 + unique_international_prefix->Match(international_prefix.c_str(), |
| 681 + true, NULL) |
| 682 ? international_prefix |
| 683 : metadata->preferred_international_prefix()); |
| 684 if (!international_prefix_for_formatting.empty()) { |
| 685 @@ -1133,7 +1149,8 @@ |
| 686 // format of the number is returned, unless there is a preferred internationa
l |
| 687 // prefix. |
| 688 string international_prefix_for_formatting( |
| 689 - RE2::FullMatch(international_prefix, *unique_international_prefix) |
| 690 + unique_international_prefix->Match(international_prefix.c_str(), |
| 691 + true, NULL) |
| 692 ? international_prefix |
| 693 : metadata->preferred_international_prefix()); |
| 694 if (!international_prefix_for_formatting.empty()) { |
| 695 @@ -1179,8 +1196,10 @@ |
| 696 number, carrier_code, formatted_number); |
| 697 if (number_format == RFC3966) { |
| 698 // Replace all separators with a "-". |
| 699 - static const RE2 separator_pattern(StrCat("[", kValidPunctuation, "]+")); |
| 700 - RE2::GlobalReplace(formatted_number, separator_pattern, "-"); |
| 701 + scoped_ptr<const reg_exp::RegularExpression> separator_pattern( |
| 702 + reg_exp::CreateRegularExpression( |
| 703 + StrCat("[", kValidPunctuation, "]+").c_str())); |
| 704 + separator_pattern->Replace(formatted_number, true, "-"); |
| 705 } |
| 706 } |
| 707 |
| 708 @@ -1288,10 +1307,9 @@ |
| 709 it != region_codes.end(); ++it) { |
| 710 const PhoneMetadata* metadata = GetMetadataForRegion(*it); |
| 711 if (metadata->has_leading_digits()) { |
| 712 - StringPiece number(national_number); |
| 713 - if (RE2::Consume(&number, |
| 714 - RE2Cache::ScopedAccess(re2_cache.get(), |
| 715 - metadata->leading_digits()))) { |
| 716 + scoped_ptr<reg_exp::RegularExpressionInput> number( |
| 717 + reg_exp::CreateRegularExpressionInput(national_number.c_str())); |
| 718 + if (number->ConsumeRegExp(metadata->leading_digits(), true, NULL, NULL))
{ |
| 719 *region_code = *it; |
| 720 return; |
| 721 } |
| 722 @@ -1367,8 +1385,10 @@ |
| 723 const string& number_to_parse, |
| 724 const string& default_region) const { |
| 725 if (!IsValidRegionCode(default_region) && !number_to_parse.empty()) { |
| 726 - StringPiece number_as_string_piece(number_to_parse); |
| 727 - if (!RE2::Consume(&number_as_string_piece, *plus_chars_pattern)) { |
| 728 + scoped_ptr<reg_exp::RegularExpressionInput> number_as_string_piece( |
| 729 + reg_exp::CreateRegularExpressionInput(number_to_parse.c_str())); |
| 730 + if (!plus_chars_pattern->Consume(number_as_string_piece.get(), |
| 731 + true, NULL, NULL)) { |
| 732 return false; |
| 733 } |
| 734 } |
| 735 @@ -1435,8 +1455,6 @@ |
| 736 return TOO_SHORT_NSN; |
| 737 } |
| 738 if (country_metadata) { |
| 739 - RE2Cache::ScopedAccess valid_number_pattern(re2_cache.get(), |
| 740 - country_metadata->general_desc().national_number_pattern()); |
| 741 string* carrier_code = keep_raw_input ? |
| 742 temp_number.mutable_preferred_domestic_carrier_code() : NULL; |
| 743 MaybeStripNationalPrefixAndCarrierCode(*country_metadata, |
| 744 @@ -1489,7 +1507,7 @@ |
| 745 for (it = number_as_unicode.begin(); it != number_as_unicode.end(); ++it) { |
| 746 len = it.get_utf8(current_char); |
| 747 current_char[len] = '\0'; |
| 748 - if (RE2::FullMatch(current_char, *valid_start_char_pattern)) { |
| 749 + if (valid_start_char_pattern->Match(current_char, true, NULL)) { |
| 750 break; |
| 751 } |
| 752 } |
| 753 @@ -1505,7 +1523,7 @@ |
| 754 for (; reverse_it.base() != it; ++reverse_it) { |
| 755 len = reverse_it.get_utf8(current_char); |
| 756 current_char[len] = '\0'; |
| 757 - if (!RE2::FullMatch(current_char, *unwanted_end_char_pattern)) { |
| 758 + if (!unwanted_end_char_pattern->Match(current_char, true, NULL)) { |
| 759 break; |
| 760 } |
| 761 } |
| 762 @@ -1521,9 +1539,9 @@ |
| 763 " left with: " + *extracted_number); |
| 764 |
| 765 // Now remove any extra numbers at the end. |
| 766 - RE2::PartialMatch(*extracted_number, |
| 767 - *capture_up_to_second_number_start_pattern, |
| 768 - extracted_number); |
| 769 + capture_up_to_second_number_start_pattern->Match(extracted_number->c_str(), |
| 770 + false, |
| 771 + extracted_number); |
| 772 } |
| 773 |
| 774 bool PhoneNumberUtil::IsPossibleNumber(const PhoneNumber& number) const { |
| 775 @@ -1569,9 +1587,10 @@ |
| 776 return IS_POSSIBLE; |
| 777 } |
| 778 } |
| 779 - RE2Cache::ScopedAccess possible_number_pattern(re2_cache.get(), |
| 780 - StrCat("(", general_num_desc.possible_number_pattern(), ")")); |
| 781 - return TestNumberLengthAgainstPattern(possible_number_pattern, |
| 782 + scoped_ptr<reg_exp::RegularExpression> possible_number_pattern( |
| 783 + reg_exp::CreateRegularExpression( |
| 784 + StrCat("(", general_num_desc.possible_number_pattern(), ")").c_str())); |
| 785 + return TestNumberLengthAgainstPattern(possible_number_pattern.get(), |
| 786 national_number); |
| 787 } |
| 788 |
| 789 @@ -1701,13 +1720,16 @@ |
| 790 |
| 791 string formatted_number; |
| 792 Format(copied_proto, INTERNATIONAL, &formatted_number); |
| 793 - StringPiece i18n_number(formatted_number); |
| 794 + scoped_ptr<reg_exp::RegularExpressionInput> i18n_number( |
| 795 + reg_exp::CreateRegularExpressionInput(formatted_number.c_str())); |
| 796 string digit_group; |
| 797 string ndc; |
| 798 string third_group; |
| 799 for (int i = 0; i < 3; ++i) { |
| 800 - if (!RE2::FindAndConsume(&i18n_number, *capturing_ascii_digits_pattern, |
| 801 - &digit_group)) { |
| 802 + if (!capturing_ascii_digits_pattern->Consume(i18n_number.get(), |
| 803 + false, |
| 804 + &digit_group, |
| 805 + NULL)) { |
| 806 // We should find at least three groups. |
| 807 return 0; |
| 808 } |
| 809 @@ -1734,9 +1756,11 @@ |
| 810 void PhoneNumberUtil::NormalizeDigitsOnly(string* number) { |
| 811 DCHECK(number); |
| 812 // Delete everything that isn't valid digits. |
| 813 - static const RE2 invalid_digits_pattern(StrCat("[^", kValidDigits, "]")); |
| 814 - static const StringPiece empty; |
| 815 - RE2::GlobalReplace(number, invalid_digits_pattern, empty); |
| 816 + static scoped_ptr<reg_exp::RegularExpression> invalid_digits_pattern( |
| 817 + reg_exp::CreateRegularExpression(StrCat("[^", kValidDigits, |
| 818 + "]").c_str())); |
| 819 + static const char *empty = ""; |
| 820 + invalid_digits_pattern->Replace(number, true, empty); |
| 821 // Normalize all decimal digits to ASCII digits. |
| 822 UParseError error; |
| 823 icu::ErrorCode status; |
| 824 @@ -1778,7 +1802,7 @@ |
| 825 string number_copy(number); |
| 826 string extension; |
| 827 MaybeStripExtension(&number_copy, &extension); |
| 828 - return RE2::FullMatch(number_copy, *valid_alpha_phone_pattern); |
| 829 + return valid_alpha_phone_pattern->Match(number_copy.c_str(), true, NULL); |
| 830 } |
| 831 |
| 832 void PhoneNumberUtil::ConvertAlphaCharactersInNumber(string* number) const { |
| 833 @@ -1798,7 +1822,7 @@ |
| 834 // - Arabic-Indic numerals are converted to European numerals. |
| 835 void PhoneNumberUtil::Normalize(string* number) const { |
| 836 DCHECK(number); |
| 837 - if (RE2::PartialMatch(*number, *valid_alpha_phone_pattern)) { |
| 838 + if (valid_alpha_phone_pattern->Match(number->c_str(), false, NULL)) { |
| 839 NormalizeHelper(*all_normalization_mappings, true, number); |
| 840 } |
| 841 NormalizeDigitsOnly(number); |
| 842 @@ -1816,7 +1840,7 @@ |
| 843 logger->Debug("Number too short to be viable:" + number); |
| 844 return false; |
| 845 } |
| 846 - return RE2::FullMatch(number, *valid_phone_number_pattern); |
| 847 + return valid_phone_number_pattern->Match(number.c_str(), true, NULL); |
| 848 } |
| 849 |
| 850 // Strips any international prefix (such as +, 00, 011) present in the number |
| 851 @@ -1836,17 +1860,20 @@ |
| 852 if (number->empty()) { |
| 853 return PhoneNumber::FROM_DEFAULT_COUNTRY; |
| 854 } |
| 855 - StringPiece number_string_piece(*number); |
| 856 - if (RE2::Consume(&number_string_piece, *plus_chars_pattern)) { |
| 857 - number->assign(number_string_piece.ToString()); |
| 858 + scoped_ptr<reg_exp::RegularExpressionInput> number_string_piece( |
| 859 + reg_exp::CreateRegularExpressionInput(number->c_str())); |
| 860 + if (plus_chars_pattern->Consume(number_string_piece.get(), true, |
| 861 + NULL, NULL)) { |
| 862 + number->assign(number_string_piece->ToString()); |
| 863 // Can now normalize the rest of the number since we've consumed the "+" |
| 864 // sign at the start. |
| 865 Normalize(number); |
| 866 return PhoneNumber::FROM_NUMBER_WITH_PLUS_SIGN; |
| 867 } |
| 868 // Attempt to parse the first digits as an international prefix. |
| 869 - RE2Cache::ScopedAccess idd_pattern(re2_cache.get(), possible_idd_prefix); |
| 870 - if (ParsePrefixAsIdd(idd_pattern, number)) { |
| 871 + scoped_ptr<reg_exp::RegularExpression> idd_pattern( |
| 872 + reg_exp::CreateRegularExpression(possible_idd_prefix.c_str())); |
| 873 + if (ParsePrefixAsIdd(idd_pattern.get(), number)) { |
| 874 Normalize(number); |
| 875 return PhoneNumber::FROM_NUMBER_WITH_IDD; |
| 876 } |
| 877 @@ -1854,7 +1881,7 @@ |
| 878 // This shouldn't be done before, since non-numeric characters (+ and ~) may |
| 879 // legally be in the international prefix. |
| 880 Normalize(number); |
| 881 - return ParsePrefixAsIdd(idd_pattern, number) |
| 882 + return ParsePrefixAsIdd(idd_pattern.get(), number) |
| 883 ? PhoneNumber::FROM_NUMBER_WITH_IDD |
| 884 : PhoneNumber::FROM_DEFAULT_COUNTRY; |
| 885 } |
| 886 @@ -1879,25 +1906,25 @@ |
| 887 } |
| 888 // We use two copies here since Consume modifies the phone number, and if the |
| 889 // first if-clause fails the number will already be changed. |
| 890 - StringPiece number_copy(*number); |
| 891 - StringPiece number_copy_without_transform(*number); |
| 892 + scoped_ptr<reg_exp::RegularExpressionInput> number_copy( |
| 893 + reg_exp::CreateRegularExpressionInput(number->c_str())); |
| 894 + scoped_ptr<reg_exp::RegularExpressionInput> number_copy_without_transform( |
| 895 + reg_exp::CreateRegularExpressionInput(number->c_str())); |
| 896 + |
| 897 string number_string_copy(*number); |
| 898 string captured_part_of_prefix; |
| 899 - RE2Cache::ScopedAccess national_number_rule( |
| 900 - re2_cache.get(), |
| 901 - metadata.general_desc().national_number_pattern()); |
| 902 + scoped_ptr<reg_exp::RegularExpression> national_number_rule( |
| 903 + reg_exp::CreateRegularExpression( |
| 904 + metadata.general_desc().national_number_pattern().c_str())); |
| 905 // Attempt to parse the first digits as a national prefix. We make a |
| 906 // copy so that we can revert to the original string if necessary. |
| 907 const string& transform_rule = metadata.national_prefix_transform_rule(); |
| 908 if (!transform_rule.empty() && |
| 909 - (RE2::Consume(&number_copy, |
| 910 - RE2Cache::ScopedAccess(re2_cache.get(), |
| 911 - possible_national_prefix), |
| 912 - &carrier_code_temp, &captured_part_of_prefix) || |
| 913 - RE2::Consume(&number_copy, |
| 914 - RE2Cache::ScopedAccess(re2_cache.get(), |
| 915 - possible_national_prefix), |
| 916 - &captured_part_of_prefix)) && |
| 917 + (number_copy->ConsumeRegExp(possible_national_prefix, true, |
| 918 + &carrier_code_temp, |
| 919 + &captured_part_of_prefix) || |
| 920 + number_copy->ConsumeRegExp(possible_national_prefix, true, |
| 921 + &captured_part_of_prefix, NULL)) && |
| 922 !captured_part_of_prefix.empty()) { |
| 923 string re2_transform_rule(transform_rule); |
| 924 TransformRegularExpressionToRE2Syntax(&re2_transform_rule); |
| 925 @@ -1905,29 +1932,27 @@ |
| 926 // have been some part of the prefix that we captured. |
| 927 // We make the transformation and check that the resultant number is viable
. |
| 928 // If so, replace the number and return. |
| 929 - RE2::Replace(&number_string_copy, |
| 930 - RE2Cache::ScopedAccess(re2_cache.get(), |
| 931 - possible_national_prefix), |
| 932 - re2_transform_rule); |
| 933 - if (RE2::FullMatch(number_string_copy, national_number_rule)) { |
| 934 + scoped_ptr<reg_exp::RegularExpression> possible_national_prefix_rule( |
| 935 + reg_exp::CreateRegularExpression(possible_national_prefix.c_str())); |
| 936 + possible_national_prefix_rule->Replace(&number_string_copy, false, |
| 937 + re2_transform_rule.c_str()); |
| 938 + if (national_number_rule->Match(number_string_copy.c_str(), true, NULL)) { |
| 939 number->assign(number_string_copy); |
| 940 if (carrier_code) { |
| 941 carrier_code->assign(carrier_code_temp); |
| 942 } |
| 943 } |
| 944 - } else if (RE2::Consume(&number_copy_without_transform, |
| 945 - RE2Cache::ScopedAccess(re2_cache.get(), |
| 946 - possible_national_prefix), |
| 947 - &carrier_code_temp) || |
| 948 - RE2::Consume(&number_copy_without_transform, |
| 949 - RE2Cache::ScopedAccess(re2_cache.get(), |
| 950 - possible_national_prefix))) { |
| 951 + } else if (number_copy_without_transform->ConsumeRegExp( |
| 952 + possible_national_prefix, true, &carrier_code_temp, NULL) || |
| 953 + number_copy_without_transform->ConsumeRegExp( |
| 954 + possible_national_prefix, true, NULL, NULL)) { |
| 955 logger->Debug("Parsed the first digits as a national prefix."); |
| 956 + string unconsumed_part(number_copy_without_transform->ToString()); |
| 957 // If captured_part_of_prefix is empty, this implies nothing was captured b
y |
| 958 // the capturing groups in possible_national_prefix; therefore, no |
| 959 // transformation is necessary, and we just remove the national prefix. |
| 960 - if (RE2::FullMatch(number_copy_without_transform, national_number_rule)) { |
| 961 - number->assign(number_copy_without_transform.ToString()); |
| 962 + if (national_number_rule->Match(unconsumed_part.c_str(), true, NULL)) { |
| 963 + number->assign(unconsumed_part); |
| 964 if (carrier_code) { |
| 965 carrier_code->assign(carrier_code_temp); |
| 966 } |
| 967 @@ -1949,11 +1974,13 @@ |
| 968 string possible_extension_two; |
| 969 string possible_extension_three; |
| 970 string number_copy(*number); |
| 971 - if (RE2::PartialMatch(number_copy, *extn_pattern, |
| 972 - &possible_extension_one, &possible_extension_two, |
| 973 - &possible_extension_three)) { |
| 974 + scoped_ptr<reg_exp::RegularExpressionInput> number_copy_regex_input( |
| 975 + reg_exp::CreateRegularExpressionInput(number_copy.c_str())); |
| 976 + if (extn_pattern->Consume(number_copy_regex_input.get(), false, |
| 977 + &possible_extension_one, &possible_extension_two, |
| 978 + &possible_extension_three)) { |
| 979 // Replace the extensions in the original string here. |
| 980 - RE2::Replace(&number_copy, *extn_pattern, ""); |
| 981 + extn_pattern->Replace(&number_copy, false, ""); |
| 982 logger->Debug("Found an extension. Possible extension one: " |
| 983 + possible_extension_one |
| 984 + ". Possible extension two: " + possible_extension_two |
| 985 @@ -2061,25 +2088,29 @@ |
| 986 &potential_national_number)) { |
| 987 const PhoneNumberDesc& general_num_desc = |
| 988 default_region_metadata->general_desc(); |
| 989 - RE2Cache::ScopedAccess valid_number_pattern( |
| 990 - re2_cache.get(), |
| 991 - general_num_desc.national_number_pattern()); |
| 992 + scoped_ptr<reg_exp::RegularExpression> valid_number_pattern( |
| 993 + reg_exp::CreateRegularExpression( |
| 994 + general_num_desc.national_number_pattern().c_str())); |
| 995 + |
| 996 MaybeStripNationalPrefixAndCarrierCode(*default_region_metadata, |
| 997 &potential_national_number, |
| 998 NULL); |
| 999 logger->Debug("Number without country code prefix: " |
| 1000 + potential_national_number); |
| 1001 string extracted_number; |
| 1002 - RE2Cache::ScopedAccess possible_number_pattern( |
| 1003 - re2_cache.get(), |
| 1004 - StrCat("(", general_num_desc.possible_number_pattern(), ")")); |
| 1005 + scoped_ptr<reg_exp::RegularExpression> possible_number_pattern( |
| 1006 + reg_exp::CreateRegularExpression( |
| 1007 + StrCat("(", general_num_desc.possible_number_pattern(), |
| 1008 + ")").c_str())); |
| 1009 // If the number was not valid before but is valid now, or if it was too |
| 1010 // long before, we consider the number with the country code stripped to |
| 1011 // be a better result and keep that instead. |
| 1012 - if ((!RE2::FullMatch(*national_number, valid_number_pattern) && |
| 1013 - RE2::FullMatch(potential_national_number, valid_number_pattern)) || |
| 1014 - TestNumberLengthAgainstPattern(possible_number_pattern, |
| 1015 - *national_number) |
| 1016 + if ((!valid_number_pattern->Match(national_number->c_str(), |
| 1017 + true, NULL) && |
| 1018 + valid_number_pattern->Match(potential_national_number.c_str(), |
| 1019 + true, NULL)) || |
| 1020 + TestNumberLengthAgainstPattern(possible_number_pattern.get(), |
| 1021 + *national_number) |
| 1022 == TOO_LONG) { |
| 1023 national_number->assign(potential_national_number); |
| 1024 if (keep_raw_input) { |
| 1025 Index: regexp_adapter_unittest.cc |
| 1026 =================================================================== |
| 1027 --- regexp_adapter_unittest.cc (revision 0) |
| 1028 +++ regexp_adapter_unittest.cc (revision 0) |
| 1029 @@ -0,0 +1,142 @@ |
| 1030 +// Copyright (C) 2011 Google Inc. |
| 1031 +// |
| 1032 +// Licensed under the Apache License, Version 2.0 (the "License"); |
| 1033 +// you may not use this file except in compliance with the License. |
| 1034 +// You may obtain a copy of the License at |
| 1035 +// |
| 1036 +// http://www.apache.org/licenses/LICENSE-2.0 |
| 1037 +// |
| 1038 +// Unless required by applicable law or agreed to in writing, software |
| 1039 +// distributed under the License is distributed on an "AS IS" BASIS, |
| 1040 +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 1041 +// See the License for the specific language governing permissions and |
| 1042 +// limitations under the License. |
| 1043 + |
| 1044 +// Author: George Yakovlev |
| 1045 +#include <gtest/gtest.h> |
| 1046 + |
| 1047 +#include "base/scoped_ptr.h" |
| 1048 +#include "regexp_adapter.h" |
| 1049 + |
| 1050 +namespace reg_exp { |
| 1051 + |
| 1052 +TEST(RegExpAdapter, TestConsumeRegExp) { |
| 1053 + scoped_ptr<const reg_exp::RegularExpression> reg_exp1( |
| 1054 + reg_exp::CreateRegularExpression("[0-9a-z]+")); |
| 1055 + scoped_ptr<const reg_exp::RegularExpression> reg_exp2( |
| 1056 + reg_exp::CreateRegularExpression(" \\(([0-9a-z]+)\\)")); |
| 1057 + scoped_ptr<const reg_exp::RegularExpression> reg_exp3( |
| 1058 + reg_exp::CreateRegularExpression("([0-9a-z]+)-([0-9a-z]+)")); |
| 1059 + |
| 1060 + scoped_ptr<reg_exp::RegularExpressionInput> reg_input1( |
| 1061 + reg_exp::CreateRegularExpressionInput("+1-123-456-789")); |
| 1062 + scoped_ptr<reg_exp::RegularExpressionInput> reg_input2( |
| 1063 + reg_exp::CreateRegularExpressionInput("1 (123)456-789")); |
| 1064 + |
| 1065 + EXPECT_FALSE(reg_exp1->Consume(reg_input1.get(), true, NULL, NULL)); |
| 1066 + EXPECT_EQ(reg_input1->ToString(), "+1-123-456-789"); |
| 1067 + EXPECT_TRUE(reg_exp1->Consume(reg_input1.get(), false, NULL, NULL)); |
| 1068 + EXPECT_EQ(reg_input1->ToString(), "-123-456-789"); |
| 1069 + std::string res1, res2; |
| 1070 + EXPECT_FALSE(reg_exp2->Consume(reg_input1.get(), true, &res1, NULL)); |
| 1071 + EXPECT_FALSE(reg_exp3->Consume(reg_input1.get(), true, &res1, &res2)); |
| 1072 + EXPECT_TRUE(reg_exp3->Consume(reg_input1.get(), false, &res1, &res2)); |
| 1073 + EXPECT_EQ(reg_input1->ToString(), "-789"); |
| 1074 + EXPECT_EQ(res1, "123"); |
| 1075 + EXPECT_EQ(res2, "456"); |
| 1076 + |
| 1077 + EXPECT_EQ(reg_input2->ToString(), "1 (123)456-789"); |
| 1078 + EXPECT_TRUE(reg_exp1->Consume(reg_input2.get(), true, NULL, NULL)); |
| 1079 + EXPECT_EQ(reg_input2->ToString(), " (123)456-789"); |
| 1080 + EXPECT_TRUE(reg_exp2->Consume(reg_input2.get(), true, &res1, NULL)); |
| 1081 + EXPECT_EQ(reg_input2->ToString(), "456-789"); |
| 1082 + EXPECT_EQ(res1, "123"); |
| 1083 + EXPECT_TRUE(reg_exp3->Consume(reg_input2.get(), true, &res1, &res2)); |
| 1084 + EXPECT_EQ(reg_input2->ToString(), ""); |
| 1085 + EXPECT_EQ(res1, "456"); |
| 1086 + EXPECT_EQ(res2, "789"); |
| 1087 +} |
| 1088 + |
| 1089 +TEST(RegExpAdapter, TestConsumeInput) { |
| 1090 + scoped_ptr<reg_exp::RegularExpressionInput> reg_input( |
| 1091 + reg_exp::CreateRegularExpressionInput("1 (123)456-789")); |
| 1092 + std::string res1, res2; |
| 1093 + EXPECT_EQ(reg_input->ToString(), "1 (123)456-789"); |
| 1094 + EXPECT_FALSE(reg_input->ConsumeRegExp(std::string("\\[1\\]"), |
| 1095 + true, |
| 1096 + &res1, |
| 1097 + &res2)); |
| 1098 + EXPECT_EQ(reg_input->ToString(), "1 (123)456-789"); |
| 1099 + EXPECT_FALSE(reg_input->ConsumeRegExp(std::string("([0-9]+) \\([0-9]+\\)"), |
| 1100 + true, |
| 1101 + &res1, |
| 1102 + &res2)); |
| 1103 + EXPECT_EQ(reg_input->ToString(), "1 (123)456-789"); |
| 1104 + EXPECT_TRUE(reg_input->ConsumeRegExp(std::string("([0-9]+) \\(([0-9]+)\\)"), |
| 1105 + true, |
| 1106 + &res1, |
| 1107 + &res2)); |
| 1108 + EXPECT_EQ(reg_input->ToString(), "456-789"); |
| 1109 + EXPECT_EQ(res1, "1"); |
| 1110 + EXPECT_EQ(res2, "123"); |
| 1111 +} |
| 1112 + |
| 1113 +TEST(RegExpAdapter, TestMatch) { |
| 1114 + scoped_ptr<const reg_exp::RegularExpression> reg_exp( |
| 1115 + reg_exp::CreateRegularExpression("([0-9a-z]+)")); |
| 1116 + std::string matched; |
| 1117 + EXPECT_TRUE(reg_exp->Match("12345af", true, &matched)); |
| 1118 + EXPECT_EQ(matched, "12345af"); |
| 1119 + EXPECT_TRUE(reg_exp->Match("12345af", false, &matched)); |
| 1120 + EXPECT_EQ(matched, "12345af"); |
| 1121 + EXPECT_TRUE(reg_exp->Match("12345af", false, NULL)); |
| 1122 + EXPECT_TRUE(reg_exp->Match("12345af", true, NULL)); |
| 1123 + |
| 1124 + EXPECT_FALSE(reg_exp->Match("[12]", true, &matched)); |
| 1125 + EXPECT_TRUE(reg_exp->Match("[12]", false, &matched)); |
| 1126 + EXPECT_EQ(matched, "12"); |
| 1127 + |
| 1128 + EXPECT_FALSE(reg_exp->Match("[]", true, &matched)); |
| 1129 + EXPECT_FALSE(reg_exp->Match("[]", false, &matched)); |
| 1130 +} |
| 1131 + |
| 1132 +TEST(RegExpAdapter, TestReplace) { |
| 1133 + scoped_ptr<const reg_exp::RegularExpression> reg_exp( |
| 1134 + reg_exp::CreateRegularExpression("[0-9]")); |
| 1135 + |
| 1136 + std::string s("123-4567 "); |
| 1137 + EXPECT_TRUE(reg_exp->Replace(&s, false, "+")); |
| 1138 + EXPECT_EQ(s, "+23-4567 "); |
| 1139 + EXPECT_TRUE(reg_exp->Replace(&s, false, "+")); |
| 1140 + EXPECT_EQ(s, "++3-4567 "); |
| 1141 + EXPECT_TRUE(reg_exp->Replace(&s, true, "*")); |
| 1142 + EXPECT_EQ(s, "++*-**** "); |
| 1143 + EXPECT_TRUE(reg_exp->Replace(&s, true, "*")); |
| 1144 + EXPECT_EQ(s, "++*-**** "); |
| 1145 + |
| 1146 + scoped_ptr<const reg_exp::RegularExpression> full_number_expr( |
| 1147 + reg_exp::CreateRegularExpression("(\\d{3})(\\d{3})(\\d{4})")); |
| 1148 + s = "1234567890:0987654321"; |
| 1149 + EXPECT_TRUE(full_number_expr->Replace(&s, true, "(\\1) \\2-\\3$1")); |
| 1150 + EXPECT_EQ(s, "(123) 456-7890$1:(098) 765-4321$1"); |
| 1151 +} |
| 1152 + |
| 1153 +TEST(RegExpAdapter, TestUtf8) { |
| 1154 + // Expression: <tel symbol><opening square bracket>[<alpha>-<omega>]* |
| 1155 + // <closing square bracket> |
| 1156 + scoped_ptr<const reg_exp::RegularExpression> reg_exp( |
| 1157 + reg_exp::CreateRegularExpression( |
| 1158 + "\xe2\x84\xa1\xe2\x8a\x8f([\xce\xb1-\xcf\x89]*)\xe2\x8a\x90")); |
| 1159 + std::string matched; |
| 1160 + // The string is split to avoid problem with MSVC compiler when it thinks |
| 1161 + // 123 is a part of character code. |
| 1162 + EXPECT_FALSE(reg_exp->Match("\xe2\x84\xa1\xe2\x8a\x8f" "123\xe2\x8a\x90", |
| 1163 + true, &matched)); |
| 1164 + EXPECT_TRUE(reg_exp->Match( |
| 1165 + "\xe2\x84\xa1\xe2\x8a\x8f\xce\xb1\xce\xb2\xe2\x8a\x90", true, &matched)); |
| 1166 + // <alpha><betha> |
| 1167 + EXPECT_EQ(matched, "\xce\xb1\xce\xb2"); |
| 1168 +} |
| 1169 + |
| 1170 +} // namespace reg_exp |
| 1171 + |
| 1172 |
| 1173 Property changes on: regexp_adapter_unittest.cc |
| 1174 ___________________________________________________________________ |
| 1175 Added: svn:eol-style |
| 1176 + LF |
| 1177 |
OLD | NEW |